/*
 * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/vfs_rangelock.c,v 1.2 2006/12/23 00:35:04 swildner Exp $
 */
/*
 * This module implements hard range locks for files and directories.  It is
 * not to be confused with the UNIX advisory lock mechanism.  This module
 * will allow the kernel and VFS to break large I/O requests into smaller
 * pieces without losing atomicity guarantees and, eventually, this module
 * will be responsible for providing hooks for remote cache coherency
 * protocols as well.
 */
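/*
 * Sketch of the intended calling convention (an assumption drawn from the
 * code below; this file defines no callers): the caller supplies the
 * struct vrangelock storage, fills in vr_offset and vr_length, sets
 * RNGL_SHARED for a shared lock with the remaining flag bits clear, and
 * brackets the operation:
 *
 *	struct vrangelock vr;
 *
 *	vr.vr_offset = offset;
 *	vr.vr_length = length;
 *	vr.vr_flags = RNGL_SHARED;	-- shared (read) lock
 *	vrange_lock(vp, &vr);		-- may block on overlapping locks
 *	... perform the read ...
 *	vrange_unlock(vp, &vr);		-- wakes waiters marked RNGL_CHECK
 */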
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/poll.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
static void vrange_lock_overlapped(struct vnode *vp,
			struct vrangelock *vr, struct vrangelock *scan);
static int vrange_lock_conflicted(struct vnode *vp, struct vrangelock *vr);
/*
 * Lock a range within a vnode.
 *
 * The lock list is sorted by vr_offset.
 */
void
vrange_lock(struct vnode *vp, struct vrangelock *vr)
{
	struct vrangelock *scan;
	off_t eoff;

	eoff = vr->vr_offset + vr->vr_length;

	KKASSERT((vr->vr_flags & RNGL_ONLIST) == 0);
	vr->vr_flags |= RNGL_ONLIST;

	TAILQ_FOREACH(scan, &vp->v_range.vh_list, vr_node) {
		/*
		 * If the new element is entirely in front of the scan
		 * element we are done.  If it is entirely beyond the scan
		 * element we loop.  Otherwise an overlap has occurred.
		 */
		if (eoff <= scan->vr_offset) {
			TAILQ_INSERT_BEFORE(scan, vr, vr_node);
			return;
		}
		if (vr->vr_offset >= scan->vr_offset + scan->vr_length)
			continue;
		/*
		 * The overlap path inserts vr itself and blocks until the
		 * conflict clears, so we must not fall through to the
		 * tail insertion below.
		 */
		vrange_lock_overlapped(vp, vr, scan);
		return;
	}
	TAILQ_INSERT_TAIL(&vp->v_range.vh_list, vr, vr_node);
}
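/*
 * Worked example of the scan above, with hypothetical offsets: given
 * existing locks [100,200) and [300,400), a request for [0,100) hits
 * eoff <= scan->vr_offset immediately and is inserted in front; a
 * request for [200,250) is entirely beyond [100,200), loops, and is
 * inserted before [300,400); a request for [150,350) overlaps both and
 * takes the vrange_lock_overlapped() path.
 */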
/*
 * An overlap occurred.  The request is still inserted sorted based on
 * vr_offset but we must also scan the conflict space and block while
 * conflicts exist.
 */
static void
vrange_lock_overlapped(struct vnode *vp,
		       struct vrangelock *vr, struct vrangelock *scan)
{
	int conflicted = 0;
	int inserted = 0;
	int warned = 0;
	off_t eoff;

	eoff = vr->vr_offset + vr->vr_length;

	while (scan->vr_offset < eoff) {
		if ((vr->vr_flags & scan->vr_flags & RNGL_SHARED) == 0) {
			scan->vr_flags |= RNGL_CHECK;
			vr->vr_flags |= RNGL_WAITING;
			conflicted = 1;
		}
		if (inserted == 0 && vr->vr_offset < scan->vr_offset) {
			TAILQ_INSERT_BEFORE(scan, vr, vr_node);
			inserted = 1;
		}
		if ((scan = TAILQ_NEXT(scan, vr_node)) == NULL)
			break;
	}

	/*
	 * If the scan terminated without inserting vr, vr belongs in
	 * front of the first element beyond the conflict window, or at
	 * the tail if no such element exists.
	 */
	if (inserted == 0) {
		if (scan != NULL)
			TAILQ_INSERT_BEFORE(scan, vr, vr_node);
		else
			TAILQ_INSERT_TAIL(&vp->v_range.vh_list, vr, vr_node);
	}

	/*
	 * Sleep until the conflict has been resolved.
	 */
	while (conflicted) {
		if (tsleep(&vp->v_range.vh_list, 0, "vrnglk", hz * 3) ==
		    EWOULDBLOCK) {
			if (warned == 0)
				kprintf("warning: conflicted lock vp %p %lld,%lld blocked\n",
					vp, vr->vr_offset, vr->vr_length);
			warned = 1;
		}
		conflicted = vrange_lock_conflicted(vp, vr);
	}
	if (warned) {
		kprintf("warning: conflicted lock vp %p %lld,%lld unblocked\n",
			vp, vr->vr_offset, vr->vr_length);
	}
}
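/*
 * The test (vr->vr_flags & scan->vr_flags & RNGL_SHARED) == 0 used above
 * is the usual reader/writer compatibility rule for intersecting ranges:
 *
 *	new \ held	shared		exclusive
 *	shared		compatible	conflict
 *	exclusive	conflict	conflict
 */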
/*
 * Check for conflicts by scanning both forwards and backwards from the
 * node in question.  The list is sorted by vr_offset but ending offsets
 * may vary.  Because of this, the reverse scan cannot stop early.
 *
 * Return 0 on success, 1 if the lock is still conflicted.  We do not
 * check elements that are waiting as that might result in a deadlock.
 * We can stop the moment we hit a conflict.
 */
static int
vrange_lock_conflicted(struct vnode *vp, struct vrangelock *vr)
{
	struct vrangelock *scan;
	off_t eoff;

	eoff = vr->vr_offset + vr->vr_length;

	KKASSERT(vr->vr_flags & RNGL_WAITING);
	scan = vr;
	while ((scan = TAILQ_PREV(scan, vrangelock_list, vr_node)) != NULL) {
		if (scan->vr_flags & RNGL_WAITING)
			continue;
		if (scan->vr_offset + scan->vr_length > vr->vr_offset) {
			if ((vr->vr_flags & scan->vr_flags & RNGL_SHARED) == 0) {
				scan->vr_flags |= RNGL_CHECK;
				return(1);
			}
		}
	}
	scan = vr;
	while ((scan = TAILQ_NEXT(scan, vr_node)) != NULL) {
		if (eoff <= scan->vr_offset)
			break;
		if (scan->vr_flags & RNGL_WAITING)
			continue;
		if ((vr->vr_flags & scan->vr_flags & RNGL_SHARED) == 0) {
			scan->vr_flags |= RNGL_CHECK;
			return(1);
		}
	}
	vr->vr_flags &= ~RNGL_WAITING;
	return(0);
}
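/*
 * Why the reverse scan above cannot stop early, by example (hypothetical
 * offsets): with the list sorted by vr_offset as [0,1000), [10,20),
 * [30,40), a waiter on [30,40) scanning backwards sees [10,20) first.
 * That lock ends at offset 20 and does not overlap, but [0,1000) ahead
 * of it still does, so the scan must continue all the way to the list
 * head.
 */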
/*
 * Release a range lock and wake up any waiters that asked to be
 * re-checked (RNGL_CHECK).
 */
void
vrange_unlock(struct vnode *vp, struct vrangelock *vr)
{
	KKASSERT((vr->vr_flags & RNGL_ONLIST) != 0);
	vr->vr_flags &= ~RNGL_ONLIST;
	TAILQ_REMOVE(&vp->v_range.vh_list, vr, vr_node);
	if (vr->vr_flags & RNGL_CHECK) {
		vr->vr_flags &= ~RNGL_CHECK;
		wakeup(&vp->v_range.vh_list);
	}
}
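/*
 * Chunking pattern the module comment promises, as a sketch under stated
 * assumptions (CHUNK and the per-piece I/O step are hypothetical; the
 * caller is responsible for clearing the flag bits before first use):
 *
 *	struct vrangelock vr;
 *	off_t off;
 *
 *	vr.vr_offset = offset;
 *	vr.vr_length = length;
 *	vr.vr_flags = 0;		-- exclusive: RNGL_SHARED not set
 *	vrange_lock(vp, &vr);
 *	for (off = offset; off < offset + length; off += CHUNK)
 *		... issue a piece of at most CHUNK bytes at off ...
 *	vrange_unlock(vp, &vr);
 *
 * Concurrent overlapping requests block in vrange_lock() until the
 * unlock, so readers never observe a partially completed write.
 */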