/*-
 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
 * Authors: Doug Rabson <dfr@rabson.org>
 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*-
 * Copyright (c) 1982, 1986, 1989, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_debug_lockf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/hash.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sx.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/taskqueue.h>

#ifdef LOCKF_DEBUG
#include <sys/sysctl.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>

static int lockf_debug = 0; /* control debug output */
SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
#endif

MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
struct owner_vertex_list;

#define NOLOCKF (struct lockf_entry *)0
#define SELF    0x1
#define OTHERS  0x2
static void  lf_init(void *);
static int   lf_hash_owner(caddr_t, struct flock *, int);
static int   lf_owner_matches(struct lock_owner *, caddr_t, struct flock *,
    int);
static struct lockf_entry *
             lf_alloc_lock(struct lock_owner *);
static void  lf_free_lock(struct lockf_entry *);
static int   lf_clearlock(struct lockf *, struct lockf_entry *);
static int   lf_overlaps(struct lockf_entry *, struct lockf_entry *);
static int   lf_blocks(struct lockf_entry *, struct lockf_entry *);
static void  lf_free_edge(struct lockf_edge *);
static struct lockf_edge *
             lf_alloc_edge(void);
static void  lf_alloc_vertex(struct lockf_entry *);
static int   lf_add_edge(struct lockf_entry *, struct lockf_entry *);
static void  lf_remove_edge(struct lockf_edge *);
static void  lf_remove_outgoing(struct lockf_entry *);
static void  lf_remove_incoming(struct lockf_entry *);
static int   lf_add_outgoing(struct lockf *, struct lockf_entry *);
static int   lf_add_incoming(struct lockf *, struct lockf_entry *);
static int   lf_findoverlap(struct lockf_entry **, struct lockf_entry *,
    int);
static struct lockf_entry *
             lf_getblock(struct lockf *, struct lockf_entry *);
static int   lf_getlock(struct lockf *, struct lockf_entry *, struct flock *);
static void  lf_insert_lock(struct lockf *, struct lockf_entry *);
static void  lf_wakeup_lock(struct lockf *, struct lockf_entry *);
static void  lf_update_dependancies(struct lockf *, struct lockf_entry *,
    int all, struct lockf_entry_list *);
static void  lf_set_start(struct lockf *, struct lockf_entry *, off_t,
    struct lockf_entry_list *);
static void  lf_set_end(struct lockf *, struct lockf_entry *, off_t,
    struct lockf_entry_list *);
static int   lf_setlock(struct lockf *, struct lockf_entry *,
    struct vnode *, void **cookiep);
static int   lf_cancel(struct lockf *, struct lockf_entry *, void *);
static void  lf_split(struct lockf *, struct lockf_entry *,
    struct lockf_entry *, struct lockf_entry_list *);

static int   graph_reaches(struct owner_vertex *x, struct owner_vertex *y,
    struct owner_vertex_list *path);
static void  graph_check(struct owner_graph *g, int checkorder);
static void  graph_print_vertices(struct owner_vertex_list *set);
static int   graph_delta_forward(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y,
    struct owner_vertex_list *delta);
static int   graph_delta_backward(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y,
    struct owner_vertex_list *delta);
static int   graph_add_indices(int *indices, int n,
    struct owner_vertex_list *set);
static int   graph_assign_indices(struct owner_graph *g, int *indices,
    int nextunused, struct owner_vertex_list *set);
static int   graph_add_edge(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y);
static void  graph_remove_edge(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y);
static struct owner_vertex *graph_alloc_vertex(struct owner_graph *g,
    struct lock_owner *lo);
static void  graph_free_vertex(struct owner_graph *g,
    struct owner_vertex *v);
static struct owner_graph *graph_init(struct owner_graph *g);
#ifdef LOCKF_DEBUG
static void  lf_print(char *, struct lockf_entry *);
static void  lf_printlist(char *, struct lockf_entry *);
static void  lf_print_owner(struct lock_owner *);
#endif
/*
 * This structure is used to keep track of both local and remote lock
 * owners. The lf_owner field of the struct lockf_entry points back at
 * the lock owner structure. Each possible lock owner (local proc for
 * POSIX fcntl locks, local file for BSD flock locks or <pid,sysid>
 * pair for remote locks) is represented by a unique instance of
 * struct lock_owner.
 *
 * If a lock owner has a lock that blocks some other lock or a lock
 * that is waiting for some other lock, it also has a vertex in the
 * owner_graph below.
 *
 * Locking key:
 * (s)  locked by state->ls_lock
 * (S)  locked by lf_lock_states_lock
 * (l)  locked by lf_lock_owners_lock
 * (g)  locked by lf_owner_graph_lock
 * (c)  const until freeing
 */
#define LOCK_OWNER_HASH_SIZE 256

struct lock_owner {
    LIST_ENTRY(lock_owner) lo_link; /* (l) hash chain */
    int     lo_refs;                /* (l) Number of locks referring to this */
    int     lo_flags;               /* (c) Flags passed to lf_advlock */
    caddr_t lo_id;                  /* (c) Id value passed to lf_advlock */
    pid_t   lo_pid;                 /* (c) Process Id of the lock owner */
    int     lo_sysid;               /* (c) System Id of the lock owner */
    struct owner_vertex *lo_vertex; /* (g) entry in deadlock graph */
};

LIST_HEAD(lock_owner_list, lock_owner);

static struct sx              lf_lock_states_lock;
static struct lockf_list      lf_lock_states; /* (S) */
static struct sx              lf_lock_owners_lock;
static struct lock_owner_list lf_lock_owners[LOCK_OWNER_HASH_SIZE]; /* (l) */
/*
 * Structures for deadlock detection.
 *
 * We have two types of directed graph, the first is the set of locks,
 * both active and pending on a vnode. Within this graph, active locks
 * are terminal nodes in the graph (i.e. have no out-going
 * edges). Pending locks have out-going edges to each blocking active
 * lock that prevents the lock from being granted and also to each
 * older pending lock that would block them if it was active. The
 * graph for each vnode is naturally acyclic; new edges are only ever
 * added to or from new nodes (either new pending locks which only add
 * out-going edges or new active locks which only add in-coming edges)
 * therefore they cannot create loops in the lock graph.
 *
 * The second graph is a global graph of lock owners. Each lock owner
 * is a vertex in that graph and an edge is added to the graph
 * whenever an edge is added to a vnode graph, with end points
 * corresponding to owner of the new pending lock and the owner of the
 * lock upon which it waits. In order to prevent deadlock, we only add
 * an edge to this graph if the new edge would not create a cycle.
 *
 * The lock owner graph is topologically sorted, i.e. if a node has
 * any outgoing edges, then it has an order strictly less than any
 * node to which it has an outgoing edge. We preserve this ordering
 * (and detect cycles) on edge insertion using Algorithm PK from the
 * paper "A Dynamic Topological Sort Algorithm for Directed Acyclic
 * Graphs" (ACM Journal of Experimental Algorithms, Vol 11, Article
 * No. 1.7)
 */
struct owner_edge {
    LIST_ENTRY(owner_edge) e_outlink; /* (g) link from's out-edge list */
    LIST_ENTRY(owner_edge) e_inlink;  /* (g) link to's in-edge list */
    int             e_refs;           /* (g) number of times added */
    struct owner_vertex *e_from;      /* (c) out-going from here */
    struct owner_vertex *e_to;        /* (c) in-coming to here */
};
LIST_HEAD(owner_edge_list, owner_edge);

struct owner_vertex {
    TAILQ_ENTRY(owner_vertex) v_link; /* (g) workspace for edge insertion */
    uint32_t        v_gen;            /* (g) workspace for edge insertion */
    int             v_order;          /* (g) order of vertex in graph */
    struct owner_edge_list v_outedges;/* (g) list of out-edges */
    struct owner_edge_list v_inedges; /* (g) list of in-edges */
    struct lock_owner *v_owner;       /* (c) corresponding lock owner */
};
TAILQ_HEAD(owner_vertex_list, owner_vertex);

struct owner_graph {
    struct owner_vertex **g_vertices; /* (g) pointers to vertices */
    int             g_size;           /* (g) number of vertices */
    int             g_space;          /* (g) space allocated for vertices */
    int             *g_indexbuf;      /* (g) workspace for loop detection */
    uint32_t        g_gen;            /* (g) increment when re-ordering */
};

static struct sx          lf_owner_graph_lock;
static struct owner_graph lf_owner_graph;
/*
 * Initialise various structures and locks.
 */
static void
lf_init(void *dummy)
{
    int i;

    sx_init(&lf_lock_states_lock, "lock states lock");
    LIST_INIT(&lf_lock_states);

    sx_init(&lf_lock_owners_lock, "lock owners lock");
    for (i = 0; i < LOCK_OWNER_HASH_SIZE; i++)
        LIST_INIT(&lf_lock_owners[i]);

    sx_init(&lf_owner_graph_lock, "owner graph lock");
    graph_init(&lf_owner_graph);
}
SYSINIT(lf_init, SI_SUB_LOCK, SI_ORDER_FIRST, lf_init, NULL);
/*
 * Generate a hash value for a lock owner.
 */
static int
lf_hash_owner(caddr_t id, struct flock *fl, int flags)
{
    uint32_t h;

    if (flags & F_REMOTE) {
        h = HASHSTEP(0, fl->l_pid);
        h = HASHSTEP(h, fl->l_sysid);
    } else if (flags & F_FLOCK) {
        h = ((uintptr_t) id) >> 7;
    } else {
        struct proc *p = (struct proc *) id;
        h = HASHSTEP(0, p->p_pid);
    }

    return (h % LOCK_OWNER_HASH_SIZE);
}
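#if 0
/*
 * Illustrative sketch only (not part of this file): how lf_hash_owner()
 * and lf_owner_matches() combine to look up an existing lock owner in
 * the lf_lock_owners hash table, mirroring the lookup that
 * lf_advlockasync() performs below.  The function name is hypothetical.
 */
static struct lock_owner *
lf_find_owner_sketch(caddr_t id, struct flock *fl, int flags)
{
    struct lock_owner *lo;
    int hash = lf_hash_owner(id, fl, flags);

    sx_xlock(&lf_lock_owners_lock);
    LIST_FOREACH(lo, &lf_lock_owners[hash], lo_link)
        if (lf_owner_matches(lo, id, fl, flags))
            break;
    sx_xunlock(&lf_lock_owners_lock);
    return (lo);                /* NULL if no owner matched */
}
#endif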
/*
 * Return true if a lock owner matches the details passed to
 * lf_advlock.
 */
static int
lf_owner_matches(struct lock_owner *lo, caddr_t id, struct flock *fl,
    int flags)
{
    if (flags & F_REMOTE) {
        return lo->lo_pid == fl->l_pid
            && lo->lo_sysid == fl->l_sysid;
    } else {
        return lo->lo_id == id;
    }
}

static struct lockf_entry *
lf_alloc_lock(struct lock_owner *lo)
{
    struct lockf_entry *lf;

    lf = malloc(sizeof(struct lockf_entry), M_LOCKF, M_WAITOK|M_ZERO);

#ifdef LOCKF_DEBUG
    if (lockf_debug & 4)
        printf("Allocated lock %p\n", lf);
#endif
    if (lo) {
        sx_xlock(&lf_lock_owners_lock);
        lo->lo_refs++;
        sx_xunlock(&lf_lock_owners_lock);
        lf->lf_owner = lo;
    }

    return (lf);
}
static void
lf_free_lock(struct lockf_entry *lock)
{
    /*
     * Adjust the lock_owner reference count and
     * reclaim the entry if this is the last lock
     * for that owner.
     */
    struct lock_owner *lo = lock->lf_owner;

    KASSERT(LIST_EMPTY(&lock->lf_outedges),
        ("freeing lock with dependancies"));
    KASSERT(LIST_EMPTY(&lock->lf_inedges),
        ("freeing lock with dependants"));
    sx_xlock(&lf_lock_owners_lock);
    KASSERT(lo->lo_refs > 0, ("lock owner refcount"));
    lo->lo_refs--;
    if (lo->lo_refs == 0) {
#ifdef LOCKF_DEBUG
        if (lockf_debug & 1)
            printf("lf_free_lock: freeing lock owner %p\n",
                lo);
#endif
        if (lo->lo_vertex) {
            sx_xlock(&lf_owner_graph_lock);
            graph_free_vertex(&lf_owner_graph,
                lo->lo_vertex);
            sx_xunlock(&lf_owner_graph_lock);
        }
        LIST_REMOVE(lo, lo_link);
#ifdef LOCKF_DEBUG
        if (lockf_debug & 4)
            printf("Freed lock owner %p\n", lo);
#endif
        free(lo, M_LOCKF);
    }
    sx_unlock(&lf_lock_owners_lock);
    if ((lock->lf_flags & F_REMOTE) && lock->lf_vnode) {
        vrele(lock->lf_vnode);
        lock->lf_vnode = NULL;
    }
#ifdef LOCKF_DEBUG
    if (lockf_debug & 4)
        printf("Freed lock %p\n", lock);
#endif
    free(lock, M_LOCKF);
}
/*
 * Advisory record locking support
 */
int
lf_advlockasync(struct vop_advlockasync_args *ap, struct lockf **statep,
    u_quad_t size)
{
    struct lockf *state, *freestate = NULL;
    struct flock *fl = ap->a_fl;
    struct lockf_entry *lock;
    struct vnode *vp = ap->a_vp;
    caddr_t id = ap->a_id;
    int flags = ap->a_flags;
    int hash;
    struct lock_owner *lo;
    off_t start, end, oadd;
    int error;

    /*
     * Handle the F_UNLCKSYS case first - no need to mess about
     * creating a lock owner for this one.
     */
    if (ap->a_op == F_UNLCKSYS) {
        lf_clearremotesys(fl->l_sysid);
        return (0);
    }

    /*
     * Convert the flock structure into a start and end.
     */
    switch (fl->l_whence) {
    case SEEK_SET:
    case SEEK_CUR:
        /*
         * Caller is responsible for adding any necessary offset
         * when SEEK_CUR is used.
         */
        start = fl->l_start;
        break;

    case SEEK_END:
        if (size > OFF_MAX ||
            (fl->l_start > 0 && size > OFF_MAX - fl->l_start))
            return (EOVERFLOW);
        start = size + fl->l_start;
        break;

    default:
        return (EINVAL);
    }
    if (start < 0)
        return (EINVAL);
    if (fl->l_len < 0) {
        if (start == 0)
            return (EINVAL);
        end = start - 1;
        start += fl->l_len;
        if (start < 0)
            return (EINVAL);
    } else if (fl->l_len == 0) {
        end = OFF_MAX;
    } else {
        oadd = fl->l_len - 1;
        if (oadd > OFF_MAX - start)
            return (EOVERFLOW);
        end = start + oadd;
    }
    /*
     * Avoid the common case of unlocking when inode has no locks.
     */
    if ((*statep) == NULL || LIST_EMPTY(&(*statep)->ls_active)) {
        if (ap->a_op != F_SETLK) {
            fl->l_type = F_UNLCK;
            return (0);
        }
    }

    /*
     * Map our arguments to an existing lock owner or create one
     * if this is the first time we have seen this owner.
     */
    hash = lf_hash_owner(id, fl, flags);
    sx_xlock(&lf_lock_owners_lock);
    LIST_FOREACH(lo, &lf_lock_owners[hash], lo_link)
        if (lf_owner_matches(lo, id, fl, flags))
            break;
    if (!lo) {
        /*
         * We initialise the lock with a reference
         * count which matches the new lockf_entry
         * structure created below.
         */
        lo = malloc(sizeof(struct lock_owner), M_LOCKF,
            M_WAITOK|M_ZERO);
#ifdef LOCKF_DEBUG
        if (lockf_debug & 4)
            printf("Allocated lock owner %p\n", lo);
#endif
        lo->lo_refs = 1;
        lo->lo_flags = flags;
        lo->lo_id = id;
        if (flags & F_REMOTE) {
            lo->lo_pid = fl->l_pid;
            lo->lo_sysid = fl->l_sysid;
        } else if (flags & F_FLOCK) {
            lo->lo_pid = -1;
            lo->lo_sysid = 0;
        } else {
            struct proc *p = (struct proc *) id;
            lo->lo_pid = p->p_pid;
            lo->lo_sysid = 0;
        }
        lo->lo_vertex = NULL;

#ifdef LOCKF_DEBUG
        if (lockf_debug & 1) {
            printf("lf_advlockasync: new lock owner %p ", lo);
            lf_print_owner(lo);
            printf("\n");
        }
#endif

        LIST_INSERT_HEAD(&lf_lock_owners[hash], lo, lo_link);
    } else {
        /*
         * We have seen this lock owner before, increase its
         * reference count to account for the new lockf_entry
         * structure we create below.
         */
        lo->lo_refs++;
    }
    sx_xunlock(&lf_lock_owners_lock);
    /*
     * Create the lockf structure. We initialise the lf_owner
     * field here instead of in lf_alloc_lock() to avoid paying
     * the lf_lock_owners_lock tax twice.
     */
    lock = lf_alloc_lock(NULL);
    lock->lf_start = start;
    lock->lf_end = end;
    lock->lf_owner = lo;
    lock->lf_vnode = vp;
    if (flags & F_REMOTE) {
        /*
         * For remote locks, the caller may release its ref to
         * the vnode at any time - we have to ref it here to
         * prevent it from being recycled unexpectedly.
         */
        vref(vp);
    }

    /*
     * XXX The problem is that VTOI is ufs specific, so it will
     * break LOCKF_DEBUG for all other FS's other than UFS because
     * it casts the vnode->data ptr to struct inode *.
     */
    /* lock->lf_inode = VTOI(ap->a_vp); */
    lock->lf_inode = (struct inode *)0;
    lock->lf_type = fl->l_type;
    LIST_INIT(&lock->lf_outedges);
    LIST_INIT(&lock->lf_inedges);
    lock->lf_async_task = ap->a_task;
    lock->lf_flags = ap->a_flags;

    /*
     * Do the requested operation. First find our state structure
     * and create a new one if necessary - the caller's *statep
     * variable and the state's ls_threads count is protected by
     * the vnode interlock.
     */
    VI_LOCK(vp);
    if (vp->v_iflag & VI_DOOMED) {
        VI_UNLOCK(vp);
        lf_free_lock(lock);
        return (ENOENT);
    }

    /*
     * Allocate a state structure if necessary.
     */
    state = *statep;
    if (state == NULL) {
        struct lockf *ls;

        VI_UNLOCK(vp);

        ls = malloc(sizeof(struct lockf), M_LOCKF, M_WAITOK|M_ZERO);
        sx_init(&ls->ls_lock, "ls_lock");
        LIST_INIT(&ls->ls_active);
        LIST_INIT(&ls->ls_pending);
        ls->ls_threads = 1;

        sx_xlock(&lf_lock_states_lock);
        LIST_INSERT_HEAD(&lf_lock_states, ls, ls_link);
        sx_xunlock(&lf_lock_states_lock);

        /*
         * Cope if we lost a race with some other thread while
         * trying to allocate memory.
         */
        VI_LOCK(vp);
        if (vp->v_iflag & VI_DOOMED) {
            VI_UNLOCK(vp);
            sx_xlock(&lf_lock_states_lock);
            LIST_REMOVE(ls, ls_link);
            sx_xunlock(&lf_lock_states_lock);
            sx_destroy(&ls->ls_lock);
            free(ls, M_LOCKF);
            lf_free_lock(lock);
            return (ENOENT);
        }
        if ((*statep) == NULL) {
            state = *statep = ls;
            VI_UNLOCK(vp);
        } else {
            state = *statep;
            state->ls_threads++;
            VI_UNLOCK(vp);

            sx_xlock(&lf_lock_states_lock);
            LIST_REMOVE(ls, ls_link);
            sx_xunlock(&lf_lock_states_lock);
            sx_destroy(&ls->ls_lock);
            free(ls, M_LOCKF);
        }
    } else {
        state->ls_threads++;
        VI_UNLOCK(vp);
    }
    sx_xlock(&state->ls_lock);
    switch (ap->a_op) {
    case F_SETLK:
        error = lf_setlock(state, lock, vp, ap->a_cookiep);
        break;

    case F_UNLCK:
        error = lf_clearlock(state, lock);
        lf_free_lock(lock);
        break;

    case F_GETLK:
        error = lf_getlock(state, lock, fl);
        lf_free_lock(lock);
        break;

    case F_CANCEL:
        if (ap->a_cookiep)
            error = lf_cancel(state, lock, *ap->a_cookiep);
        else
            error = EINVAL;
        lf_free_lock(lock);
        break;

    default:
        lf_free_lock(lock);
        error = EINVAL;
        break;
    }

#ifdef LOCKF_DEBUG
    /*
     * Check for some can't happen stuff. In this case, the active
     * lock list becoming disordered or containing mutually
     * blocking locks. We also check the pending list for locks
     * which should be active (i.e. have no out-going edges).
     */
    LIST_FOREACH(lock, &state->ls_active, lf_link) {
        struct lockf_entry *lf;
        if (LIST_NEXT(lock, lf_link))
            KASSERT((lock->lf_start
                <= LIST_NEXT(lock, lf_link)->lf_start),
                ("locks disordered"));
        LIST_FOREACH(lf, &state->ls_active, lf_link) {
            if (lock == lf)
                break;
            KASSERT(!lf_blocks(lock, lf),
                ("two conflicting active locks"));
            if (lock->lf_owner == lf->lf_owner)
                KASSERT(!lf_overlaps(lock, lf),
                    ("two overlapping locks from same owner"));
        }
    }
    LIST_FOREACH(lock, &state->ls_pending, lf_link) {
        KASSERT(!LIST_EMPTY(&lock->lf_outedges),
            ("pending lock which should be active"));
    }
#endif
    sx_xunlock(&state->ls_lock);
    /*
     * If we have removed the last active lock on the vnode and
     * this is the last thread that was in-progress, we can free
     * the state structure. We update the caller's pointer inside
     * the vnode interlock but call free outside.
     *
     * XXX alternatively, keep the state structure around until
     * the filesystem recycles - requires a callback from the
     * filesystem.
     */
    VI_LOCK(vp);

    state->ls_threads--;
    wakeup(state);
    if (LIST_EMPTY(&state->ls_active) && state->ls_threads == 0) {
        KASSERT(LIST_EMPTY(&state->ls_pending),
            ("freeing state with pending locks"));
        freestate = state;
        *statep = NULL;
    }
    VI_UNLOCK(vp);

    if (freestate) {
        sx_xlock(&lf_lock_states_lock);
        LIST_REMOVE(freestate, ls_link);
        sx_xunlock(&lf_lock_states_lock);
        sx_destroy(&freestate->ls_lock);
        free(freestate, M_LOCKF);
    }
    return (error);
}
int
lf_advlock(struct vop_advlock_args *ap, struct lockf **statep, u_quad_t size)
{
    struct vop_advlockasync_args a;

    a.a_vp = ap->a_vp;
    a.a_id = ap->a_id;
    a.a_op = ap->a_op;
    a.a_fl = ap->a_fl;
    a.a_flags = ap->a_flags;
    a.a_task = NULL;
    a.a_cookiep = NULL;

    return (lf_advlockasync(&a, statep, size));
}
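#if 0
/*
 * Illustrative sketch only (not part of this file): a filesystem
 * normally implements its VOP_ADVLOCK() by forwarding to lf_advlock(),
 * handing it a per-file "struct lockf *" slot and the current file
 * size.  The names myfs_advlock, myfs_node, n_lockf and n_size are
 * hypothetical placeholders for whatever the filesystem provides.
 */
static int
myfs_advlock(struct vop_advlock_args *ap)
{
    struct myfs_node *np = ap->a_vp->v_data;

    return (lf_advlock(ap, &np->n_lockf, np->n_size));
}
#endif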
void
lf_purgelocks(struct vnode *vp, struct lockf **statep)
{
    struct lockf *state;
    struct lockf_entry *lock, *nlock;

    /*
     * For this to work correctly, the caller must ensure that no
     * other threads enter the locking system for this vnode,
     * e.g. by checking VI_DOOMED. We wake up any threads that are
     * sleeping waiting for locks on this vnode and then free all
     * the remaining locks.
     */
    VI_LOCK(vp);
    state = *statep;
    if (state) {
        *statep = NULL;
        state->ls_threads++;
        VI_UNLOCK(vp);

        sx_xlock(&state->ls_lock);
        sx_xlock(&lf_owner_graph_lock);
        LIST_FOREACH_SAFE(lock, &state->ls_pending, lf_link, nlock) {
            LIST_REMOVE(lock, lf_link);
            lf_remove_outgoing(lock);
            lf_remove_incoming(lock);

            /*
             * If it's an async lock, we can just free it
             * here, otherwise we let the sleeping thread
             * free it.
             */
            if (lock->lf_async_task) {
                lf_free_lock(lock);
            } else {
                lock->lf_flags |= F_INTR;
                wakeup(lock);
            }
        }
        sx_xunlock(&lf_owner_graph_lock);
        sx_xunlock(&state->ls_lock);

        /*
         * Wait for all other threads, sleeping and otherwise
         * to leave.
         */
        VI_LOCK(vp);
        while (state->ls_threads > 1)
            msleep(state, VI_MTX(vp), 0, "purgelocks", 0);
        VI_UNLOCK(vp);

        /*
         * We can just free all the active locks since they
         * will have no dependancies (we removed them all
         * above). We don't need to bother locking since we
         * are the last thread using this state structure.
         */
        LIST_FOREACH_SAFE(lock, &state->ls_pending, lf_link, nlock) {
            LIST_REMOVE(lock, lf_link);
            lf_free_lock(lock);
        }
        sx_xlock(&lf_lock_states_lock);
        LIST_REMOVE(state, ls_link);
        sx_xunlock(&lf_lock_states_lock);
        sx_destroy(&state->ls_lock);
        free(state, M_LOCKF);
    } else {
        VI_UNLOCK(vp);
    }
}
/*
 * Return non-zero if locks 'x' and 'y' overlap.
 */
static int
lf_overlaps(struct lockf_entry *x, struct lockf_entry *y)
{
    return (x->lf_start <= y->lf_end && x->lf_end >= y->lf_start);
}

/*
 * Return non-zero if lock 'x' is blocked by lock 'y' (or vice versa).
 */
static int
lf_blocks(struct lockf_entry *x, struct lockf_entry *y)
{
    return x->lf_owner != y->lf_owner
        && (x->lf_type == F_WRLCK || y->lf_type == F_WRLCK)
        && lf_overlaps(x, y);
}
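/*
 * Added illustrative note (not in the original source): two read locks
 * never block each other, e.g. for overlapping ranges [0..9] and
 * [5..14] held by different owners, lf_blocks() returns zero when both
 * entries are F_RDLCK and non-zero as soon as either one is F_WRLCK.
 * Locks with the same owner never block each other regardless of type.
 */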
/*
 * Allocate a lock edge from the free list
 */
static struct lockf_edge *
lf_alloc_edge(void)
{
    return (malloc(sizeof(struct lockf_edge), M_LOCKF, M_WAITOK|M_ZERO));
}

/*
 * Free a lock edge.
 */
static void
lf_free_edge(struct lockf_edge *e)
{
    free(e, M_LOCKF);
}

/*
 * Ensure that the lock's owner has a corresponding vertex in the
 * owner graph.
 */
static void
lf_alloc_vertex(struct lockf_entry *lock)
{
    struct owner_graph *g = &lf_owner_graph;

    if (!lock->lf_owner->lo_vertex)
        lock->lf_owner->lo_vertex =
            graph_alloc_vertex(g, lock->lf_owner);
}

/*
 * Attempt to record an edge from lock x to lock y. Return EDEADLK if
 * the new edge would cause a cycle in the owner graph.
 */
static int
lf_add_edge(struct lockf_entry *x, struct lockf_entry *y)
{
    struct owner_graph *g = &lf_owner_graph;
    struct lockf_edge *e;
    int error;

    LIST_FOREACH(e, &x->lf_outedges, le_outlink)
        KASSERT(e->le_to != y, ("adding lock edge twice"));

    /*
     * Make sure the two owners have entries in the owner graph.
     */
    lf_alloc_vertex(x);
    lf_alloc_vertex(y);

    error = graph_add_edge(g, x->lf_owner->lo_vertex,
        y->lf_owner->lo_vertex);
    if (error)
        return (error);

    e = lf_alloc_edge();
    LIST_INSERT_HEAD(&x->lf_outedges, e, le_outlink);
    LIST_INSERT_HEAD(&y->lf_inedges, e, le_inlink);
    e->le_from = x;
    e->le_to = y;

    return (0);
}

/*
 * Remove an edge from the lock graph.
 */
static void
lf_remove_edge(struct lockf_edge *e)
{
    struct owner_graph *g = &lf_owner_graph;
    struct lockf_entry *x = e->le_from;
    struct lockf_entry *y = e->le_to;

    graph_remove_edge(g, x->lf_owner->lo_vertex, y->lf_owner->lo_vertex);
    LIST_REMOVE(e, le_outlink);
    LIST_REMOVE(e, le_inlink);
    lf_free_edge(e);
}

/*
 * Remove all out-going edges from lock x.
 */
static void
lf_remove_outgoing(struct lockf_entry *x)
{
    struct lockf_edge *e;

    while ((e = LIST_FIRST(&x->lf_outedges)) != NULL) {
        lf_remove_edge(e);
    }
}

/*
 * Remove all in-coming edges from lock x.
 */
static void
lf_remove_incoming(struct lockf_entry *x)
{
    struct lockf_edge *e;

    while ((e = LIST_FIRST(&x->lf_inedges)) != NULL) {
        lf_remove_edge(e);
    }
}
/*
 * Walk the list of locks for the file and create an out-going edge
 * from lock to each blocking lock.
 */
static int
lf_add_outgoing(struct lockf *state, struct lockf_entry *lock)
{
    struct lockf_entry *overlap;
    int error;

    LIST_FOREACH(overlap, &state->ls_active, lf_link) {
        /*
         * We may assume that the active list is sorted by
         * lf_start.
         */
        if (overlap->lf_start > lock->lf_end)
            break;
        if (!lf_blocks(lock, overlap))
            continue;

        /*
         * We've found a blocking lock. Add the corresponding
         * edge to the graphs and see if it would cause a
         * deadlock.
         */
        error = lf_add_edge(lock, overlap);

        /*
         * The only error that lf_add_edge returns is EDEADLK.
         * Remove any edges we added and return the error.
         */
        if (error) {
            lf_remove_outgoing(lock);
            return (error);
        }
    }

    /*
     * We also need to add edges to sleeping locks that block
     * us. This ensures that lf_wakeup_lock cannot grant two
     * mutually blocking locks simultaneously and also enforces a
     * 'first come, first served' fairness model. Note that this
     * only happens if we are blocked by at least one active lock
     * due to the call to lf_getblock in lf_setlock below.
     */
    LIST_FOREACH(overlap, &state->ls_pending, lf_link) {
        if (!lf_blocks(lock, overlap))
            continue;

        /*
         * We've found a blocking lock. Add the corresponding
         * edge to the graphs and see if it would cause a
         * deadlock.
         */
        error = lf_add_edge(lock, overlap);

        /*
         * The only error that lf_add_edge returns is EDEADLK.
         * Remove any edges we added and return the error.
         */
        if (error) {
            lf_remove_outgoing(lock);
            return (error);
        }
    }

    return (0);
}

/*
 * Walk the list of pending locks for the file and create an in-coming
 * edge from lock to each blocking lock.
 */
static int
lf_add_incoming(struct lockf *state, struct lockf_entry *lock)
{
    struct lockf_entry *overlap;
    int error;

    LIST_FOREACH(overlap, &state->ls_pending, lf_link) {
        if (!lf_blocks(lock, overlap))
            continue;

        /*
         * We've found a blocking lock. Add the corresponding
         * edge to the graphs and see if it would cause a
         * deadlock.
         */
        error = lf_add_edge(overlap, lock);

        /*
         * The only error that lf_add_edge returns is EDEADLK.
         * Remove any edges we added and return the error.
         */
        if (error) {
            lf_remove_incoming(lock);
            return (error);
        }
    }
    return (0);
}
/*
 * Insert lock into the active list, keeping list entries ordered by
 * increasing values of lf_start.
 */
static void
lf_insert_lock(struct lockf *state, struct lockf_entry *lock)
{
    struct lockf_entry *lf, *lfprev;

    if (LIST_EMPTY(&state->ls_active)) {
        LIST_INSERT_HEAD(&state->ls_active, lock, lf_link);
        return;
    }

    lfprev = NULL;
    LIST_FOREACH(lf, &state->ls_active, lf_link) {
        if (lf->lf_start > lock->lf_start) {
            LIST_INSERT_BEFORE(lf, lock, lf_link);
            return;
        }
        lfprev = lf;
    }
    LIST_INSERT_AFTER(lfprev, lock, lf_link);
}

/*
 * Wake up a sleeping lock and remove it from the pending list now
 * that all its dependancies have been resolved. The caller should
 * arrange for the lock to be added to the active list, adjusting any
 * existing locks for the same owner as needed.
 */
static void
lf_wakeup_lock(struct lockf *state, struct lockf_entry *wakelock)
{
    /*
     * Remove from ls_pending list and wake up the caller
     * or start the async notification, as appropriate.
     */
    LIST_REMOVE(wakelock, lf_link);
#ifdef LOCKF_DEBUG
    if (lockf_debug & 1)
        lf_print("lf_wakeup_lock: awakening", wakelock);
#endif /* LOCKF_DEBUG */
    if (wakelock->lf_async_task) {
        taskqueue_enqueue(taskqueue_thread, wakelock->lf_async_task);
    } else {
        wakeup(wakelock);
    }
}

/*
 * Re-check all dependant locks and remove edges to locks that we no
 * longer block. If 'all' is non-zero, the lock has been removed and
 * we must remove all the dependancies, otherwise it has simply been
 * reduced but remains active. Any pending locks which have been
 * unblocked are added to 'granted'.
 */
static void
lf_update_dependancies(struct lockf *state, struct lockf_entry *lock, int all,
    struct lockf_entry_list *granted)
{
    struct lockf_edge *e, *ne;
    struct lockf_entry *deplock;

    LIST_FOREACH_SAFE(e, &lock->lf_inedges, le_inlink, ne) {
        deplock = e->le_from;
        if (all || !lf_blocks(lock, deplock)) {
            sx_xlock(&lf_owner_graph_lock);
            lf_remove_edge(e);
            sx_xunlock(&lf_owner_graph_lock);
            if (LIST_EMPTY(&deplock->lf_outedges)) {
                lf_wakeup_lock(state, deplock);
                LIST_INSERT_HEAD(granted, deplock, lf_link);
            }
        }
    }
}

/*
 * Set the start of an existing active lock, updating dependancies and
 * adding any newly woken locks to 'granted'.
 */
static void
lf_set_start(struct lockf *state, struct lockf_entry *lock, off_t new_start,
    struct lockf_entry_list *granted)
{
    KASSERT(new_start >= lock->lf_start, ("can't increase lock"));
    lock->lf_start = new_start;
    LIST_REMOVE(lock, lf_link);
    lf_insert_lock(state, lock);
    lf_update_dependancies(state, lock, FALSE, granted);
}

/*
 * Set the end of an existing active lock, updating dependancies and
 * adding any newly woken locks to 'granted'.
 */
static void
lf_set_end(struct lockf *state, struct lockf_entry *lock, off_t new_end,
    struct lockf_entry_list *granted)
{
    KASSERT(new_end <= lock->lf_end, ("can't increase lock"));
    lock->lf_end = new_end;
    lf_update_dependancies(state, lock, FALSE, granted);
}
/*
 * Add a lock to the active list, updating or removing any current
 * locks owned by the same owner and processing any pending locks that
 * become unblocked as a result. This code is also used for unlock
 * since the logic for updating existing locks is identical.
 *
 * As a result of processing the new lock, we may unblock existing
 * pending locks as a result of downgrading/unlocking. We simply
 * activate the newly granted locks by looping.
 *
 * Since the new lock already has its dependancies set up, we always
 * add it to the list (unless it's an unlock request). This may
 * fragment the lock list in some pathological cases but it's probably
 * not a real problem.
 */
static void
lf_activate_lock(struct lockf *state, struct lockf_entry *lock)
{
    struct lockf_entry *overlap, *lf;
    struct lockf_entry_list granted;
    int ovcase;

    LIST_INIT(&granted);
    LIST_INSERT_HEAD(&granted, lock, lf_link);

    while (!LIST_EMPTY(&granted)) {
        lock = LIST_FIRST(&granted);
        LIST_REMOVE(lock, lf_link);

        /*
         * Skip over locks owned by other processes. Handle
         * any locks that overlap and are owned by ourselves.
         */
        overlap = LIST_FIRST(&state->ls_active);
        for (;;) {
            ovcase = lf_findoverlap(&overlap, lock, SELF);

#ifdef LOCKF_DEBUG
            if (ovcase && (lockf_debug & 2)) {
                printf("lf_setlock: overlap %d", ovcase);
                lf_print("", overlap);
            }
#endif
            /*
             * Six cases:
             *  0) no overlap
             *  1) overlap == lock
             *  2) overlap contains lock
             *  3) lock contains overlap
             *  4) overlap starts before lock
             *  5) overlap ends after lock
             */
            switch (ovcase) {
            case 0: /* no overlap */
                break;

            case 1: /* overlap == lock */
                /*
                 * We have already setup the
                 * dependants for the new lock, taking
                 * into account a possible downgrade
                 * or unlock. Remove the old lock.
                 */
                LIST_REMOVE(overlap, lf_link);
                lf_update_dependancies(state, overlap, TRUE,
                    &granted);
                lf_free_lock(overlap);
                break;

            case 2: /* overlap contains lock */
                /*
                 * Just split the existing lock.
                 */
                lf_split(state, overlap, lock, &granted);
                break;

            case 3: /* lock contains overlap */
                /*
                 * Delete the overlap and advance to
                 * the next entry in the list.
                 */
                lf = LIST_NEXT(overlap, lf_link);
                LIST_REMOVE(overlap, lf_link);
                lf_update_dependancies(state, overlap, TRUE,
                    &granted);
                lf_free_lock(overlap);
                overlap = lf;
                continue;

            case 4: /* overlap starts before lock */
                /*
                 * Just update the overlap end and
                 * move on.
                 */
                lf_set_end(state, overlap, lock->lf_start - 1,
                    &granted);
                overlap = LIST_NEXT(overlap, lf_link);
                continue;

            case 5: /* overlap ends after lock */
                /*
                 * Change the start of overlap and
                 * re-insert.
                 */
                lf_set_start(state, overlap, lock->lf_end + 1,
                    &granted);
                break;
            }
            break;
        }
#ifdef LOCKF_DEBUG
        if (lockf_debug & 1) {
            if (lock->lf_type != F_UNLCK)
                lf_print("lf_activate_lock: activated", lock);
            else
                lf_print("lf_activate_lock: unlocked", lock);
            lf_printlist("lf_activate_lock", lock);
        }
#endif /* LOCKF_DEBUG */
        if (lock->lf_type != F_UNLCK)
            lf_insert_lock(state, lock);
    }
}
/*
 * Cancel a pending lock request, either as a result of a signal or a
 * cancel request for an async lock.
 */
static void
lf_cancel_lock(struct lockf *state, struct lockf_entry *lock)
{
    struct lockf_entry_list granted;

    /*
     * Note it is theoretically possible that cancelling this lock
     * may allow some other pending lock to become
     * active. Consider this case:
     *
     * Owner    Action          Result          Dependancies
     *
     * A:       lock [0..0]     succeeds
     * B:       lock [2..2]     succeeds
     * C:       lock [1..2]     blocked         C->B
     * D:       lock [0..1]     blocked         C->B,D->A,D->C
     * A:       unlock [0..0]                   C->B,D->C
     */

    LIST_REMOVE(lock, lf_link);

    /*
     * Removing out-going edges is simple.
     */
    sx_xlock(&lf_owner_graph_lock);
    lf_remove_outgoing(lock);
    sx_xunlock(&lf_owner_graph_lock);

    /*
     * Removing in-coming edges may allow some other lock to
     * become active - we use lf_update_dependancies to figure
     * out what needs to be done.
     */
    LIST_INIT(&granted);
    lf_update_dependancies(state, lock, TRUE, &granted);
    lf_free_lock(lock);

    /*
     * Feed any newly active locks to lf_activate_lock.
     */
    while (!LIST_EMPTY(&granted)) {
        lock = LIST_FIRST(&granted);
        LIST_REMOVE(lock, lf_link);
        lf_activate_lock(state, lock);
    }
}
/*
 * Set a byte-range lock.
 */
static int
lf_setlock(struct lockf *state, struct lockf_entry *lock, struct vnode *vp,
    void **cookiep)
{
    struct lockf_entry *block;
    static char lockstr[] = "lockf";
    int priority, error;

#ifdef LOCKF_DEBUG
    if (lockf_debug & 1)
        lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

    /*
     * Set the priority
     */
    priority = PLOCK;
    if (lock->lf_type == F_WRLCK)
        priority += 4;
    if (!(lock->lf_flags & F_NOINTR))
        priority |= PCATCH;
    /*
     * Scan lock list for this file looking for locks that would block us.
     */
    while ((block = lf_getblock(state, lock))) {
        /*
         * Free the structure and return if nonblocking.
         */
        if ((lock->lf_flags & F_WAIT) == 0
            && lock->lf_async_task == NULL) {
            lf_free_lock(lock);
            error = EAGAIN;
            goto out;
        }

        /*
         * For flock type locks, we must first remove
         * any shared locks that we hold before we sleep
         * waiting for an exclusive lock.
         */
        if ((lock->lf_flags & F_FLOCK) &&
            lock->lf_type == F_WRLCK) {
            lock->lf_type = F_UNLCK;
            lf_activate_lock(state, lock);
            lock->lf_type = F_WRLCK;
        }

        /*
         * We are blocked. Create edges to each blocking lock,
         * checking for deadlock using the owner graph. For
         * simplicity, we run deadlock detection for all
         * locks, posix and otherwise.
         */
        sx_xlock(&lf_owner_graph_lock);
        error = lf_add_outgoing(state, lock);
        sx_xunlock(&lf_owner_graph_lock);

        if (error) {
#ifdef LOCKF_DEBUG
            if (lockf_debug & 1)
                lf_print("lf_setlock: deadlock", lock);
#endif
            lf_free_lock(lock);
            goto out;
        }

        /*
         * We have added edges to everything that blocks
         * us. Sleep until they all go away.
         */
        LIST_INSERT_HEAD(&state->ls_pending, lock, lf_link);
#ifdef LOCKF_DEBUG
        if (lockf_debug & 1) {
            struct lockf_edge *e;
            LIST_FOREACH(e, &lock->lf_outedges, le_outlink) {
                lf_print("lf_setlock: blocking on", e->le_to);
                lf_printlist("lf_setlock", e->le_to);
            }
        }
#endif /* LOCKF_DEBUG */

        if ((lock->lf_flags & F_WAIT) == 0) {
            /*
             * The caller requested async notification -
             * this callback happens when the blocking
             * lock is released, allowing the caller to
             * make another attempt to take the lock.
             */
            *cookiep = (void *) lock;
            error = EINPROGRESS;
            goto out;
        }

        error = sx_sleep(lock, &state->ls_lock, priority, lockstr, 0);
        /*
         * We may have been awakened by a signal and/or by a
         * debugger continuing us (in which cases we must
         * remove our lock graph edges) and/or by another
         * process releasing a lock (in which case our edges
         * have already been removed and we have been moved to
         * the active list). We may also have been woken by
         * lf_purgelocks which we report to the caller as
         * EINTR. In that case, lf_purgelocks will have
         * removed our lock graph edges.
         *
         * Note that it is possible to receive a signal after
         * we were successfully woken (and moved to the active
         * list) but before we resumed execution. In this
         * case, our lf_outedges list will be clear. We
         * pretend there was no error.
         *
         * Note also, if we have been sleeping long enough, we
         * may now have incoming edges from some newer lock
         * which is waiting behind us in the queue.
         */
        if (lock->lf_flags & F_INTR) {
            error = EINTR;
            lf_free_lock(lock);
            goto out;
        }
        if (LIST_EMPTY(&lock->lf_outedges)) {
            error = 0;
        } else {
            lf_cancel_lock(state, lock);
            goto out;
        }
#ifdef LOCKF_DEBUG
        if (lockf_debug & 1) {
            lf_print("lf_setlock: granted", lock);
        }
#endif
        goto out;
    }
    /*
     * It looks like we are going to grant the lock. First add
     * edges from any currently pending lock that the new lock
     * would block.
     */
    sx_xlock(&lf_owner_graph_lock);
    error = lf_add_incoming(state, lock);
    sx_xunlock(&lf_owner_graph_lock);
    if (error) {
#ifdef LOCKF_DEBUG
        if (lockf_debug & 1)
            lf_print("lf_setlock: deadlock", lock);
#endif
        lf_free_lock(lock);
        goto out;
    }

    /*
     * No blocks!!  Add the lock.  Note that we will
     * downgrade or upgrade any overlapping locks this
     * process already owns.
     */
    lf_activate_lock(state, lock);
    error = 0;
out:
    return (error);
}
/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 */
static int
lf_clearlock(struct lockf *state, struct lockf_entry *unlock)
{
    struct lockf_entry *overlap;

    overlap = LIST_FIRST(&state->ls_active);

    if (overlap == NOLOCKF)
        return (0);
#ifdef LOCKF_DEBUG
    if (unlock->lf_type != F_UNLCK)
        panic("lf_clearlock: bad type");
    if (lockf_debug & 1)
        lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */

    lf_activate_lock(state, unlock);

    return (0);
}

/*
 * Check whether there is a blocking lock, and if so return its
 * details in '*fl'.
 */
static int
lf_getlock(struct lockf *state, struct lockf_entry *lock, struct flock *fl)
{
    struct lockf_entry *block;

#ifdef LOCKF_DEBUG
    if (lockf_debug & 1)
        lf_print("lf_getlock", lock);
#endif /* LOCKF_DEBUG */

    if ((block = lf_getblock(state, lock))) {
        fl->l_type = block->lf_type;
        fl->l_whence = SEEK_SET;
        fl->l_start = block->lf_start;
        if (block->lf_end == OFF_MAX)
            fl->l_len = 0;
        else
            fl->l_len = block->lf_end - block->lf_start + 1;
        fl->l_pid = block->lf_owner->lo_pid;
        fl->l_sysid = block->lf_owner->lo_sysid;
    } else {
        fl->l_type = F_UNLCK;
    }
    return (0);
}

/*
 * Cancel an async lock request.
 */
static int
lf_cancel(struct lockf *state, struct lockf_entry *lock, void *cookie)
{
    struct lockf_entry *reallock;

    /*
     * We need to match this request with an existing lock
     * request.
     */
    LIST_FOREACH(reallock, &state->ls_pending, lf_link) {
        if ((void *) reallock == cookie) {
            /*
             * Double-check that this lock looks right
             * (maybe use a rolling ID for the cancel
             * cookie?)
             */
            if (!(reallock->lf_vnode == lock->lf_vnode
                && reallock->lf_start == lock->lf_start
                && reallock->lf_end == lock->lf_end)) {
                return (ENOENT);
            }

            /*
             * Make sure this lock was async and then just
             * remove it from its wait lists.
             */
            if (!reallock->lf_async_task) {
                return (ENOENT);
            }

            /*
             * Note that since any other thread must take
             * state->ls_lock before it can possibly
             * trigger the async callback, we are safe
             * from a race with lf_wakeup_lock, i.e. we
             * can free the lock (actually our caller does
             * this).
             */
            lf_cancel_lock(state, reallock);
            return (0);
        }
    }

    /*
     * We didn't find a matching lock - not much we can do here.
     */
    return (ENOENT);
}

/*
 * Walk the list of locks for an inode and
 * return the first blocking lock.
 */
static struct lockf_entry *
lf_getblock(struct lockf *state, struct lockf_entry *lock)
{
    struct lockf_entry *overlap;

    LIST_FOREACH(overlap, &state->ls_active, lf_link) {
        /*
         * We may assume that the active list is sorted by
         * lf_start.
         */
        if (overlap->lf_start > lock->lf_end)
            break;
        if (!lf_blocks(lock, overlap))
            continue;
        return (overlap);
    }
    return (NOLOCKF);
}
/*
 * Walk the list of locks for an inode to find an overlapping lock (if
 * any) and return a classification of that overlap.
 *
 * Arguments:
 *	*overlap	The place in the lock list to start looking
 *	lock		The lock which is being tested
 *	type		Pass 'SELF' to test only locks with the same
 *			owner as lock, or 'OTHER' to test only locks
 *			with a different owner
 *
 * Returns one of six values:
 *	0) no overlap
 *	1) overlap == lock
 *	2) overlap contains lock
 *	3) lock contains overlap
 *	4) overlap starts before lock
 *	5) overlap ends after lock
 *
 * If there is an overlapping lock, '*overlap' is set to point at the
 * overlapping lock.
 *
 * NOTE: this returns only the FIRST overlapping lock.  There
 *	 may be more than one.
 */
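/*
 * Added illustration (not part of the original comment), using byte
 * ranges for a candidate 'lock' of [10..19]:
 *
 *	1) overlap == lock		overlap [10..19]
 *	2) overlap contains lock	overlap [ 5..25]
 *	3) lock contains overlap	overlap [12..15]
 *	4) overlap starts before lock	overlap [ 5..15]
 *	5) overlap ends after lock	overlap [15..25]
 */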
static int
lf_findoverlap(struct lockf_entry **overlap, struct lockf_entry *lock, int type)
{
    struct lockf_entry *lf;
    off_t start, end;
    int res;

    if ((*overlap) == NOLOCKF) {
        return (0);
    }
#ifdef LOCKF_DEBUG
    if (lockf_debug & 2)
        lf_print("lf_findoverlap: looking for overlap in", lock);
#endif /* LOCKF_DEBUG */
    start = lock->lf_start;
    end = lock->lf_end;
    res = 0;
    while (*overlap) {
        lf = *overlap;
        if (lf->lf_start > end)
            break;
        if (((type & SELF) && lf->lf_owner != lock->lf_owner) ||
            ((type & OTHERS) && lf->lf_owner == lock->lf_owner)) {
            *overlap = LIST_NEXT(lf, lf_link);
            continue;
        }
#ifdef LOCKF_DEBUG
        if (lockf_debug & 2)
            lf_print("\tchecking", lf);
#endif /* LOCKF_DEBUG */
        /*
         * OK, check for overlap
         *
         * Six cases:
         *	0) no overlap
         *	1) overlap == lock
         *	2) overlap contains lock
         *	3) lock contains overlap
         *	4) overlap starts before lock
         *	5) overlap ends after lock
         */
        if (start > lf->lf_end) {
            /* Case 0 */
#ifdef LOCKF_DEBUG
            if (lockf_debug & 2)
                printf("no overlap\n");
#endif /* LOCKF_DEBUG */
            *overlap = LIST_NEXT(lf, lf_link);
            continue;
        }
        if (lf->lf_start == start && lf->lf_end == end) {
            /* Case 1 */
#ifdef LOCKF_DEBUG
            if (lockf_debug & 2)
                printf("overlap == lock\n");
#endif /* LOCKF_DEBUG */
            res = 1;
            break;
        }
        if (lf->lf_start <= start && lf->lf_end >= end) {
            /* Case 2 */
#ifdef LOCKF_DEBUG
            if (lockf_debug & 2)
                printf("overlap contains lock\n");
#endif /* LOCKF_DEBUG */
            res = 2;
            break;
        }
        if (start <= lf->lf_start && end >= lf->lf_end) {
            /* Case 3 */
#ifdef LOCKF_DEBUG
            if (lockf_debug & 2)
                printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
            res = 3;
            break;
        }
        if (lf->lf_start < start && lf->lf_end >= start) {
            /* Case 4 */
#ifdef LOCKF_DEBUG
            if (lockf_debug & 2)
                printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
            res = 4;
            break;
        }
        if (lf->lf_start > start && lf->lf_end > end) {
            /* Case 5 */
#ifdef LOCKF_DEBUG
            if (lockf_debug & 2)
                printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
            res = 5;
            break;
        }
        panic("lf_findoverlap: default");
    }
    return (res);
}
/*
 * Split the existing 'lock1', based on the extent of the lock
 * described by 'lock2'. The existing lock should cover 'lock2'
 * entirely.
 *
 * Any pending locks which have been unblocked are added to
 * 'granted'.
 */
static void
lf_split(struct lockf *state, struct lockf_entry *lock1,
    struct lockf_entry *lock2, struct lockf_entry_list *granted)
{
    struct lockf_entry *splitlock;

#ifdef LOCKF_DEBUG
    if (lockf_debug & 2) {
        lf_print("lf_split", lock1);
        lf_print("splitting from", lock2);
    }
#endif /* LOCKF_DEBUG */
    /*
     * Check to see if we don't need to split at all.
     */
    if (lock1->lf_start == lock2->lf_start) {
        lf_set_start(state, lock1, lock2->lf_end + 1, granted);
        return;
    }
    if (lock1->lf_end == lock2->lf_end) {
        lf_set_end(state, lock1, lock2->lf_start - 1, granted);
        return;
    }

    /*
     * Make a new lock consisting of the last part of
     * the encompassing lock.
     */
    splitlock = lf_alloc_lock(lock1->lf_owner);
    memcpy(splitlock, lock1, sizeof *splitlock);
    if (splitlock->lf_flags & F_REMOTE)
        vref(splitlock->lf_vnode);

    /*
     * This cannot cause a deadlock since any edges we would add
     * to splitlock already exist in lock1. We must be sure to add
     * necessary dependancies to splitlock before we reduce lock1
     * otherwise we may accidentally grant a pending lock that
     * was blocked by the tail end of lock1.
     */
    splitlock->lf_start = lock2->lf_end + 1;
    LIST_INIT(&splitlock->lf_outedges);
    LIST_INIT(&splitlock->lf_inedges);
    sx_xlock(&lf_owner_graph_lock);
    lf_add_incoming(state, splitlock);
    sx_xunlock(&lf_owner_graph_lock);

    lf_set_end(state, lock1, lock2->lf_start - 1, granted);

    /*
     * OK, now link it in
     */
    lf_insert_lock(state, splitlock);
}
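/*
 * Added example (not in the original source): if lock1 covers [0..99]
 * and lock2 describes [40..49], lf_split() creates splitlock for
 * [50..99], trims lock1 down to [0..39] via lf_set_end(), and inserts
 * splitlock back into the active list, leaving the two surviving
 * pieces of the original range.
 */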
struct lockdesc {
    STAILQ_ENTRY(lockdesc) link;
    struct vnode *vp;
    struct flock fl;
};
STAILQ_HEAD(lockdesclist, lockdesc);

int
lf_iteratelocks_sysid(int sysid, lf_iterator *fn, void *arg)
{
    struct lockf *ls;
    struct lockf_entry *lf;
    struct lockdesc *ldesc;
    struct lockdesclist locks;
    int error;

    /*
     * In order to keep the locking simple, we iterate over the
     * active lock lists to build a list of locks that need
     * releasing. We then call the iterator for each one in turn.
     *
     * We take an extra reference to the vnode for the duration to
     * make sure it doesn't go away before we are finished.
     */
    STAILQ_INIT(&locks);
    sx_xlock(&lf_lock_states_lock);
    LIST_FOREACH(ls, &lf_lock_states, ls_link) {
        sx_xlock(&ls->ls_lock);
        LIST_FOREACH(lf, &ls->ls_active, lf_link) {
            if (lf->lf_owner->lo_sysid != sysid)
                continue;

            ldesc = malloc(sizeof(struct lockdesc), M_LOCKF,
                M_WAITOK);
            ldesc->vp = lf->lf_vnode;
            vref(ldesc->vp);
            ldesc->fl.l_start = lf->lf_start;
            if (lf->lf_end == OFF_MAX)
                ldesc->fl.l_len = 0;
            else
                ldesc->fl.l_len =
                    lf->lf_end - lf->lf_start + 1;
            ldesc->fl.l_whence = SEEK_SET;
            ldesc->fl.l_type = F_UNLCK;
            ldesc->fl.l_pid = lf->lf_owner->lo_pid;
            ldesc->fl.l_sysid = sysid;
            STAILQ_INSERT_TAIL(&locks, ldesc, link);
        }
        sx_xunlock(&ls->ls_lock);
    }
    sx_xunlock(&lf_lock_states_lock);

    /*
     * Call the iterator function for each lock in turn. If the
     * iterator returns an error code, just free the rest of the
     * lockdesc structures.
     */
    error = 0;
    while ((ldesc = STAILQ_FIRST(&locks)) != NULL) {
        STAILQ_REMOVE_HEAD(&locks, link);
        if (!error)
            error = fn(ldesc->vp, &ldesc->fl, arg);
        vrele(ldesc->vp);
        free(ldesc, M_LOCKF);
    }

    return (error);
}
int
lf_iteratelocks_vnode(struct vnode *vp, lf_iterator *fn, void *arg)
{
    struct lockf *ls;
    struct lockf_entry *lf;
    struct lockdesc *ldesc;
    struct lockdesclist locks;
    int error;

    /*
     * In order to keep the locking simple, we iterate over the
     * active lock lists to build a list of locks that need
     * releasing. We then call the iterator for each one in turn.
     *
     * We take an extra reference to the vnode for the duration to
     * make sure it doesn't go away before we are finished.
     */
    STAILQ_INIT(&locks);
    ls = vp->v_lockf;
    if (!ls)
        return (0);

    sx_xlock(&ls->ls_lock);
    LIST_FOREACH(lf, &ls->ls_active, lf_link) {
        ldesc = malloc(sizeof(struct lockdesc), M_LOCKF,
            M_WAITOK);
        ldesc->vp = lf->lf_vnode;
        vref(ldesc->vp);
        ldesc->fl.l_start = lf->lf_start;
        if (lf->lf_end == OFF_MAX)
            ldesc->fl.l_len = 0;
        else
            ldesc->fl.l_len =
                lf->lf_end - lf->lf_start + 1;
        ldesc->fl.l_whence = SEEK_SET;
        ldesc->fl.l_type = F_UNLCK;
        ldesc->fl.l_pid = lf->lf_owner->lo_pid;
        ldesc->fl.l_sysid = lf->lf_owner->lo_sysid;
        STAILQ_INSERT_TAIL(&locks, ldesc, link);
    }
    sx_xunlock(&ls->ls_lock);

    /*
     * Call the iterator function for each lock in turn. If the
     * iterator returns an error code, just free the rest of the
     * lockdesc structures.
     */
    error = 0;
    while ((ldesc = STAILQ_FIRST(&locks)) != NULL) {
        STAILQ_REMOVE_HEAD(&locks, link);
        if (!error)
            error = fn(ldesc->vp, &ldesc->fl, arg);
        vrele(ldesc->vp);
        free(ldesc, M_LOCKF);
    }

    return (error);
}

static int
lf_clearremotesys_iterator(struct vnode *vp, struct flock *fl, void *arg)
{
    VOP_ADVLOCK(vp, 0, F_UNLCK, fl, F_REMOTE);
    return (0);
}

void
lf_clearremotesys(int sysid)
{
    KASSERT(sysid != 0, ("Can't clear local locks with F_UNLCKSYS"));
    lf_iteratelocks_sysid(sysid, lf_clearremotesys_iterator, NULL);
}

int
lf_countlocks(int sysid)
{
    int i;
    struct lock_owner *lo;
    int count = 0;

    sx_xlock(&lf_lock_owners_lock);
    for (i = 0; i < LOCK_OWNER_HASH_SIZE; i++)
        LIST_FOREACH(lo, &lf_lock_owners[i], lo_link)
            if (lo->lo_sysid == sysid)
                count += lo->lo_refs;
    sx_xunlock(&lf_lock_owners_lock);

    return (count);
}
/*
 * Return non-zero if y is reachable from x using a brute force
 * search. If reachable and path is non-null, return the route taken
 * in path.
 */
static int
graph_reaches(struct owner_vertex *x, struct owner_vertex *y,
    struct owner_vertex_list *path)
{
    struct owner_edge *e;

    if (x == y) {
        if (path)
            TAILQ_INSERT_HEAD(path, x, v_link);
        return 1;
    }

    LIST_FOREACH(e, &x->v_outedges, e_outlink) {
        if (graph_reaches(e->e_to, y, path)) {
            if (path)
                TAILQ_INSERT_HEAD(path, x, v_link);
            return 1;
        }
    }
    return 0;
}

/*
 * Perform consistency checks on the graph. Make sure the values of
 * v_order are correct. If checkorder is non-zero, check no vertex can
 * reach any other vertex with a smaller order.
 */
static void
graph_check(struct owner_graph *g, int checkorder)
{
    int i, j;

    for (i = 0; i < g->g_size; i++) {
        if (!g->g_vertices[i]->v_owner)
            continue;
        KASSERT(g->g_vertices[i]->v_order == i,
            ("lock graph vertices disordered"));
        if (checkorder) {
            for (j = 0; j < i; j++) {
                if (!g->g_vertices[j]->v_owner)
                    continue;
                KASSERT(!graph_reaches(g->g_vertices[i],
                    g->g_vertices[j], NULL),
                    ("lock graph vertices disordered"));
            }
        }
    }
}

static void
graph_print_vertices(struct owner_vertex_list *set)
{
    struct owner_vertex *v;

    printf("{ ");
    TAILQ_FOREACH(v, set, v_link) {
        printf("%d:", v->v_order);
        lf_print_owner(v->v_owner);
        if (TAILQ_NEXT(v, v_link))
            printf(", ");
    }
    printf(" }\n");
}
/*
 * Calculate the sub-set of vertices v from the affected region [y..x]
 * where v is reachable from y. Return -1 if a loop was detected
 * (i.e. x is reachable from y), otherwise the number of vertices in
 * this subset.
 */
static int
graph_delta_forward(struct owner_graph *g, struct owner_vertex *x,
    struct owner_vertex *y, struct owner_vertex_list *delta)
{
    uint32_t gen;
    struct owner_vertex *v;
    struct owner_edge *e;
    int n;

    /*
     * We start with a set containing just y. Then for each vertex
     * v in the set so far unprocessed, we add each vertex that v
     * has an out-edge to and that is within the affected region
     * [y..x]. If we see the vertex x on our travels, stop
     * immediately.
     */
    TAILQ_INIT(delta);
    TAILQ_INSERT_TAIL(delta, y, v_link);
    v = y;
    n = 1;
    gen = g->g_gen;
    while (v) {
        LIST_FOREACH(e, &v->v_outedges, e_outlink) {
            if (e->e_to == x)
                return -1;
            if (e->e_to->v_order < x->v_order
                && e->e_to->v_gen != gen) {
                e->e_to->v_gen = gen;
                TAILQ_INSERT_TAIL(delta, e->e_to, v_link);
                n++;
            }
        }
        v = TAILQ_NEXT(v, v_link);
    }

    return (n);
}

/*
 * Calculate the sub-set of vertices v from the affected region [y..x]
 * where v reaches x. Return the number of vertices in this subset.
 */
static int
graph_delta_backward(struct owner_graph *g, struct owner_vertex *x,
    struct owner_vertex *y, struct owner_vertex_list *delta)
{
    uint32_t gen;
    struct owner_vertex *v;
    struct owner_edge *e;
    int n;

    /*
     * We start with a set containing just x. Then for each vertex
     * v in the set so far unprocessed, we add each vertex that v
     * has an in-edge from and that is within the affected region
     * [y..x].
     */
    TAILQ_INIT(delta);
    TAILQ_INSERT_TAIL(delta, x, v_link);
    v = x;
    n = 1;
    gen = g->g_gen;
    while (v) {
        LIST_FOREACH(e, &v->v_inedges, e_inlink) {
            if (e->e_from->v_order > y->v_order
                && e->e_from->v_gen != gen) {
                e->e_from->v_gen = gen;
                TAILQ_INSERT_HEAD(delta, e->e_from, v_link);
                n++;
            }
        }
        v = TAILQ_PREV(v, owner_vertex_list, v_link);
    }

    return (n);
}
static int
graph_add_indices(int *indices, int n, struct owner_vertex_list *set)
{
    struct owner_vertex *v;
    int i, j;

    TAILQ_FOREACH(v, set, v_link) {
        for (i = n;
             i > 0 && indices[i - 1] > v->v_order; i--)
            ;
        for (j = n - 1; j >= i; j--)
            indices[j + 1] = indices[j];
        indices[i] = v->v_order;
        n++;
    }

    return (n);
}

static int
graph_assign_indices(struct owner_graph *g, int *indices, int nextunused,
    struct owner_vertex_list *set)
{
    struct owner_vertex *v, *vlowest;

    while (!TAILQ_EMPTY(set)) {
        vlowest = NULL;
        TAILQ_FOREACH(v, set, v_link) {
            if (!vlowest || v->v_order < vlowest->v_order)
                vlowest = v;
        }
        TAILQ_REMOVE(set, vlowest, v_link);
        vlowest->v_order = indices[nextunused];
        g->g_vertices[vlowest->v_order] = vlowest;
        nextunused++;
    }

    return (nextunused);
}
static int
graph_add_edge(struct owner_graph *g, struct owner_vertex *x,
    struct owner_vertex *y)
{
    struct owner_edge *e;
    struct owner_vertex_list deltaF, deltaB;
    int nF, nB, n, vi, i;
    int *indices;

    sx_assert(&lf_owner_graph_lock, SX_XLOCKED);

    LIST_FOREACH(e, &x->v_outedges, e_outlink) {
        if (e->e_to == y) {
            e->e_refs++;
            return (0);
        }
    }

#ifdef LOCKF_DEBUG
    if (lockf_debug & 8) {
        printf("adding edge %d:", x->v_order);
        lf_print_owner(x->v_owner);
        printf(" -> %d:", y->v_order);
        lf_print_owner(y->v_owner);
        printf("\n");
    }
#endif
    if (y->v_order < x->v_order) {
        /*
         * The new edge violates the order. First find the set
         * of affected vertices reachable from y (deltaF) and
         * the set of affected vertices that reach x
         * (deltaB), using the graph generation number to
         * detect whether we have visited a given vertex
         * already. We re-order the graph so that each vertex
         * in deltaB appears before each vertex in deltaF.
         *
         * If x is a member of deltaF, then the new edge would
         * create a cycle. Otherwise, we may assume that
         * deltaF and deltaB are disjoint.
         */
        g->g_gen++;
        if (g->g_gen == 0) {
            /*
             * Generation wrap.
             */
            for (vi = 0; vi < g->g_size; vi++) {
                g->g_vertices[vi]->v_gen = 0;
            }
            g->g_gen++;
        }
        nF = graph_delta_forward(g, x, y, &deltaF);
        if (nF < 0) {
#ifdef LOCKF_DEBUG
            if (lockf_debug & 8) {
                struct owner_vertex_list path;
                printf("deadlock: ");
                TAILQ_INIT(&path);
                graph_reaches(y, x, &path);
                graph_print_vertices(&path);
            }
#endif
            return (EDEADLK);
        }

#ifdef LOCKF_DEBUG
        if (lockf_debug & 8) {
            printf("re-ordering graph vertices\n");
            printf("deltaF = ");
            graph_print_vertices(&deltaF);
        }
#endif

        nB = graph_delta_backward(g, x, y, &deltaB);

#ifdef LOCKF_DEBUG
        if (lockf_debug & 8) {
            printf("deltaB = ");
            graph_print_vertices(&deltaB);
        }
#endif

        /*
         * We first build a set of vertex indices (vertex
         * order values) that we may use, then we re-assign
         * orders first to those vertices in deltaB, then to
         * deltaF. Note that the contents of deltaF and deltaB
         * may be partially disordered - we perform an
         * insertion sort while building our index set.
         */
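        /*
         * Added example (not in the original comment): if the vertices
         * in deltaF currently carry orders {5, 9, 7} and those in
         * deltaB carry {2, 8}, the index buffer ends up holding
         * {2, 5, 7, 8, 9}; deltaB is then renumbered from the low end
         * of that set and deltaF from the remainder, preserving the
         * relative order within each set.
         */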
        indices = g->g_indexbuf;
        n = graph_add_indices(indices, 0, &deltaF);
        graph_add_indices(indices, n, &deltaB);

        /*
         * We must also be sure to maintain the relative
         * ordering of deltaF and deltaB when re-assigning
         * vertices. We do this by iteratively removing the
         * lowest ordered element from the set and assigning
         * it the next value from our new ordering.
         */
        i = graph_assign_indices(g, indices, 0, &deltaB);
        graph_assign_indices(g, indices, i, &deltaF);

#ifdef LOCKF_DEBUG
        if (lockf_debug & 8) {
            struct owner_vertex_list set;
            TAILQ_INIT(&set);
            for (i = 0; i < nB + nF; i++)
                TAILQ_INSERT_TAIL(&set,
                    g->g_vertices[indices[i]], v_link);
            printf("new ordering = ");
            graph_print_vertices(&set);
        }
#endif
    }

    KASSERT(x->v_order < y->v_order, ("Failed to re-order graph"));

#ifdef LOCKF_DEBUG
    if (lockf_debug & 8) {
        graph_check(g, TRUE);
    }
#endif

    e = malloc(sizeof(struct owner_edge), M_LOCKF, M_WAITOK);

    LIST_INSERT_HEAD(&x->v_outedges, e, e_outlink);
    LIST_INSERT_HEAD(&y->v_inedges, e, e_inlink);
    e->e_refs = 1;
    e->e_from = x;
    e->e_to = y;

    return (0);
}
/*
 * Remove an edge x->y from the graph.
 */
static void
graph_remove_edge(struct owner_graph *g, struct owner_vertex *x,
    struct owner_vertex *y)
{
    struct owner_edge *e;

    sx_assert(&lf_owner_graph_lock, SX_XLOCKED);

    LIST_FOREACH(e, &x->v_outedges, e_outlink) {
        if (e->e_to == y)
            break;
    }
    KASSERT(e, ("Removing non-existent edge from deadlock graph"));

    e->e_refs--;
    if (e->e_refs == 0) {
#ifdef LOCKF_DEBUG
        if (lockf_debug & 8) {
            printf("removing edge %d:", x->v_order);
            lf_print_owner(x->v_owner);
            printf(" -> %d:", y->v_order);
            lf_print_owner(y->v_owner);
            printf("\n");
        }
#endif
        LIST_REMOVE(e, e_outlink);
        LIST_REMOVE(e, e_inlink);
        free(e, M_LOCKF);
    }
}
/*
 * Allocate a vertex from the free list. Return ENOMEM if there are
 * none.
 */
static struct owner_vertex *
graph_alloc_vertex(struct owner_graph *g, struct lock_owner *lo)
{
    struct owner_vertex *v;

    sx_assert(&lf_owner_graph_lock, SX_XLOCKED);

    v = malloc(sizeof(struct owner_vertex), M_LOCKF, M_WAITOK);
    if (g->g_size == g->g_space) {
        g->g_vertices = realloc(g->g_vertices,
            2 * g->g_space * sizeof(struct owner_vertex *),
            M_LOCKF, M_WAITOK);
        free(g->g_indexbuf, M_LOCKF);
        g->g_indexbuf = malloc(2 * g->g_space * sizeof(int),
            M_LOCKF, M_WAITOK);
        g->g_space = 2 * g->g_space;
    }
    v->v_order = g->g_size;
    v->v_gen = g->g_gen;
    g->g_vertices[g->g_size] = v;
    g->g_size++;

    LIST_INIT(&v->v_outedges);
    LIST_INIT(&v->v_inedges);
    v->v_owner = lo;

    return (v);
}

static void
graph_free_vertex(struct owner_graph *g, struct owner_vertex *v)
{
    struct owner_vertex *w;
    int i;

    sx_assert(&lf_owner_graph_lock, SX_XLOCKED);

    KASSERT(LIST_EMPTY(&v->v_outedges), ("Freeing vertex with edges"));
    KASSERT(LIST_EMPTY(&v->v_inedges), ("Freeing vertex with edges"));

    /*
     * Remove from the graph's array and close up the gap,
     * renumbering the other vertices.
     */
    for (i = v->v_order + 1; i < g->g_size; i++) {
        w = g->g_vertices[i];
        w->v_order--;
        g->g_vertices[i - 1] = w;
    }
    g->g_size--;

    free(v, M_LOCKF);
}

static struct owner_graph *
graph_init(struct owner_graph *g)
{
    g->g_vertices = malloc(10 * sizeof(struct owner_vertex *),
        M_LOCKF, M_WAITOK);
    g->g_size = 0;
    g->g_space = 10;
    g->g_indexbuf = malloc(g->g_space * sizeof(int), M_LOCKF, M_WAITOK);
    g->g_gen = 0;

    return (g);
}
#ifdef LOCKF_DEBUG

/*
 * Print description of a lock owner
 */
static void
lf_print_owner(struct lock_owner *lo)
{
    if (lo->lo_flags & F_REMOTE) {
        printf("remote pid %d, system %d",
            lo->lo_pid, lo->lo_sysid);
    } else if (lo->lo_flags & F_FLOCK) {
        printf("file %p", lo->lo_id);
    } else {
        printf("local pid %d", lo->lo_pid);
    }
}

/*
 * Print out a lock.
 */
static void
lf_print(char *tag, struct lockf_entry *lock)
{
    printf("%s: lock %p for ", tag, (void *)lock);
    lf_print_owner(lock->lf_owner);
    if (lock->lf_inode != (struct inode *)0)
        printf(" in ino %ju on dev <%s>,",
            (uintmax_t)lock->lf_inode->i_number,
            devtoname(lock->lf_inode->i_dev));
    printf(" %s, start %jd, end ",
        lock->lf_type == F_RDLCK ? "shared" :
        lock->lf_type == F_WRLCK ? "exclusive" :
        lock->lf_type == F_UNLCK ? "unlock" : "unknown",
        (intmax_t)lock->lf_start);
    if (lock->lf_end == OFF_MAX)
        printf("EOF");
    else
        printf("%jd", (intmax_t)lock->lf_end);
    if (!LIST_EMPTY(&lock->lf_outedges))
        printf(" block %p\n",
            (void *)LIST_FIRST(&lock->lf_outedges)->le_to);
    else
        printf("\n");
}

/*
 * Print out the lock list for an inode.
 */
static void
lf_printlist(char *tag, struct lockf_entry *lock)
{
    struct lockf_entry *lf, *blk;
    struct lockf_edge *e;

    if (lock->lf_inode == (struct inode *)0)
        return;

    printf("%s: Lock list for ino %ju on dev <%s>:\n",
        tag, (uintmax_t)lock->lf_inode->i_number,
        devtoname(lock->lf_inode->i_dev));
    LIST_FOREACH(lf, &lock->lf_vnode->v_lockf->ls_active, lf_link) {
        printf("\tlock %p for ", (void *)lf);
        lf_print_owner(lock->lf_owner);
        printf(", %s, start %jd, end %jd",
            lf->lf_type == F_RDLCK ? "shared" :
            lf->lf_type == F_WRLCK ? "exclusive" :
            lf->lf_type == F_UNLCK ? "unlock" :
            "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
        LIST_FOREACH(e, &lf->lf_outedges, le_outlink) {
            blk = e->le_to;
            printf("\n\t\tlock request %p for ", (void *)blk);
            lf_print_owner(blk->lf_owner);
            printf(", %s, start %jd, end %jd",
                blk->lf_type == F_RDLCK ? "shared" :
                blk->lf_type == F_WRLCK ? "exclusive" :
                blk->lf_type == F_UNLCK ? "unlock" :
                "unknown", (intmax_t)blk->lf_start,
                (intmax_t)blk->lf_end);
            if (!LIST_EMPTY(&blk->lf_inedges))
                panic("lf_printlist: bad list");
        }
        printf("\n");
    }
}
#endif /* LOCKF_DEBUG */