/*
 * Copyright (c) 2004 Joerg Sonnenberger <joerg@bec.de>.  All rights reserved.
 * Copyright (c) 2006-2018 Matthew Dillon <dillon@backplane.com>.  All rights reserved.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94
 * $FreeBSD: src/sys/kern/kern_lockf.c,v 1.25 1999/11/16 16:28:56 phk Exp $
 */

#include "opt_debug_lockf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/resourcevar.h>

#include <sys/lockf.h>
#include <machine/limits.h>     /* for LLONG_MAX */
#include <machine/stdarg.h>

#include <sys/spinlock2.h>

struct lf_pcpu {
        struct lockf_range *free1;
        struct lockf_range *free2;
} __cachealign;

static struct lf_pcpu *lf_pcpu_array;

#ifdef LOCKF_DEBUG
int lf_print_ranges = 0;

static void     _lf_print_lock(const struct lockf *);
static void     _lf_printf(const char *, ...) __printflike(1, 2);

#define lf_print_lock(lock) if (lf_print_ranges) _lf_print_lock(lock)
#define lf_printf(ctl, args...) if (lf_print_ranges) _lf_printf(ctl, args)
#else
#define lf_print_lock(lock)
#define lf_printf(ctl, args...)
#endif

static MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");

static void     lf_wakeup(struct lockf *, off_t, off_t);
static struct lockf_range *lf_alloc_range(void);
static void     lf_create_range(struct lockf_range *, struct proc *, int, int,
                                off_t, off_t);
static void     lf_insert(struct lockf_range_list *list,
                          struct lockf_range *elm,
                          struct lockf_range *insert_point);
static void     lf_destroy_range(struct lockf_range *);

static int      lf_setlock(struct lockf *, struct proc *, int, int,
                           off_t, off_t);
static int      lf_getlock(struct flock *, struct lockf *, struct proc *,
                           int, int, off_t, off_t);

static int      lf_count_change(struct proc *, int);

/*
 * Return TRUE (non-zero) if the type and posix flags match.
 */
static __inline
int
lf_match(struct lockf_range *range, int type, int flags)
{
        if (range->lf_type != type)
                return(0);
        if ((range->lf_flags ^ flags) & F_POSIX)
                return(0);
        return(1);
}

/*
 * Check whether range and [start, end] overlap.
 */
static __inline
int
lf_overlap(const struct lockf_range *range, off_t start, off_t end)
{
        if (range->lf_start >= start && range->lf_start <= end)
                return(1);
        else if (start >= range->lf_start && start <= range->lf_end)
                return(1);
        else
                return(0);
}
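
/*
 * Example (illustrative): ranges are inclusive on both ends, so a
 * hypothetical range with lf_start = 0 and lf_end = 4 does not overlap
 * [5, 9] but does overlap [4, 9]:
 *
 *      lf_overlap(range_0_4, 5, 9) == 0
 *      lf_overlap(range_0_4, 4, 9) == 1
 */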

/*
 * Change the POSIX lock accounting for the given process.
 */
void
lf_count_adjust(struct proc *p, int increase)
{
        struct uidinfo *uip;
        struct uidcount *pup;
        int n;

        KKASSERT(p != NULL);

        uip = p->p_ucred->cr_uidinfo;
        pup = &uip->ui_pcpu[mycpuid];

        if (increase) {
                for (n = 0; n < ncpus; ++n)
                        pup->pu_posixlocks += p->p_uidpcpu[n].pu_posixlocks;
        } else {
                for (n = 0; n < ncpus; ++n)
                        pup->pu_posixlocks -= p->p_uidpcpu[n].pu_posixlocks;
        }

        if (pup->pu_posixlocks < -PUP_LIMIT ||
            pup->pu_posixlocks > PUP_LIMIT) {
                atomic_add_int(&uip->ui_posixlocks, pup->pu_posixlocks);
                pup->pu_posixlocks = 0;
        }
}

static int
lf_count_change(struct proc *owner, int diff)
{
        struct uidinfo *uip;
        int max, ret;

        /* we might actually not have a process context */
        if (owner == NULL)
                return(0);

        uip = owner->p_ucred->cr_uidinfo;

        max = MIN(owner->p_rlimit[RLIMIT_POSIXLOCKS].rlim_cur,
                  maxposixlocksperuid);

        if (diff > 0 && owner->p_ucred->cr_uid != 0 && max != -1 &&
            uip->ui_posixlocks >= max) {
                ret = 1;
        } else {
                struct uidcount *pup;
                int cpu = mycpuid;

                pup = &uip->ui_pcpu[cpu];
                pup->pu_posixlocks += diff;
                if (pup->pu_posixlocks < -PUP_LIMIT ||
                    pup->pu_posixlocks > PUP_LIMIT) {
                        atomic_add_int(&uip->ui_posixlocks, pup->pu_posixlocks);
                        pup->pu_posixlocks = 0;
                }
                owner->p_uidpcpu[cpu].pu_posixlocks += diff;
                ret = 0;
        }
        return ret;
}
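
/*
 * Note on the accounting above (sketch of the pattern, not new code):
 * each cpu accumulates small signed deltas in ui_pcpu[cpu].pu_posixlocks
 * and only folds them into the global counter once they drift outside
 * [-PUP_LIMIT, PUP_LIMIT]:
 *
 *      pup->pu_posixlocks += diff;             (cheap, per-cpu)
 *      if (delta outside [-PUP_LIMIT, PUP_LIMIT])
 *              atomic_add_int(&uip->ui_posixlocks, delta), reset to 0
 *
 * This avoids an atomic op on every lock/unlock; the cost is that the
 * RLIMIT_POSIXLOCKS check in lf_count_change() sees a ui_posixlocks
 * value that can lag the true total by roughly ncpus * PUP_LIMIT.
 */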

/*
 * Advisory record locking support
 */
int
lf_advlock(struct vop_advlock_args *ap, struct lockf *lock, u_quad_t size)
{
        struct flock *fl = ap->a_fl;
        struct proc *owner;
        off_t start, end;
        int type, flags, error;
        lwkt_token_t token;

        /*
         * Convert the flock structure into a start and end.
         */
        switch (fl->l_whence) {
        case SEEK_SET:
        case SEEK_CUR:
                /*
                 * Caller is responsible for adding any necessary offset
                 * when SEEK_CUR is used.
                 */
                start = fl->l_start;
                break;

        case SEEK_END:
                start = size + fl->l_start;
                break;

        default:
                return(EINVAL);
        }

        flags = ap->a_flags;
        if (start < 0)
                return(EINVAL);
        if (fl->l_len == 0) {
                flags |= F_NOEND;
                end = LLONG_MAX;
        } else if (fl->l_len < 0) {
                return(EINVAL);
        } else {
                end = start + fl->l_len - 1;
                if (end < start)
                        return(EINVAL);
        }

        type = fl->l_type;
        /*
         * This isn't really correct for flock-style locks,
         * but the current handling is somewhat broken anyway.
         */
        owner = (struct proc *)ap->a_id;

        /*
         * Do the requested operation.
         */
        token = lwkt_getpooltoken(lock);

        if (lock->init_done == 0) {
                TAILQ_INIT(&lock->lf_range);
                TAILQ_INIT(&lock->lf_blocked);
                lock->init_done = 1;
        }

        switch(ap->a_op) {
        case F_SETLK:
                /*
                 * NOTE: It is possible for both lf_range and lf_blocked to
                 * be empty if we block and get woken up, but another process
                 * then gets in and issues an unlock.  So VMAYHAVELOCKS must
                 * be set after the lf_setlock() operation completes rather
                 * than before.
                 */
                error = lf_setlock(lock, owner, type, flags, start, end);
                if ((ap->a_vp->v_flag & VMAYHAVELOCKS) == 0)
                        vsetflags(ap->a_vp, VMAYHAVELOCKS);
                break;

        case F_UNLCK:
                error = lf_setlock(lock, owner, type, flags, start, end);
#if 0
                /*
                 * XXX REMOVED.  Don't bother doing this in the critical path.
                 * close() overhead is minimal.
                 */
                if (TAILQ_EMPTY(&lock->lf_range) &&
                    TAILQ_EMPTY(&lock->lf_blocked)) {
                        vclrflags(ap->a_vp, VMAYHAVELOCKS);
                }
#endif
                break;

        case F_GETLK:
                error = lf_getlock(fl, lock, owner, type, flags, start, end);
                break;

        default:
                error = EINVAL;
                break;
        }
        lwkt_reltoken(token);
        return(error);
}
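
/*
 * Example (illustrative userland code, not part of this file): a
 * blocking write lock on bytes 100..149 obtained via fcntl(),
 *
 *      struct flock fl;
 *
 *      fl.l_type = F_WRLCK;
 *      fl.l_whence = SEEK_SET;
 *      fl.l_start = 100;
 *      fl.l_len = 50;
 *      fcntl(fd, F_SETLKW, &fl);
 *
 * reaches lf_advlock() with a_op = F_SETLK and F_POSIX (plus, for
 * F_SETLKW, F_WAIT) in a_flags, and is converted above to start = 100,
 * end = 149 (inclusive).  A request with l_len = 0 means "to the end of
 * the file and beyond": F_NOEND is set and end becomes LLONG_MAX.
 */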

static int
lf_setlock(struct lockf *lock, struct proc *owner, int type, int flags,
           off_t start, off_t end)
{
        struct lockf_range *range;
        struct lockf_range *brange;
        struct lockf_range *next;
        struct lockf_range *first_match;
        struct lockf_range *last_match;
        struct lockf_range *insert_point;
        struct lockf_range *new_range1;
        struct lockf_range *new_range2;
        int wakeup_needed;
        int double_clip;
        int unlock_override;
        int error = 0;
        int count;
        struct lockf_range_list deadlist;

        new_range1 = NULL;
        new_range2 = NULL;
        count = 0;

restart:
        /*
         * Preallocate two ranges so we don't have to worry about blocking
         * in the middle of the lock code.
         */
        if (new_range1 == NULL)
                new_range1 = lf_alloc_range();
        if (new_range2 == NULL)
                new_range2 = lf_alloc_range();
        first_match = NULL;
        last_match = NULL;
        insert_point = NULL;
        wakeup_needed = 0;

        lf_print_lock(lock);

        /*
         * Locate the insertion point for the new lock (the first range
         * with an lf_start >= start).
         *
         * Locate the first and last ranges owned by us that overlap
         * the requested range.
         */
        TAILQ_FOREACH(range, &lock->lf_range, lf_link) {
                if (insert_point == NULL && range->lf_start >= start)
                        insert_point = range;

                /*
                 * Skip non-overlapping locks.  Locks are sorted by lf_start,
                 * so we can terminate the search when lf_start exceeds the
                 * requested range (insert_point is still guaranteed to be
                 * set properly).
                 */
                if (range->lf_end < start)
                        continue;
                if (range->lf_start > end) {
                        range = NULL;
                        break;
                }

                /*
                 * Overlapping lock.  Set first_match and last_match if we
                 * are the owner.
                 */
                if (range->lf_owner == owner) {
                        if (first_match == NULL)
                                first_match = range;
                        last_match = range;
                        continue;
                }

                /*
                 * If we aren't the owner check for a conflicting lock.  Only
                 * if not unlocking.
                 */
                if (type != F_UNLCK) {
                        if (type == F_WRLCK || range->lf_type == F_WRLCK)
                                break;
                }
        }

        /*
         * If a conflicting lock was observed, block or fail as appropriate.
         * (this code is skipped when unlocking)
         */
        if (range != NULL) {
                if ((flags & F_WAIT) == 0) {
                        error = EAGAIN;
                        goto do_cleanup;
                }

                /*
                 * We are blocked.  For POSIX locks we have to check
                 * for deadlocks and return with EDEADLK.  This is done
                 * by checking whether range->lf_owner is already
                 * blocked.
                 *
                 * Since flock-style locks cover the whole file, a
                 * deadlock between those is nearly impossible.
                 * This can only occur if a process tries to lock the
                 * same inode exclusively while holding a shared lock
                 * with another descriptor.
                 * XXX How can we cleanly detect this?
                 * XXX The current mixing of flock & fcntl/lockf is evil.
                 *
                 * Handle existing locks of flock-style like POSIX locks.
                 */
                if (flags & F_POSIX) {
                        TAILQ_FOREACH(brange, &lock->lf_blocked, lf_link) {
                                if (brange->lf_owner == range->lf_owner) {
                                        error = EDEADLK;
                                        goto do_cleanup;
                                }
                        }
                }
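
                /*
                 * Deadlock example (illustrative): process A holds a lock
                 * on bytes 0..49 and is blocked waiting for 50..99, which
                 * process B holds.  If B now requests 0..49, the conflicting
                 * range is owned by A and A is already on lf_blocked, so B
                 * gets EDEADLK here instead of sleeping forever.
                 */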

                /*
                 * For flock-style locks, we must first remove
                 * any shared locks that we hold before we sleep
                 * waiting for an exclusive lock.
                 */
                if ((flags & F_POSIX) == 0 && type == F_WRLCK)
                        lf_setlock(lock, owner, F_UNLCK, 0, start, end);

                brange = new_range1;
                new_range1 = NULL;
                lf_create_range(brange, owner, type, 0, start, end);
                TAILQ_INSERT_TAIL(&lock->lf_blocked, brange, lf_link);
                error = tsleep(brange, PCATCH, "lockf", 0);

                /*
                 * We may have been awakened by a signal and/or by a
                 * debugger continuing us (in which case we must remove
                 * ourselves from the blocked list) and/or by another
                 * process releasing/downgrading a lock (in which case
                 * we have already been removed from the blocked list
                 * and our lf_flags field is 1).
                 *
                 * Sleep if it looks like we might be livelocking.
                 */
                if (brange->lf_flags == 0)
                        TAILQ_REMOVE(&lock->lf_blocked, brange, lf_link);
                if (count == 2)
                        tsleep(brange, 0, "lockfz", 2);
                else
                        ++count;
                lf_destroy_range(brange);

                if (error)
                        goto do_cleanup;
                goto restart;
        }

        /*
         * If there are no overlapping locks owned by us then creating
         * the new lock is easy.  This is the most common case.
         */
        if (first_match == NULL) {
                if (type == F_UNLCK)
                        goto do_wakeup;
                if (flags & F_POSIX) {
                        if (lf_count_change(owner, 1)) {
                                error = ENOLCK;
                                goto do_cleanup;
                        }
                }
                range = new_range1;
                new_range1 = NULL;
                lf_create_range(range, owner, type, flags, start, end);
                lf_insert(&lock->lf_range, range, insert_point);
                goto do_wakeup;
        }

        /*
         * double_clip - Calculate a special case where TWO locks may have
         *               to be added due to the new lock breaking up an
         *               existing incompatible lock in the middle.
         *
         * unlock_override - Calculate a special case where NO locks
         *               need to be created.  This occurs when an unlock
         *               does not clip any locks at the front and rear.
         *
         * WARNING!  closef() and fdrop() assume that an F_UNLCK of the
         *           entire range will always succeed so the unlock_override
         *           case is mandatory.
         */
        double_clip = 0;
        unlock_override = 0;
        if (first_match->lf_start < start) {
                if (first_match == last_match && last_match->lf_end > end)
                        double_clip = 1;
        } else if (type == F_UNLCK && last_match->lf_end <= end) {
                unlock_override = 1;
        }
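
        /*
         * Example (illustrative): the owner holds a read lock on 0..99
         * and now write-locks 40..59.  first_match == last_match (the
         * 0..99 range), its lf_start < start and lf_end > end, so
         * double_clip is set and the list ends up as 0..39 (rd),
         * 40..59 (wr), 60..99 (rd), i.e. two new structures in the
         * worst case.  Conversely, an F_UNLCK of 0..99 clips nothing at
         * either edge, so unlock_override is set, no new range is
         * needed, and the full-range unlock cannot fail (which is what
         * closef()/fdrop() rely on).
         */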

        /*
         * Figure out the worst case net increase in POSIX locks and account
         * for it now before we start modifying things.  If neither the
         * first or last locks match we have an issue.  If there is only
         * one overlapping range which needs to be clipped on both ends
         * we wind up having to create up to two new locks, else only one.
         *
         * When unlocking the worst case is always 1 new lock if our
         * unlock request cuts the middle out of an existing lock range.
         *
         * count represents the 'cleanup' adjustment needed.  It starts
         * negative, is incremented whenever we create a new POSIX lock,
         * and decremented whenever we delete an existing one.  At the
         * end of the day it had better be <= 0 or we didn't calculate the
         * worst case properly here.
         */
        count = 0;
        if ((flags & F_POSIX) && !unlock_override) {
                if (!lf_match(first_match, type, flags) &&
                    !lf_match(last_match, type, flags)
                ) {
                        if (double_clip && type != F_UNLCK)
                                count = -2;
                        else
                                count = -1;
                }
                if (count && lf_count_change(owner, -count)) {
                        error = ENOLCK;
                        goto do_cleanup;
                }
        }
        /* else flock style lock which encompasses entire range */

        /*
         * Create and insert the lock representing the requested range.
         * Adjust the net POSIX lock count.  We have to move our insertion
         * point since brange now represents the first record >= start.
         *
         * When unlocking, no new lock is inserted but we still clip.
         */
        if (type != F_UNLCK) {
                brange = new_range1;
                new_range1 = NULL;
                lf_create_range(brange, owner, type, flags, start, end);
                lf_insert(&lock->lf_range, brange, insert_point);
                insert_point = brange;
                if (flags & F_POSIX)
                        ++count;
        } else {
                brange = NULL;
        }

        /*
         * Handle the double_clip case.  This is the only case where
         * we wind up having to add TWO locks.
         */
        if (double_clip) {
                KKASSERT(first_match == last_match);
                last_match = new_range2;
                new_range2 = NULL;
                lf_create_range(last_match, first_match->lf_owner,
                                first_match->lf_type, first_match->lf_flags,
                                end + 1, first_match->lf_end);
                first_match->lf_end = start - 1;
                first_match->lf_flags &= ~F_NOEND;

                /*
                 * Figure out where to insert the right side clip.
                 */
                lf_insert(&lock->lf_range, last_match, first_match);
                if (last_match->lf_flags & F_POSIX)
                        ++count;
        }

        /*
         * Clip or destroy the locks between first_match and last_match,
         * inclusive.  Ignore the primary lock we created (brange).  Note
         * that if double-clipped, first_match and last_match will be
         * outside our clipping range.  Otherwise first_match and last_match
         * will be deleted.
         *
         * We have already taken care of any double clipping.
         *
         * The insert_point may become invalid as we delete records, do not
         * use that pointer any more.  Also, when removing something other
         * than 'range' we have to check to see if the item we are removing
         * is 'next' and adjust 'next' properly.
         *
         * NOTE: brange will be NULL if F_UNLCKing.
         */
        TAILQ_INIT(&deadlist);
        next = first_match;

        while ((range = next) != NULL) {
                next = TAILQ_NEXT(range, lf_link);

                /*
                 * Ignore elements that we do not own and ignore the
                 * primary request range which we just created.
                 */
                if (range->lf_owner != owner || range == brange)
                        continue;

                /*
                 * We may have to wakeup a waiter when downgrading a lock.
                 */
                if (type == F_UNLCK)
                        wakeup_needed = 1;
                if (type == F_RDLCK && range->lf_type == F_WRLCK)
                        wakeup_needed = 1;

                /*
                 * Clip left.  This can only occur on first_match.
                 *
                 * Merge the left clip with brange if possible.  This must
                 * be done specifically, not in the optimized merge heuristic
                 * below, since we may have counted on it in our 'count'
                 * calculation above.
                 */
                if (range->lf_start < start) {
                        KKASSERT(range == first_match);
                        if (brange &&
                            range->lf_end >= start - 1 &&
                            lf_match(range, type, flags)) {
                                range->lf_end = brange->lf_end;
                                range->lf_flags |= brange->lf_flags & F_NOEND;

                                /*
                                 * Removing something other than 'range',
                                 * adjust 'next' if necessary.
                                 */
                                if (next == brange)
                                        next = TAILQ_NEXT(next, lf_link);
                                TAILQ_REMOVE(&lock->lf_range, brange, lf_link);
                                if (brange->lf_flags & F_POSIX)
                                        --count;
                                TAILQ_INSERT_TAIL(&deadlist, brange, lf_link);
                                brange = range;
                        } else if (range->lf_end >= start) {
                                range->lf_end = start - 1;
                                if (type != F_UNLCK)
                                        range->lf_flags &= ~F_NOEND;
                        }
                        if (range == last_match)
                                break;
                        continue;
                }

                /*
                 * Clip right.  This can only occur on last_match.
                 *
                 * Merge the right clip if possible.  This must be done
                 * specifically, not in the optimized merge heuristic
                 * below, since we may have counted on it in our 'count'
                 * calculation.
                 *
                 * Since we are adjusting lf_start, we have to move the
                 * record to maintain the sorted list.  Since lf_start is
                 * only getting larger we can use the next element as the
                 * insert point (we don't have to backtrack).
                 */
                if (range->lf_end > end) {
                        KKASSERT(range == last_match);
                        if (brange &&
                            range->lf_start <= end + 1 &&
                            lf_match(range, type, flags)) {
                                brange->lf_end = range->lf_end;
                                brange->lf_flags |= range->lf_flags & F_NOEND;
                                TAILQ_REMOVE(&lock->lf_range, range, lf_link);
                                if (range->lf_flags & F_POSIX)
                                        --count;
                                TAILQ_INSERT_TAIL(&deadlist, range, lf_link);
                        } else if (range->lf_start <= end) {
                                range->lf_start = end + 1;
                                TAILQ_REMOVE(&lock->lf_range, range, lf_link);
                                lf_insert(&lock->lf_range, range, next);
                        }
                        /* range == last_match, we are done */
                        break;
                }

                /*
                 * The record must be entirely enclosed.  Note that the
                 * record could be first_match or last_match, and will be
                 * deleted.
                 */
                KKASSERT(range->lf_start >= start && range->lf_end <= end);
                TAILQ_REMOVE(&lock->lf_range, range, lf_link);
                if (range->lf_flags & F_POSIX)
                        --count;
                TAILQ_INSERT_TAIL(&deadlist, range, lf_link);
                if (range == last_match)
                        break;
        }

        /*
         * Attempt to merge locks adjacent to brange.  For example, we may
         * have had to clip first_match and/or last_match, and they might
         * be adjacent.  Or there might simply have been an adjacent lock
         * already there.
         *
         * Don't get fancy, just check adjacent elements in the list if they
         * happen to be owned by us.
         *
         * This case only gets hit if we have a situation where a shared
         * and exclusive lock are adjacent, and the exclusive lock is
         * downgraded to shared or the shared lock is upgraded to exclusive.
         */
        if (brange) {
                range = TAILQ_PREV(brange, lockf_range_list, lf_link);
                if (range &&
                    range->lf_owner == owner &&
                    range->lf_end == brange->lf_start - 1 &&
                    lf_match(range, type, flags)
                ) {
                        /*
                         * Extend range to cover brange and scrap brange.
                         */
                        range->lf_end = brange->lf_end;
                        range->lf_flags |= brange->lf_flags & F_NOEND;
                        TAILQ_REMOVE(&lock->lf_range, brange, lf_link);
                        if (brange->lf_flags & F_POSIX)
                                --count;
                        TAILQ_INSERT_TAIL(&deadlist, brange, lf_link);
                        brange = range;
                }
                range = TAILQ_NEXT(brange, lf_link);
                if (range &&
                    range->lf_owner == owner &&
                    range->lf_start == brange->lf_end + 1 &&
                    lf_match(range, type, flags)
                ) {
                        /*
                         * Extend brange to cover range and scrap range.
                         */
                        brange->lf_end = range->lf_end;
                        brange->lf_flags |= range->lf_flags & F_NOEND;
                        TAILQ_REMOVE(&lock->lf_range, range, lf_link);
                        if (range->lf_flags & F_POSIX)
                                --count;
                        TAILQ_INSERT_TAIL(&deadlist, range, lf_link);
                }
        }
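
        /*
         * Example (illustrative): the owner holds a shared lock on
         * 0..49 and an exclusive lock on 50..99 and downgrades the
         * latter to shared.  After the clipping loop the new shared
         * 50..99 range (brange) sits right next to the compatible
         * 0..49 range, so the two are coalesced above into a single
         * 0..99 shared range and the leftover structure goes onto the
         * deadlist.
         */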

        /*
         * Destroy deleted elements.  We didn't want to do it in the loop
         * because the free() might have blocked.
         *
         * Adjust the count for any posix locks we thought we might create
         * but didn't.
         */
        while ((range = TAILQ_FIRST(&deadlist)) != NULL) {
                TAILQ_REMOVE(&deadlist, range, lf_link);
                lf_destroy_range(range);
        }

        KKASSERT(count <= 0);
        if (count < 0)
                lf_count_change(owner, count);
do_wakeup:
        lf_print_lock(lock);
        if (wakeup_needed)
                lf_wakeup(lock, start, end);
        error = 0;
do_cleanup:
        if (new_range1 != NULL)
                lf_destroy_range(new_range1);
        if (new_range2 != NULL)
                lf_destroy_range(new_range2);
        return(error);
}

/*
 * Check whether there is a blocking lock,
 * and if so return its process identifier.
 */
static int
lf_getlock(struct flock *fl, struct lockf *lock, struct proc *owner,
           int type, int flags, off_t start, off_t end)
{
        struct lockf_range *range;

        TAILQ_FOREACH(range, &lock->lf_range, lf_link)
                if (range->lf_owner != owner &&
                    lf_overlap(range, start, end) &&
                    (type == F_WRLCK || range->lf_type == F_WRLCK))
                        break;
        if (range == NULL) {
                fl->l_type = F_UNLCK;
                return(0);
        }
        fl->l_type = range->lf_type;
        fl->l_whence = SEEK_SET;
        fl->l_start = range->lf_start;
        if (range->lf_flags & F_NOEND)
                fl->l_len = 0;
        else
                fl->l_len = range->lf_end - range->lf_start + 1;
        if (range->lf_owner != NULL && (range->lf_flags & F_POSIX))
                fl->l_pid = range->lf_owner->p_pid;
        else
                fl->l_pid = -1;
        return(0);
}
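
/*
 * Example (illustrative userland code): asking who would block a write
 * lock on bytes 0..99,
 *
 *      struct flock fl;
 *
 *      fl.l_type = F_WRLCK;
 *      fl.l_whence = SEEK_SET;
 *      fl.l_start = 0;
 *      fl.l_len = 100;
 *      fcntl(fd, F_GETLK, &fl);
 *
 * returns with fl rewritten to describe the first conflicting range:
 * its type, start and length (l_len == 0 for F_NOEND ranges, meaning
 * "through end of file"), and l_pid set to the owner's pid for POSIX
 * locks or -1 for flock-style locks.  If nothing conflicts, only
 * l_type is changed, to F_UNLCK.
 */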

/*
 * Wakeup pending lock attempts.  Theoretically we can stop as soon as
 * we encounter an exclusive request that covers the whole range (at least
 * insofar as the sleep code above calls lf_wakeup() if it would otherwise
 * exit instead of loop), but for now just wakeup all overlapping
 * requests.  XXX
 */
static void
lf_wakeup(struct lockf *lock, off_t start, off_t end)
{
        struct lockf_range *range, *nrange;

        TAILQ_FOREACH_MUTABLE(range, &lock->lf_blocked, lf_link, nrange) {
                if (lf_overlap(range, start, end) == 0)
                        continue;
                TAILQ_REMOVE(&lock->lf_blocked, range, lf_link);
                range->lf_flags = 1;
                wakeup(range);
        }
}

/*
 * Allocate a range structure and initialize it sufficiently such that
 * lf_destroy_range() does not barf.
 *
 * Most use cases are temporary; implement a small 2-entry-per-cpu
 * cache.
 */
static struct lockf_range *
lf_alloc_range(void)
{
        struct lockf_range *range;
        struct lf_pcpu *lfpc;

        lfpc = &lf_pcpu_array[mycpuid];
        if ((range = lfpc->free1) != NULL) {
                lfpc->free1 = NULL;
                return range;
        }
        if ((range = lfpc->free2) != NULL) {
                lfpc->free2 = NULL;
                return range;
        }
        range = kmalloc(sizeof(struct lockf_range), M_LOCKF, M_WAITOK);
        range->lf_owner = NULL;

        return(range);
}

static void
lf_insert(struct lockf_range_list *list, struct lockf_range *elm,
          struct lockf_range *insert_point)
{
        while (insert_point && insert_point->lf_start < elm->lf_start)
                insert_point = TAILQ_NEXT(insert_point, lf_link);
        if (insert_point != NULL)
                TAILQ_INSERT_BEFORE(insert_point, elm, lf_link);
        else
                TAILQ_INSERT_TAIL(list, elm, lf_link);
}

static void
lf_create_range(struct lockf_range *range, struct proc *owner, int type,
                int flags, off_t start, off_t end)
{
        KKASSERT(start <= end);
        range->lf_type = type;
        range->lf_flags = flags;
        range->lf_start = start;
        range->lf_end = end;
        range->lf_owner = owner;

        lf_printf("lf_create_range: %ju..%ju\n",
                  (uintmax_t)range->lf_start, (uintmax_t)range->lf_end);
}

static void
lf_destroy_range(struct lockf_range *range)
{
        struct lf_pcpu *lfpc;

        lf_printf("lf_destroy_range: %ju..%ju\n",
                  (uintmax_t)range->lf_start, (uintmax_t)range->lf_end);

        lfpc = &lf_pcpu_array[mycpuid];
        if (lfpc->free1 == NULL) {
                range->lf_owner = NULL;
                lfpc->free1 = range;
                return;
        }
        if (lfpc->free2 == NULL) {
                range->lf_owner = NULL;
                lfpc->free2 = range;
                return;
        }
        kfree(range, M_LOCKF);
}

#ifdef LOCKF_DEBUG

static void
_lf_printf(const char *ctl, ...)
{
        struct proc *p;
        __va_list va;

        if (lf_print_ranges) {
                if ((p = curproc) != NULL)
                        kprintf("pid %d (%s): ", p->p_pid, p->p_comm);
        }
        __va_start(va, ctl);
        kvprintf(ctl, va);
        __va_end(va);
}

static void
_lf_print_lock(const struct lockf *lock)
{
        struct lockf_range *range;

        if (lf_print_ranges == 0)
                return;

        if (TAILQ_EMPTY(&lock->lf_range)) {
                lf_printf("lockf %p: no ranges locked\n", lock);
        } else {
                lf_printf("lockf %p:\n", lock);
        }
        TAILQ_FOREACH(range, &lock->lf_range, lf_link)
                kprintf("\t%jd..%jd type %s owned by %d\n",
                        (uintmax_t)range->lf_start, (uintmax_t)range->lf_end,
                        range->lf_type == F_RDLCK ? "shared" : "exclusive",
                        range->lf_flags & F_POSIX ? range->lf_owner->p_pid : -1);
        if (TAILQ_EMPTY(&lock->lf_blocked))
                kprintf("no process waiting for range\n");
        else
                kprintf("blocked locks:");
        TAILQ_FOREACH(range, &lock->lf_blocked, lf_link)
                kprintf("\t%jd..%jd type %s waiting on %p\n",
                        (uintmax_t)range->lf_start, (uintmax_t)range->lf_end,
                        range->lf_type == F_RDLCK ? "shared" : "exclusive",
                        range);
}
#endif /* LOCKF_DEBUG */

static void
lf_init(void *dummy __unused)
{
        lf_pcpu_array = kmalloc(sizeof(*lf_pcpu_array) * ncpus,
                                M_LOCKF, M_WAITOK | M_ZERO);
}

SYSINIT(lockf, SI_BOOT2_MACHDEP, SI_ORDER_ANY, lf_init, NULL);