2 * linux/fs/lockd/svclock.c
4 * Handling of server-side locks, mostly of the blocked variety.
5 * This is the ugliest part of lockd because we tread on very thin ice.
6 * GRANT and CANCEL calls may get stuck, meet in mid-flight, etc.
7 * IMNSHO introducing the grant callback into the NLM protocol was one
8 * of the worst ideas Sun ever had. Except maybe for the idea of doing
9 * NFS file locking at all.
11 * I'm trying hard to avoid race conditions by protecting most accesses
12 * to a file's list of blocked locks through a semaphore. The global
13 * list of blocked locks is not protected in this fashion however.
14 * Therefore, some functions (such as the RPC callback for the async grant
15 * call) move blocked locks towards the head of the list *while some other
16 * process might be traversing it*. This should not be a problem in
17 * practice, because this will only cause functions traversing the list
18 * to visit some blocks twice.
20 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
23 #include <linux/types.h>
24 #include <linux/errno.h>
25 #include <linux/kernel.h>
26 #include <linux/sched.h>
27 #include <linux/smp_lock.h>
28 #include <linux/sunrpc/clnt.h>
29 #include <linux/sunrpc/svc.h>
30 #include <linux/lockd/nlm.h>
31 #include <linux/lockd/lockd.h>
33 #define NLMDBG_FACILITY NLMDBG_SVCLOCK
35 #ifdef CONFIG_LOCKD_V4
36 #define nlm_deadlock nlm4_deadlock
38 #define nlm_deadlock nlm_lck_denied
41 static void nlmsvc_release_block(struct nlm_block
*block
);
42 static void nlmsvc_insert_block(struct nlm_block
*block
, unsigned long);
43 static int nlmsvc_remove_block(struct nlm_block
*block
);
45 static int nlmsvc_setgrantargs(struct nlm_rqst
*call
, struct nlm_lock
*lock
);
46 static void nlmsvc_freegrantargs(struct nlm_rqst
*call
);
47 static const struct rpc_call_ops nlmsvc_grant_ops
;
/*
 * The list of blocked locks to retry.
 *
 * Head of the global chain; entries are linked through b_next.
 */
static struct nlm_block *nlm_blocked;
55 * Insert a blocked lock into the global list
58 nlmsvc_insert_block(struct nlm_block
*block
, unsigned long when
)
60 struct nlm_block
**bp
, *b
;
62 dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block
, when
);
63 kref_get(&block
->b_count
);
65 nlmsvc_remove_block(block
);
67 if (when
!= NLM_NEVER
) {
68 if ((when
+= jiffies
) == NLM_NEVER
)
70 while ((b
= *bp
) && time_before_eq(b
->b_when
,when
) && b
->b_when
!= NLM_NEVER
)
73 while ((b
= *bp
) != 0)
83 * Remove a block from the global list
86 nlmsvc_remove_block(struct nlm_block
*block
)
88 struct nlm_block
**bp
, *b
;
92 for (bp
= &nlm_blocked
; (b
= *bp
) != 0; bp
= &b
->b_next
) {
96 nlmsvc_release_block(block
);
105 * Find a block for a given lock
107 static struct nlm_block
*
108 nlmsvc_lookup_block(struct nlm_file
*file
, struct nlm_lock
*lock
)
110 struct nlm_block
**head
, *block
;
111 struct file_lock
*fl
;
113 dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %Ld-%Ld ty=%d\n",
114 file
, lock
->fl
.fl_pid
,
115 (long long)lock
->fl
.fl_start
,
116 (long long)lock
->fl
.fl_end
, lock
->fl
.fl_type
);
117 for (head
= &nlm_blocked
; (block
= *head
) != 0; head
= &block
->b_next
) {
118 fl
= &block
->b_call
->a_args
.lock
.fl
;
119 dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
120 block
->b_file
, fl
->fl_pid
,
121 (long long)fl
->fl_start
,
122 (long long)fl
->fl_end
, fl
->fl_type
,
123 nlmdbg_cookie2a(&block
->b_call
->a_args
.cookie
));
124 if (block
->b_file
== file
&& nlm_compare_locks(fl
, &lock
->fl
)) {
125 kref_get(&block
->b_count
);
133 static inline int nlm_cookie_match(struct nlm_cookie
*a
, struct nlm_cookie
*b
)
137 if(memcmp(a
->data
,b
->data
,a
->len
))
143 * Find a block with a given NLM cookie.
145 static inline struct nlm_block
*
146 nlmsvc_find_block(struct nlm_cookie
*cookie
, struct sockaddr_in
*sin
)
148 struct nlm_block
*block
;
150 for (block
= nlm_blocked
; block
; block
= block
->b_next
) {
151 dprintk("cookie: head of blocked queue %p, block %p\n",
153 if (nlm_cookie_match(&block
->b_call
->a_args
.cookie
,cookie
)
154 && nlm_cmp_addr(sin
, &block
->b_host
->h_addr
))
159 kref_get(&block
->b_count
);
164 * Create a block and initialize it.
166 * Note: we explicitly set the cookie of the grant reply to that of
167 * the blocked lock request. The spec explicitly mentions that the client
168 * should _not_ rely on the callback containing the same cookie as the
169 * request, but (as I found out later) that's because some implementations
170 * do just this. Never mind the standards committees, they support our
171 * logging industries.
173 static inline struct nlm_block
*
174 nlmsvc_create_block(struct svc_rqst
*rqstp
, struct nlm_file
*file
,
175 struct nlm_lock
*lock
, struct nlm_cookie
*cookie
)
177 struct nlm_block
*block
;
178 struct nlm_host
*host
;
179 struct nlm_rqst
*call
= NULL
;
181 /* Create host handle for callback */
182 host
= nlmsvc_lookup_host(rqstp
);
186 call
= nlm_alloc_call(host
);
190 /* Allocate memory for block, and initialize arguments */
191 block
= kzalloc(sizeof(*block
), GFP_KERNEL
);
194 kref_init(&block
->b_count
);
196 if (!nlmsvc_setgrantargs(call
, lock
))
199 /* Set notifier function for VFS, and init args */
200 call
->a_args
.lock
.fl
.fl_flags
|= FL_SLEEP
;
201 call
->a_args
.lock
.fl
.fl_lmops
= &nlmsvc_lock_operations
;
202 call
->a_args
.cookie
= *cookie
; /* see above */
204 dprintk("lockd: created block %p...\n", block
);
206 /* Create and initialize the block */
207 block
->b_daemon
= rqstp
->rq_server
;
208 block
->b_host
= host
;
209 block
->b_file
= file
;
212 /* Add to file's list of blocks */
213 block
->b_fnext
= file
->f_blocks
;
214 file
->f_blocks
= block
;
216 /* Set up RPC arguments for callback */
217 block
->b_call
= call
;
218 call
->a_flags
= RPC_TASK_ASYNC
;
219 call
->a_block
= block
;
226 nlm_release_call(call
);
231 * Delete a block. If the lock was cancelled or the grant callback
232 * failed, unlock is set to 1.
233 * It is the caller's responsibility to check whether the file
234 * can be closed hereafter.
236 static int nlmsvc_unlink_block(struct nlm_block
*block
)
239 dprintk("lockd: unlinking block %p...\n", block
);
241 /* Remove block from list */
242 status
= posix_unblock_lock(block
->b_file
->f_file
, &block
->b_call
->a_args
.lock
.fl
);
243 nlmsvc_remove_block(block
);
247 static void nlmsvc_free_block(struct kref
*kref
)
249 struct nlm_block
*block
= container_of(kref
, struct nlm_block
, b_count
);
250 struct nlm_file
*file
= block
->b_file
;
251 struct nlm_block
**bp
;
253 dprintk("lockd: freeing block %p...\n", block
);
256 /* Remove block from file's list of blocks */
257 for (bp
= &file
->f_blocks
; *bp
; bp
= &(*bp
)->b_fnext
) {
259 *bp
= block
->b_fnext
;
265 nlmsvc_freegrantargs(block
->b_call
);
266 nlm_release_call(block
->b_call
);
267 nlm_release_file(block
->b_file
);
271 static void nlmsvc_release_block(struct nlm_block
*block
)
274 kref_put(&block
->b_count
, nlmsvc_free_block
);
277 static void nlmsvc_act_mark(struct nlm_host
*host
, struct nlm_file
*file
)
279 struct nlm_block
*block
;
282 for (block
= file
->f_blocks
; block
!= NULL
; block
= block
->b_fnext
)
283 block
->b_host
->h_inuse
= 1;
287 static void nlmsvc_act_unlock(struct nlm_host
*host
, struct nlm_file
*file
)
289 struct nlm_block
*block
;
293 for (block
= file
->f_blocks
; block
!= NULL
; block
= block
->b_fnext
) {
294 if (host
!= NULL
&& host
!= block
->b_host
)
296 if (!block
->b_queued
)
298 kref_get(&block
->b_count
);
300 nlmsvc_unlink_block(block
);
301 nlmsvc_release_block(block
);
308 * Loop over all blocks and perform the action specified.
309 * (NLM_ACT_CHECK handled by nlmsvc_inspect_file).
312 nlmsvc_traverse_blocks(struct nlm_host
*host
, struct nlm_file
*file
, int action
)
314 if (action
== NLM_ACT_MARK
)
315 nlmsvc_act_mark(host
, file
);
317 nlmsvc_act_unlock(host
, file
);
321 * Initialize arguments for GRANTED call. The nlm_rqst structure
322 * has been cleared already.
324 static int nlmsvc_setgrantargs(struct nlm_rqst
*call
, struct nlm_lock
*lock
)
326 locks_copy_lock(&call
->a_args
.lock
.fl
, &lock
->fl
);
327 memcpy(&call
->a_args
.lock
.fh
, &lock
->fh
, sizeof(call
->a_args
.lock
.fh
));
328 call
->a_args
.lock
.caller
= system_utsname
.nodename
;
329 call
->a_args
.lock
.oh
.len
= lock
->oh
.len
;
331 /* set default data area */
332 call
->a_args
.lock
.oh
.data
= call
->a_owner
;
333 call
->a_args
.lock
.svid
= lock
->fl
.fl_pid
;
335 if (lock
->oh
.len
> NLMCLNT_OHSIZE
) {
336 void *data
= kmalloc(lock
->oh
.len
, GFP_KERNEL
);
339 call
->a_args
.lock
.oh
.data
= (u8
*) data
;
342 memcpy(call
->a_args
.lock
.oh
.data
, lock
->oh
.data
, lock
->oh
.len
);
346 static void nlmsvc_freegrantargs(struct nlm_rqst
*call
)
348 if (call
->a_args
.lock
.oh
.data
!= call
->a_owner
)
349 kfree(call
->a_args
.lock
.oh
.data
);
353 * Attempt to establish a lock, and if it can't be granted, block it
357 nlmsvc_lock(struct svc_rqst
*rqstp
, struct nlm_file
*file
,
358 struct nlm_lock
*lock
, int wait
, struct nlm_cookie
*cookie
)
360 struct nlm_block
*block
, *newblock
= NULL
;
364 dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
365 file
->f_file
->f_dentry
->d_inode
->i_sb
->s_id
,
366 file
->f_file
->f_dentry
->d_inode
->i_ino
,
367 lock
->fl
.fl_type
, lock
->fl
.fl_pid
,
368 (long long)lock
->fl
.fl_start
,
369 (long long)lock
->fl
.fl_end
,
373 lock
->fl
.fl_flags
&= ~FL_SLEEP
;
375 /* Lock file against concurrent access */
377 /* Get existing block (in case client is busy-waiting) */
378 block
= nlmsvc_lookup_block(file
, lock
);
380 if (newblock
!= NULL
)
381 lock
= &newblock
->b_call
->a_args
.lock
;
383 lock
= &block
->b_call
->a_args
.lock
;
385 error
= posix_lock_file(file
->f_file
, &lock
->fl
);
386 lock
->fl
.fl_flags
&= ~FL_SLEEP
;
388 dprintk("lockd: posix_lock_file returned %d\n", error
);
399 default: /* includes ENOLCK */
400 ret
= nlm_lck_denied_nolocks
;
404 ret
= nlm_lck_denied
;
408 ret
= nlm_lck_blocked
;
412 /* If we don't have a block, create and initialize it. Then
413 * retry because we may have slept in kmalloc. */
414 /* We have to release f_sema as nlmsvc_create_block may try to
415 * claim it while doing host garbage collection */
416 if (newblock
== NULL
) {
418 dprintk("lockd: blocking on this lock (allocating).\n");
419 if (!(newblock
= nlmsvc_create_block(rqstp
, file
, lock
, cookie
)))
420 return nlm_lck_denied_nolocks
;
424 /* Append to list of blocked */
425 nlmsvc_insert_block(newblock
, NLM_NEVER
);
428 nlmsvc_release_block(newblock
);
429 nlmsvc_release_block(block
);
430 dprintk("lockd: nlmsvc_lock returned %u\n", ret
);
435 * Test for presence of a conflicting lock.
438 nlmsvc_testlock(struct nlm_file
*file
, struct nlm_lock
*lock
,
439 struct nlm_lock
*conflock
)
441 dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
442 file
->f_file
->f_dentry
->d_inode
->i_sb
->s_id
,
443 file
->f_file
->f_dentry
->d_inode
->i_ino
,
445 (long long)lock
->fl
.fl_start
,
446 (long long)lock
->fl
.fl_end
);
448 if (posix_test_lock(file
->f_file
, &lock
->fl
, &conflock
->fl
)) {
449 dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
450 conflock
->fl
.fl_type
,
451 (long long)conflock
->fl
.fl_start
,
452 (long long)conflock
->fl
.fl_end
);
453 conflock
->caller
= "somehost"; /* FIXME */
454 conflock
->oh
.len
= 0; /* don't return OH info */
455 conflock
->svid
= conflock
->fl
.fl_pid
;
456 return nlm_lck_denied
;
464 * This implies a CANCEL call: We send a GRANT_MSG, the client replies
465 * with a GRANT_RES call which gets lost, and calls UNLOCK immediately
466 * afterwards. In this case the block will still be there, and hence
470 nlmsvc_unlock(struct nlm_file
*file
, struct nlm_lock
*lock
)
474 dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
475 file
->f_file
->f_dentry
->d_inode
->i_sb
->s_id
,
476 file
->f_file
->f_dentry
->d_inode
->i_ino
,
478 (long long)lock
->fl
.fl_start
,
479 (long long)lock
->fl
.fl_end
);
481 /* First, cancel any lock that might be there */
482 nlmsvc_cancel_blocked(file
, lock
);
484 lock
->fl
.fl_type
= F_UNLCK
;
485 error
= posix_lock_file(file
->f_file
, &lock
->fl
);
487 return (error
< 0)? nlm_lck_denied_nolocks
: nlm_granted
;
491 * Cancel a previously blocked request.
493 * A cancel request always overrides any grant that may currently
495 * The calling procedure must check whether the file can be closed.
498 nlmsvc_cancel_blocked(struct nlm_file
*file
, struct nlm_lock
*lock
)
500 struct nlm_block
*block
;
503 dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
504 file
->f_file
->f_dentry
->d_inode
->i_sb
->s_id
,
505 file
->f_file
->f_dentry
->d_inode
->i_ino
,
507 (long long)lock
->fl
.fl_start
,
508 (long long)lock
->fl
.fl_end
);
511 block
= nlmsvc_lookup_block(file
, lock
);
514 status
= nlmsvc_unlink_block(block
);
515 nlmsvc_release_block(block
);
517 return status
? nlm_lck_denied
: nlm_granted
;
521 * Unblock a blocked lock request. This is a callback invoked from the
522 * VFS layer when a lock on which we blocked is removed.
524 * This function doesn't grant the blocked lock instantly, but rather moves
525 * the block to the head of nlm_blocked where it can be picked up by lockd.
528 nlmsvc_notify_blocked(struct file_lock
*fl
)
530 struct nlm_block
**bp
, *block
;
532 dprintk("lockd: VFS unblock notification for block %p\n", fl
);
533 for (bp
= &nlm_blocked
; (block
= *bp
) != 0; bp
= &block
->b_next
) {
534 if (nlm_compare_locks(&block
->b_call
->a_args
.lock
.fl
, fl
)) {
535 nlmsvc_insert_block(block
, 0);
536 svc_wake_up(block
->b_daemon
);
541 printk(KERN_WARNING
"lockd: notification for unknown block!\n");
544 static int nlmsvc_same_owner(struct file_lock
*fl1
, struct file_lock
*fl2
)
546 return fl1
->fl_owner
== fl2
->fl_owner
&& fl1
->fl_pid
== fl2
->fl_pid
;
549 struct lock_manager_operations nlmsvc_lock_operations
= {
550 .fl_compare_owner
= nlmsvc_same_owner
,
551 .fl_notify
= nlmsvc_notify_blocked
,
555 * Try to claim a lock that was previously blocked.
557 * Note that we use both the RPC_GRANTED_MSG call _and_ an async
558 * RPC thread when notifying the client. This seems like overkill...
560 * - we don't want to use a synchronous RPC thread, otherwise
561 * we might find ourselves hanging on a dead portmapper.
562 * - Some lockd implementations (e.g. HP) don't react to
563 * RPC_GRANTED calls; they seem to insist on RPC_GRANTED_MSG calls.
566 nlmsvc_grant_blocked(struct nlm_block
*block
)
568 struct nlm_file
*file
= block
->b_file
;
569 struct nlm_lock
*lock
= &block
->b_call
->a_args
.lock
;
572 dprintk("lockd: grant blocked lock %p\n", block
);
574 /* Unlink block request from list */
575 nlmsvc_unlink_block(block
);
577 /* If b_granted is true this means we've been here before.
578 * Just retry the grant callback, possibly refreshing the RPC
580 if (block
->b_granted
) {
581 nlm_rebind_host(block
->b_host
);
585 /* Try the lock operation again */
586 lock
->fl
.fl_flags
|= FL_SLEEP
;
587 error
= posix_lock_file(file
->f_file
, &lock
->fl
);
588 lock
->fl
.fl_flags
&= ~FL_SLEEP
;
594 dprintk("lockd: lock still blocked\n");
595 nlmsvc_insert_block(block
, NLM_NEVER
);
598 printk(KERN_WARNING
"lockd: unexpected error %d in %s!\n",
599 -error
, __FUNCTION__
);
600 nlmsvc_insert_block(block
, 10 * HZ
);
605 /* Lock was granted by VFS. */
606 dprintk("lockd: GRANTing blocked lock.\n");
607 block
->b_granted
= 1;
609 /* Schedule next grant callback in 30 seconds */
610 nlmsvc_insert_block(block
, 30 * HZ
);
612 /* Call the client */
613 kref_get(&block
->b_count
);
614 if (nlm_async_call(block
->b_call
, NLMPROC_GRANTED_MSG
,
615 &nlmsvc_grant_ops
) < 0)
616 nlmsvc_release_block(block
);
620 * This is the callback from the RPC layer when the NLM_GRANTED_MSG
621 * RPC call has succeeded or timed out.
622 * Like all RPC callbacks, it is invoked by the rpciod process, so it
623 * better not sleep. Therefore, we put the blocked lock on the nlm_blocked
624 * chain once more in order to have it removed by lockd itself (which can
625 * then sleep on the file semaphore without disrupting e.g. the nfs client).
627 static void nlmsvc_grant_callback(struct rpc_task
*task
, void *data
)
629 struct nlm_rqst
*call
= data
;
630 struct nlm_block
*block
= call
->a_block
;
631 unsigned long timeout
;
633 dprintk("lockd: GRANT_MSG RPC callback\n");
635 /* Technically, we should down the file semaphore here. Since we
636 * move the block towards the head of the queue only, no harm
637 * can be done, though. */
638 if (task
->tk_status
< 0) {
639 /* RPC error: Re-insert for retransmission */
642 /* Call was successful, now wait for client callback */
645 nlmsvc_insert_block(block
, timeout
);
646 svc_wake_up(block
->b_daemon
);
649 static void nlmsvc_grant_release(void *data
)
651 struct nlm_rqst
*call
= data
;
653 nlmsvc_release_block(call
->a_block
);
656 static const struct rpc_call_ops nlmsvc_grant_ops
= {
657 .rpc_call_done
= nlmsvc_grant_callback
,
658 .rpc_release
= nlmsvc_grant_release
,
662 * We received a GRANT_RES callback. Try to find the corresponding
666 nlmsvc_grant_reply(struct svc_rqst
*rqstp
, struct nlm_cookie
*cookie
, u32 status
)
668 struct nlm_block
*block
;
669 struct nlm_file
*file
;
671 dprintk("grant_reply: looking for cookie %x, host (%08x), s=%d \n",
672 *(unsigned int *)(cookie
->data
),
673 ntohl(rqstp
->rq_addr
.sin_addr
.s_addr
), status
);
674 if (!(block
= nlmsvc_find_block(cookie
, &rqstp
->rq_addr
)))
676 file
= block
->b_file
;
679 if (status
== NLM_LCK_DENIED_GRACE_PERIOD
) {
680 /* Try again in a couple of seconds */
681 nlmsvc_insert_block(block
, 10 * HZ
);
683 /* Lock is now held by client, or has been rejected.
684 * In both cases, the block should be removed. */
685 nlmsvc_unlink_block(block
);
688 nlmsvc_release_block(block
);
692 * Retry all blocked locks that have been notified. This is where lockd
693 * picks up locks that can be granted, or grant notifications that must
697 nlmsvc_retry_blocked(void)
699 struct nlm_block
*block
;
701 dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
703 nlm_blocked
? nlm_blocked
->b_when
: 0);
704 while ((block
= nlm_blocked
) != 0) {
705 if (block
->b_when
== NLM_NEVER
)
707 if (time_after(block
->b_when
,jiffies
))
709 dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
710 block
, block
->b_when
);
711 kref_get(&block
->b_count
);
712 nlmsvc_grant_blocked(block
);
713 nlmsvc_release_block(block
);
716 if ((block
= nlm_blocked
) && block
->b_when
!= NLM_NEVER
)
717 return (block
->b_when
- jiffies
);
719 return MAX_SCHEDULE_TIMEOUT
;