2 * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License version 2.
10 #include <linux/miscdevice.h>
11 #include <linux/poll.h>
12 #include <linux/dlm.h>
13 #include <linux/dlm_plock.h>
14 #include <linux/slab.h>
16 #include "dlm_internal.h"
17 #include "lockspace.h"
19 static spinlock_t ops_lock
;
20 static struct list_head send_list
;
21 static struct list_head recv_list
;
22 static wait_queue_head_t send_wq
;
23 static wait_queue_head_t recv_wq
;
/*
 * NOTE(review): these two members look like part of struct plock_op (the code
 * below uses op->list and op->info), but the opening and closing lines of the
 * enclosing struct are not visible in this view -- confirm against the full file.
 *
 * list: linkage used to queue the op on send_list / recv_list.
 * info: the request/result record copied to and from the misc device.
 */
26 struct list_head list
;
28 struct dlm_plock_info info
;
40 static inline void set_version(struct dlm_plock_info
*info
)
42 info
->version
[0] = DLM_PLOCK_VERSION_MAJOR
;
43 info
->version
[1] = DLM_PLOCK_VERSION_MINOR
;
44 info
->version
[2] = DLM_PLOCK_VERSION_PATCH
;
/*
 * Compare the version carried in @info with the DLM_PLOCK_VERSION_* constants
 * of this kernel.  A mismatch is logged when the major numbers differ, or when
 * the kernel minor number is lower than the minor number in @info.
 *
 * dev_write() tests the result as a boolean.
 *
 * NOTE(review): the remaining log_print() arguments and the return statements
 * are not visible in this view -- confirm the exact return values against the
 * full file.
 */
47 static int check_version(struct dlm_plock_info
*info
)
49 if ((DLM_PLOCK_VERSION_MAJOR
!= info
->version
[0]) ||
50 (DLM_PLOCK_VERSION_MINOR
< info
->version
[1])) {
51 log_print("plock device version mismatch: "
52 "kernel (%u.%u.%u), user (%u.%u.%u)",
53 DLM_PLOCK_VERSION_MAJOR
,
54 DLM_PLOCK_VERSION_MINOR
,
55 DLM_PLOCK_VERSION_PATCH
,
/*
 * Queue @op for the userspace reader: stamp its info with the kernel plock
 * version, initialise its list head and append it to the tail of send_list.
 *
 * NOTE(review): the spin_lock() that pairs with the spin_unlock() below, and
 * anything that follows the unlock, are not visible in this view.
 */
64 static void send_op(struct plock_op
*op
)
66 set_version(&op
->info
);
67 INIT_LIST_HEAD(&op
->list
);
69 list_add_tail(&op
->list
, &send_list
);
70 spin_unlock(&ops_lock
);
74 /* If a process was killed while waiting for the only plock on a file,
75 locks_remove_posix will not see any lock on the file so it won't
76 send an unlock-close to us to pass on to userspace to clean up the
77 abandoned waiter. So, we have to insert the unlock-close when the
78 lock call is interrupted. */
/*
 * Build an unlock request marked DLM_PLOCK_FL_CLOSE for resource @number in
 * lockspace @ls, on behalf of the owner of @fl.
 *
 * The op comes from kzalloc(GFP_NOFS), so every field not assigned below starts
 * out zero.  The range end is set to OFFSET_MAX.  The owner is fl_pid when
 * fl->fl_lmops->fl_grant is set; the other visible assignment uses the
 * fl_owner pointer value.
 *
 * NOTE(review): the local declaration of op, the allocation-failure check, the
 * line between the two owner assignments (presumably an else) and the final
 * hand-off of the op are not visible in this view.  @file is not referenced by
 * any visible line.
 */
80 static void do_unlock_close(struct dlm_ls
*ls
, u64 number
,
81 struct file
*file
, struct file_lock
*fl
)
85 op
= kzalloc(sizeof(*op
), GFP_NOFS
);
89 op
->info
.optype
= DLM_PLOCK_OP_UNLOCK
;
90 op
->info
.pid
= fl
->fl_pid
;
91 op
->info
.fsid
= ls
->ls_global_id
;
92 op
->info
.number
= number
;
94 op
->info
.end
= OFFSET_MAX
;
95 if (fl
->fl_lmops
&& fl
->fl_lmops
->fl_grant
)
96 op
->info
.owner
= (__u64
) fl
->fl_pid
;
98 op
->info
.owner
= (__u64
)(long) fl
->fl_owner
;
100 op
->info
.flags
|= DLM_PLOCK_FL_CLOSE
;
/*
 * Request a posix lock on resource @number in @lockspace for @file.
 *
 * A zeroed plock_xop is allocated and its op is filled in as a
 * DLM_PLOCK_OP_LOCK request: pid, range and owner come from @fl, ex is set
 * when fl_type is F_WRLCK, and wait is set when @cmd satisfies IS_SETLKW().
 *
 * When fl->fl_lmops->fl_grant is set, the owner is fl_pid, the grant hook is
 * stored in xop->callback and a copy of @fl is kept in xop->flc.  The other
 * visible assignments use the fl_owner pointer value and a NULL callback.
 *
 * With a NULL callback the caller sleeps killably on recv_wq until op->done is
 * non-zero.  If the wait returns -ERESTARTSYS this is logged and
 * do_unlock_close() is called.  Further down, an op that is still on a list is
 * reported, and posix_lock_file_wait() is called on @file with a failure only
 * logged.  The lockspace reference is dropped with dlm_put_lockspace().
 *
 * NOTE(review): many lines of this function are not visible in this view
 * (local declarations, error checks, the else branches, the hand-off of the op
 * to the send list, list removal, freeing and the return).  In particular the
 * condition under which rv becomes FILE_LOCK_DEFERRED is not shown --
 * presumably the callback case; confirm against the full file.
 */
104 int dlm_posix_lock(dlm_lockspace_t
*lockspace
, u64 number
, struct file
*file
,
105 int cmd
, struct file_lock
*fl
)
109 struct plock_xop
*xop
;
112 ls
= dlm_find_lockspace_local(lockspace
);
116 xop
= kzalloc(sizeof(*xop
), GFP_NOFS
);
123 op
->info
.optype
= DLM_PLOCK_OP_LOCK
;
124 op
->info
.pid
= fl
->fl_pid
;
125 op
->info
.ex
= (fl
->fl_type
== F_WRLCK
);
126 op
->info
.wait
= IS_SETLKW(cmd
);
127 op
->info
.fsid
= ls
->ls_global_id
;
128 op
->info
.number
= number
;
129 op
->info
.start
= fl
->fl_start
;
130 op
->info
.end
= fl
->fl_end
;
131 if (fl
->fl_lmops
&& fl
->fl_lmops
->fl_grant
) {
132 /* fl_owner is lockd which doesn't distinguish
133 processes on the nfs client */
134 op
->info
.owner
= (__u64
) fl
->fl_pid
;
135 xop
->callback
= fl
->fl_lmops
->fl_grant
;
136 locks_init_lock(&xop
->flc
);
137 locks_copy_lock(&xop
->flc
, fl
);
141 op
->info
.owner
= (__u64
)(long) fl
->fl_owner
;
142 xop
->callback
= NULL
;
/* No grant callback was recorded: sleep here until the op is marked done. */
147 if (xop
->callback
== NULL
) {
148 rv
= wait_event_killable(recv_wq
, (op
->done
!= 0));
149 if (rv
== -ERESTARTSYS
) {
150 log_debug(ls
, "dlm_posix_lock: wait killed %llx",
151 (unsigned long long)number
);
152 spin_lock(&ops_lock
);
154 spin_unlock(&ops_lock
);
156 do_unlock_close(ls
, number
, file
, fl
);
160 rv
= FILE_LOCK_DEFERRED
;
164 spin_lock(&ops_lock
);
165 if (!list_empty(&op
->list
)) {
166 log_error(ls
, "dlm_posix_lock: op on list %llx",
167 (unsigned long long)number
);
170 spin_unlock(&ops_lock
);
175 if (posix_lock_file_wait(file
, fl
) < 0)
176 log_error(ls
, "dlm_posix_lock: vfs lock error %llx",
177 (unsigned long long)number
);
182 dlm_put_lockspace(ls
);
185 EXPORT_SYMBOL_GPL(dlm_posix_lock
);
187 /* Returns failure iff a successful lock operation should be canceled */
/*
 * Complete a lock request that was issued with a grant callback.
 *
 * @op is reinterpreted as the plock_xop it was allocated as, which gives
 * access to the saved callback.  An op that is still on a list is reported
 * under ops_lock.  The callback is loaded into notify and, on one path, is
 * called with op->info.rv.  On the path that continues, FL_SLEEP is cleared
 * in flc and posix_lock_file() is called on it; a failure there is only
 * logged, for the reason given in the comment text below.  notify is then
 * called with 0 and its result stored in rv; the final visible statement logs
 * that a lock was granted after the request had failed.
 *
 * NOTE(review): the assignments of file, fl and flc, the conditions that
 * select each path, the cleanup and the return are not visible in this view.
 */
188 static int dlm_plock_callback(struct plock_op
*op
)
191 struct file_lock
*fl
;
192 struct file_lock
*flc
;
193 int (*notify
)(void *, void *, int) = NULL
;
194 struct plock_xop
*xop
= (struct plock_xop
*)op
;
197 spin_lock(&ops_lock
);
198 if (!list_empty(&op
->list
)) {
199 log_print("dlm_plock_callback: op on list %llx",
200 (unsigned long long)op
->info
.number
);
203 spin_unlock(&ops_lock
);
205 /* check if the following 2 are still valid or make a copy */
209 notify
= xop
->callback
;
212 notify(fl
, NULL
, op
->info
.rv
);
216 /* got fs lock; bookkeep locally as well: */
217 flc
->fl_flags
&= ~FL_SLEEP
;
218 if (posix_lock_file(file
, flc
, NULL
)) {
220 * This can only happen in the case of kmalloc() failure.
221 * The filesystem's own lock is the authoritative lock,
222 * so a failure to get the lock locally is not a disaster.
223 * As long as the fs cannot reliably cancel locks (especially
224 * in a low-memory situation), we're better off ignoring
225 * this failure than trying to recover.
227 log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
228 (unsigned long long)op
->info
.number
, file
, fl
);
231 rv
= notify(fl
, NULL
, 0);
233 /* XXX: We need to cancel the fs lock here: */
234 log_print("dlm_plock_callback: lock granted after lock request "
235 "failed; dangling lock!\n");
/*
 * Release a posix lock on resource @number in @lockspace for @file.
 *
 * posix_lock_file_wait() is called on @file with @fl first, and a failure is
 * only logged.  A zeroed op is then filled in as a DLM_PLOCK_OP_UNLOCK request
 * with pid, range and owner taken from @fl.  The owner is fl_pid when
 * fl->fl_lmops->fl_grant is set; the other visible assignment uses the
 * fl_owner pointer value.  DLM_PLOCK_FL_CLOSE is set when @fl carries FL_CLOSE.
 *
 * The caller then sleeps in wait_event() on recv_wq until op->done is non-zero,
 * reports an op that is still on a list, and drops the lockspace reference.
 *
 * NOTE(review): local declarations, error checks, the rest of the FL_CLOSE
 * branch, the hand-off of the op, result handling, freeing and the return are
 * not visible in this view.
 */
244 int dlm_posix_unlock(dlm_lockspace_t
*lockspace
, u64 number
, struct file
*file
,
245 struct file_lock
*fl
)
251 ls
= dlm_find_lockspace_local(lockspace
);
255 op
= kzalloc(sizeof(*op
), GFP_NOFS
);
261 if (posix_lock_file_wait(file
, fl
) < 0)
262 log_error(ls
, "dlm_posix_unlock: vfs unlock error %llx",
263 (unsigned long long)number
);
265 op
->info
.optype
= DLM_PLOCK_OP_UNLOCK
;
266 op
->info
.pid
= fl
->fl_pid
;
267 op
->info
.fsid
= ls
->ls_global_id
;
268 op
->info
.number
= number
;
269 op
->info
.start
= fl
->fl_start
;
270 op
->info
.end
= fl
->fl_end
;
271 if (fl
->fl_lmops
&& fl
->fl_lmops
->fl_grant
)
272 op
->info
.owner
= (__u64
) fl
->fl_pid
;
274 op
->info
.owner
= (__u64
)(long) fl
->fl_owner
;
276 if (fl
->fl_flags
& FL_CLOSE
) {
277 op
->info
.flags
|= DLM_PLOCK_FL_CLOSE
;
284 wait_event(recv_wq
, (op
->done
!= 0));
286 spin_lock(&ops_lock
);
287 if (!list_empty(&op
->list
)) {
288 log_error(ls
, "dlm_posix_unlock: op on list %llx",
289 (unsigned long long)number
);
292 spin_unlock(&ops_lock
);
301 dlm_put_lockspace(ls
);
304 EXPORT_SYMBOL_GPL(dlm_posix_unlock
);
/*
 * Query whether a posix lock described by @fl on resource @number in
 * @lockspace would conflict with an existing lock.
 *
 * A zeroed op is filled in as a DLM_PLOCK_OP_GET request: pid, range and owner
 * come from @fl and ex is set when fl_type is F_WRLCK.  The caller sleeps in
 * wait_event() on recv_wq until op->done is non-zero, and an op that is still
 * on a list is reported.
 *
 * @fl is then updated in place.  One path sets fl_type to F_UNLCK.  Another
 * overwrites fl_type (F_WRLCK when info.ex is set, else F_RDLCK), fl_flags
 * (FL_POSIX), fl_pid, fl_start and fl_end from the values in op->info.  The
 * lockspace reference is dropped before returning.
 *
 * NOTE(review): local declarations, error checks, the hand-off of the op, the
 * conditions on info.rv that select each path, freeing and the return are not
 * visible in this view.
 */
306 int dlm_posix_get(dlm_lockspace_t
*lockspace
, u64 number
, struct file
*file
,
307 struct file_lock
*fl
)
313 ls
= dlm_find_lockspace_local(lockspace
);
317 op
= kzalloc(sizeof(*op
), GFP_NOFS
);
323 op
->info
.optype
= DLM_PLOCK_OP_GET
;
324 op
->info
.pid
= fl
->fl_pid
;
325 op
->info
.ex
= (fl
->fl_type
== F_WRLCK
);
326 op
->info
.fsid
= ls
->ls_global_id
;
327 op
->info
.number
= number
;
328 op
->info
.start
= fl
->fl_start
;
329 op
->info
.end
= fl
->fl_end
;
330 if (fl
->fl_lmops
&& fl
->fl_lmops
->fl_grant
)
331 op
->info
.owner
= (__u64
) fl
->fl_pid
;
333 op
->info
.owner
= (__u64
)(long) fl
->fl_owner
;
336 wait_event(recv_wq
, (op
->done
!= 0));
338 spin_lock(&ops_lock
);
339 if (!list_empty(&op
->list
)) {
340 log_error(ls
, "dlm_posix_get: op on list %llx",
341 (unsigned long long)number
);
344 spin_unlock(&ops_lock
);
346 /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
347 -ENOENT if there are no locks on the file */
351 fl
->fl_type
= F_UNLCK
;
356 fl
->fl_type
= (op
->info
.ex
) ? F_WRLCK
: F_RDLCK
;
357 fl
->fl_flags
= FL_POSIX
;
358 fl
->fl_pid
= op
->info
.pid
;
359 fl
->fl_start
= op
->info
.start
;
360 fl
->fl_end
= op
->info
.end
;
366 dlm_put_lockspace(ls
);
369 EXPORT_SYMBOL_GPL(dlm_posix_get
);
371 /* a read copies out one plock request from the send list */
/*
 * Misc-device read handler: hand one queued request to userspace.
 *
 * @count is compared against sizeof(info) first.  Under ops_lock, the first
 * entry of send_list (if any) is looked up and its info is copied into a local
 * buffer.  The visible list operation moves the op to recv_list; the
 * DLM_PLOCK_FL_CLOSE check just above it selects between that and a branch
 * that is not visible here.  The local copy is then written to @u with
 * copy_to_user().
 *
 * NOTE(review): the end of the parameter list, the return values, the
 * empty-list case and the bodies of both DLM_PLOCK_FL_CLOSE checks are not
 * visible in this view.
 */
372 static ssize_t
dev_read(struct file
*file
, char __user
*u
, size_t count
,
375 struct dlm_plock_info info
;
376 struct plock_op
*op
= NULL
;
378 if (count
< sizeof(info
))
381 spin_lock(&ops_lock
);
382 if (!list_empty(&send_list
)) {
383 op
= list_entry(send_list
.next
, struct plock_op
, list
);
384 if (op
->info
.flags
& DLM_PLOCK_FL_CLOSE
)
387 list_move(&op
->list
, &recv_list
);
388 memcpy(&info
, &op
->info
, sizeof(info
));
390 spin_unlock(&ops_lock
);
395 /* there is no need to get a reply from userspace for unlocks
396 that were generated by the vfs cleaning up for a close
397 (the process did not make an unlock call). */
399 if (op
->info
.flags
& DLM_PLOCK_FL_CLOSE
)
402 if (copy_to_user(u
, &info
, sizeof(info
)))
407 /* a write copies in one plock result that should match a plock_op on the recv list */
/*
 * Misc-device write handler: accept one result record from userspace.
 *
 * @count must equal sizeof(info).  The record is copied in with
 * copy_from_user() and passed to check_version().  Under ops_lock, recv_list is
 * scanned for an op whose fsid, number and owner all equal those of the
 * record.  A matching op is unlinked with list_del_init() and its info is
 * overwritten with the record from userspace.
 *
 * After the lock is dropped, dlm_plock_callback() is called on one path and a
 * message with the fsid and number of the record is logged on another.
 *
 * NOTE(review): the end of the parameter list, the declaration of op, the
 * error returns, the rest of the match body (including how found and
 * do_callback are set), the conditions selecting the final paths and the
 * return are not visible in this view.
 */
409 static ssize_t
dev_write(struct file
*file
, const char __user
*u
, size_t count
,
412 struct dlm_plock_info info
;
414 int found
= 0, do_callback
= 0;
416 if (count
!= sizeof(info
))
419 if (copy_from_user(&info
, u
, sizeof(info
)))
422 if (check_version(&info
))
425 spin_lock(&ops_lock
);
426 list_for_each_entry(op
, &recv_list
, list
) {
427 if (op
->info
.fsid
== info
.fsid
&&
428 op
->info
.number
== info
.number
&&
429 op
->info
.owner
== info
.owner
) {
430 struct plock_xop
*xop
= (struct plock_xop
*)op
;
431 list_del_init(&op
->list
);
432 memcpy(&op
->info
, &info
, sizeof(info
));
441 spin_unlock(&ops_lock
);
445 dlm_plock_callback(op
);
449 log_print("dev_write no op %x %llx", info
.fsid
,
450 (unsigned long long)info
.number
);
/*
 * Misc-device poll handler.
 *
 * Registers the caller on send_wq through poll_wait(), then checks send_list
 * under ops_lock.  The mask starts at 0 and becomes POLLIN | POLLRDNORM when at
 * least one request is queued.
 *
 * NOTE(review): the return statement is not visible in this view; presumably
 * mask is returned -- confirm against the full file.
 */
454 static unsigned int dev_poll(struct file
*file
, poll_table
*wait
)
456 unsigned int mask
= 0;
458 poll_wait(file
, &send_wq
, wait
);
460 spin_lock(&ops_lock
);
461 if (!list_empty(&send_list
))
462 mask
= POLLIN
| POLLRDNORM
;
463 spin_unlock(&ops_lock
);
/*
 * File operations table for the plock misc device.
 *
 * The visible members set the owning module to THIS_MODULE and use
 * noop_llseek for seeking.
 *
 * NOTE(review): other members and the closing brace are not visible in this
 * view; presumably dev_read, dev_write and dev_poll are wired in here --
 * confirm against the full file.
 */
468 static const struct file_operations dev_fops
= {
472 .owner
= THIS_MODULE
,
473 .llseek
= noop_llseek
,
/*
 * Misc device descriptor passed to misc_register() and misc_deregister().
 *
 * The minor number is allocated dynamically (MISC_DYNAMIC_MINOR) and the
 * device name is DLM_PLOCK_MISC_NAME.
 *
 * NOTE(review): any further members (for example the file operations pointer)
 * and the closing brace are not visible in this view.
 */
476 static struct miscdevice plock_dev_misc
= {
477 .minor
= MISC_DYNAMIC_MINOR
,
478 .name
= DLM_PLOCK_MISC_NAME
,
/*
 * Set up the plock device state.
 *
 * Initialises ops_lock, both op lists and both wait queues, then registers
 * plock_dev_misc with misc_register().  The result is stored in rv and a
 * message including rv is logged for a failed registration.
 *
 * NOTE(review): the declaration of rv, the condition guarding the log message
 * and the return statement are not visible in this view.
 */
482 int dlm_plock_init(void)
486 spin_lock_init(&ops_lock
);
487 INIT_LIST_HEAD(&send_list
);
488 INIT_LIST_HEAD(&recv_list
);
489 init_waitqueue_head(&send_wq
);
490 init_waitqueue_head(&recv_wq
);
492 rv
= misc_register(&plock_dev_misc
);
494 log_print("dlm_plock_init: misc_register failed %d", rv
);
498 void dlm_plock_exit(void)
500 if (misc_deregister(&plock_dev_misc
) < 0)
501 log_print("dlm_plock_exit: misc_deregister failed");