2 * Copyright (C) 2006-2007 Red Hat, Inc. All rights reserved.
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License v.2.
9 #include <linux/miscdevice.h>
10 #include <linux/init.h>
11 #include <linux/wait.h>
12 #include <linux/module.h>
13 #include <linux/file.h>
15 #include <linux/poll.h>
16 #include <linux/signal.h>
17 #include <linux/spinlock.h>
18 #include <linux/dlm.h>
19 #include <linux/dlm_device.h>
21 #include "dlm_internal.h"
22 #include "lockspace.h"
24 #include "lvb_table.h"
/* Prefix used by create_misc_device() when it builds "<prefix>_<name>"
   device names. */
27 static const char *name_prefix
="dlm";
/* Control device; its name, fops and minor are filled in and registered
   by dlm_user_init(). */
28 static struct miscdevice ctl_device
;
/* Forward declaration: create_misc_device() takes the address of
   device_fops before the initialised definition appears. */
29 static const struct file_operations device_fops
;
33 struct dlm_lock_params32
{
46 char lvb
[DLM_USER_LVB_LEN
];
50 struct dlm_write_request32
{
57 struct dlm_lock_params32 lock
;
58 struct dlm_lspace_params lspace
;
59 struct dlm_purge_params purge
;
70 struct dlm_lock_result32
{
75 struct dlm_lksb32 lksb
;
78 /* Offsets may be zero if no data is present */
/* Translate a 32-bit layout write request (kb32) into the native
   struct dlm_write_request (kb).  The version words and is64bit are
   copied first; the command-specific payload follows: lockspace params
   for DLM_USER_CREATE_LOCKSPACE / DLM_USER_REMOVE_LOCKSPACE, purge params
   for DLM_USER_PURGE, and the lock params. */
82 static void compat_input(struct dlm_write_request
*kb
,
83 struct dlm_write_request32
*kb32
)
85 kb
->version
[0] = kb32
->version
[0];
86 kb
->version
[1] = kb32
->version
[1];
87 kb
->version
[2] = kb32
->version
[2];
90 kb
->is64bit
= kb32
->is64bit
;
/* lockspace create/remove: copy flags, minor and the lockspace name */
91 if (kb
->cmd
== DLM_USER_CREATE_LOCKSPACE
||
92 kb
->cmd
== DLM_USER_REMOVE_LOCKSPACE
) {
93 kb
->i
.lspace
.flags
= kb32
->i
.lspace
.flags
;
94 kb
->i
.lspace
.minor
= kb32
->i
.lspace
.minor
;
/* NOTE(review): unbounded strcpy; device_write() fills the source
   buffer with copy_from_user(), so confirm the name is guaranteed to be
   NUL-terminated inside that buffer and to fit the destination. */
95 strcpy(kb
->i
.lspace
.name
, kb32
->i
.lspace
.name
);
/* purge: copy nodeid and pid */
96 } else if (kb
->cmd
== DLM_USER_PURGE
) {
97 kb
->i
.purge
.nodeid
= kb32
->i
.purge
.nodeid
;
98 kb
->i
.purge
.pid
= kb32
->i
.purge
.pid
;
/* lock parameters: scalar members are copied as-is */
100 kb
->i
.lock
.mode
= kb32
->i
.lock
.mode
;
101 kb
->i
.lock
.namelen
= kb32
->i
.lock
.namelen
;
102 kb
->i
.lock
.flags
= kb32
->i
.lock
.flags
;
103 kb
->i
.lock
.lkid
= kb32
->i
.lock
.lkid
;
104 kb
->i
.lock
.parent
= kb32
->i
.lock
.parent
;
/* the ast/lksb members are converted to pointers via (void *)(long) */
105 kb
->i
.lock
.castparam
= (void *)(long)kb32
->i
.lock
.castparam
;
106 kb
->i
.lock
.castaddr
= (void *)(long)kb32
->i
.lock
.castaddr
;
107 kb
->i
.lock
.bastparam
= (void *)(long)kb32
->i
.lock
.bastparam
;
108 kb
->i
.lock
.bastaddr
= (void *)(long)kb32
->i
.lock
.bastaddr
;
109 kb
->i
.lock
.lksb
= (void *)(long)kb32
->i
.lock
.lksb
;
110 memcpy(kb
->i
.lock
.lvb
, kb32
->i
.lock
.lvb
, DLM_USER_LVB_LEN
);
/* NOTE(review): the copy length is the namelen taken from the request
   itself; no bound check is visible here - confirm it is validated
   against the size of the buffers. */
111 memcpy(kb
->i
.lock
.name
, kb32
->i
.lock
.name
, kb
->i
.lock
.namelen
);
/* Fill the 32-bit layout result (res32) from the native result (res).
   Pointer members are narrowed with (__u32)(long); bast_mode, lvb_offset,
   length and the lksb members are copied one by one. */
115 static void compat_output(struct dlm_lock_result
*res
,
116 struct dlm_lock_result32
*res32
)
118 res32
->user_astaddr
= (__u32
)(long)res
->user_astaddr
;
119 res32
->user_astparam
= (__u32
)(long)res
->user_astparam
;
120 res32
->user_lksb
= (__u32
)(long)res
->user_lksb
;
121 res32
->bast_mode
= res
->bast_mode
;
123 res32
->lvb_offset
= res
->lvb_offset
;
124 res32
->length
= res
->length
;
/* lock status block */
126 res32
->lksb
.sb_status
= res
->lksb
.sb_status
;
127 res32
->lksb
.sb_flags
= res
->lksb
.sb_flags
;
128 res32
->lksb
.sb_lkid
= res
->lksb
.sb_lkid
;
129 res32
->lksb
.sb_lvbptr
= (__u32
)(long)res
->lksb
.sb_lvbptr
;
133 /* we could possibly check if the cancel of an orphan has resulted in the lkb
134 being removed and then remove that lkb from the orphans list and free it */
/* Record an ast of kind `type` (AST_COMP or AST_BAST) for an lkb.
   Visible steps: the DLM_IFL_ORPHAN / DLM_IFL_DEAD flags are tested once
   before and once after taking ls_clear_proc_locks; under proc->asts_spin
   `type` is OR-ed into lkb->lkb_ast_type, a reference is taken, the lkb
   is appended to proc->asts and proc->wait is woken; end-of-life locks
   are flagged with DLM_IFL_ENDOFLIFE; ua->update_user_lvb is decided;
   finally the lkb is unlinked from lkb_ownqueue under locks_spin. */
136 void dlm_user_add_ast(struct dlm_lkb
*lkb
, int type
)
139 struct dlm_user_args
*ua
;
140 struct dlm_user_proc
*proc
;
141 int eol
= 0, ast_type
;
143 if (lkb
->lkb_flags
& (DLM_IFL_ORPHAN
| DLM_IFL_DEAD
))
146 ls
= lkb
->lkb_resource
->res_ls
;
147 mutex_lock(&ls
->ls_clear_proc_locks
);
149 /* If ORPHAN/DEAD flag is set, it means the process is dead so an ast
150 can't be delivered. For ORPHANs, dlm_clear_proc_locks() freed
151 lkb->ua so we can't try to use it. This second check is necessary
152 for cases where a completion ast is received for an operation that
153 began before clear_proc_locks did its cancel/unlock. */
155 if (lkb
->lkb_flags
& (DLM_IFL_ORPHAN
| DLM_IFL_DEAD
))
/* the per-lock user arguments are stored in lkb_astparam */
158 DLM_ASSERT(lkb
->lkb_astparam
, dlm_print_lkb(lkb
););
159 ua
= (struct dlm_user_args
*)lkb
->lkb_astparam
;
/* a blocking ast is tested against a NULL bastaddr */
162 if (type
== AST_BAST
&& ua
->bastaddr
== NULL
)
165 spin_lock(&proc
->asts_spin
);
/* remember the previous ast bits so an overlapping completion can be
   reported below */
167 ast_type
= lkb
->lkb_ast_type
;
168 lkb
->lkb_ast_type
|= type
;
171 kref_get(&lkb
->lkb_ref
);
172 list_add_tail(&lkb
->lkb_astqueue
, &proc
->asts
);
173 wake_up_interruptible(&proc
->wait
);
175 if (type
== AST_COMP
&& (ast_type
& AST_COMP
))
176 log_debug(ls
, "ast overlap %x status %x %x",
177 lkb
->lkb_id
, ua
->lksb
.sb_status
, lkb
->lkb_flags
);
179 /* Figure out if this lock is at the end of its life and no longer
180 available for the application to use. The lkb still exists until
181 the final ast is read. A lock becomes EOL in three situations:
182 1. a noqueue request fails with EAGAIN
183 2. an unlock completes with EUNLOCK
184 3. a cancel of a waiting request completes with ECANCEL
185 An EOL lock needs to be removed from the process's list of locks.
186 And we can't allow any new operation on an EOL lock. This is
187 not related to the lifetime of the lkb struct which is managed
188 entirely by refcount. */
190 if (type
== AST_COMP
&&
191 lkb
->lkb_grmode
== DLM_LOCK_IV
&&
192 ua
->lksb
.sb_status
== -EAGAIN
)
194 else if (ua
->lksb
.sb_status
== -DLM_EUNLOCK
||
195 (ua
->lksb
.sb_status
== -DLM_ECANCEL
&&
196 lkb
->lkb_grmode
== DLM_LOCK_IV
))
/* clear any pending blocking ast and mark the lock end-of-life */
199 lkb
->lkb_ast_type
&= ~AST_BAST
;
200 lkb
->lkb_flags
|= DLM_IFL_ENDOFLIFE
;
203 /* We want to copy the lvb to userspace when the completion
204 ast is read if the status is 0, the lock has an lvb and
205 lvb_ops says we should. We could probably have set_lvb_lock()
206 set update_user_lvb instead and not need old_mode */
208 if ((lkb
->lkb_ast_type
& AST_COMP
) &&
209 (lkb
->lkb_lksb
->sb_status
== 0) &&
210 lkb
->lkb_lksb
->sb_lvbptr
&&
211 dlm_lvb_operations
[ua
->old_mode
+ 1][lkb
->lkb_grmode
+ 1])
212 ua
->update_user_lvb
= 1;
214 ua
->update_user_lvb
= 0;
216 spin_unlock(&proc
->asts_spin
);
/* unlink the lkb from lkb_ownqueue (if still linked) under the owning
   process's locks_spin */
219 spin_lock(&ua
->proc
->locks_spin
);
220 if (!list_empty(&lkb
->lkb_ownqueue
)) {
221 list_del_init(&lkb
->lkb_ownqueue
);
224 spin_unlock(&ua
->proc
->locks_spin
);
227 mutex_unlock(&ls
->ls_clear_proc_locks
);
/* Handle a lock request written to a lockspace device.  Looks up the
   lockspace recorded in proc, allocates a zeroed struct dlm_user_args,
   stores the caller's lksb pointer and completion/blocking ast
   address+parameter in it, and hands the request to dlm_user_convert()
   (DLM_LKF_CONVERT set) or dlm_user_request().  The lockspace reference
   is dropped with dlm_put_lockspace() before returning. */
230 static int device_user_lock(struct dlm_user_proc
*proc
,
231 struct dlm_lock_params
*params
)
234 struct dlm_user_args
*ua
;
237 ls
= dlm_find_lockspace_local(proc
->lockspace
);
/* both a completion ast address and an lksb pointer are required */
241 if (!params
->castaddr
|| !params
->lksb
) {
246 ua
= kzalloc(sizeof(struct dlm_user_args
), GFP_KERNEL
);
250 ua
->user_lksb
= params
->lksb
;
251 ua
->castparam
= params
->castparam
;
252 ua
->castaddr
= params
->castaddr
;
253 ua
->bastparam
= params
->bastparam
;
254 ua
->bastaddr
= params
->bastaddr
;
256 if (params
->flags
& DLM_LKF_CONVERT
)
257 error
= dlm_user_convert(ls
, ua
,
258 params
->mode
, params
->flags
,
259 params
->lkid
, params
->lvb
);
261 error
= dlm_user_request(ls
, ua
,
262 params
->mode
, params
->flags
,
263 params
->name
, params
->namelen
,
/* the lock id from ua's lksb is used as the result value */
266 error
= ua
->lksb
.sb_lkid
;
269 dlm_put_lockspace(ls
);
/* Handle an unlock request written to a lockspace device.  Looks up the
   lockspace recorded in proc, allocates a zeroed struct dlm_user_args
   carrying the caller's lksb pointer and completion ast address and
   parameter, and calls dlm_user_cancel() (DLM_LKF_CANCEL set) or
   dlm_user_unlock() for params->lkid.  The lockspace reference is dropped
   with dlm_put_lockspace(). */
273 static int device_user_unlock(struct dlm_user_proc
*proc
,
274 struct dlm_lock_params
*params
)
277 struct dlm_user_args
*ua
;
280 ls
= dlm_find_lockspace_local(proc
->lockspace
);
284 ua
= kzalloc(sizeof(struct dlm_user_args
), GFP_KERNEL
);
288 ua
->user_lksb
= params
->lksb
;
289 ua
->castparam
= params
->castparam
;
290 ua
->castaddr
= params
->castaddr
;
292 if (params
->flags
& DLM_LKF_CANCEL
)
293 error
= dlm_user_cancel(ls
, ua
, params
->flags
, params
->lkid
);
295 error
= dlm_user_unlock(ls
, ua
, params
->flags
, params
->lkid
,
298 dlm_put_lockspace(ls
);
/* Set up and register the misc device of lockspace ls.  The device name
   is built as "<name_prefix>_<name>" in a freshly allocated buffer, the
   fops are device_fops and the minor is MISC_DYNAMIC_MINOR. */
302 static int create_misc_device(struct dlm_ls
*ls
, char *name
)
/* +2: one byte for the '_' separator, one for the terminating NUL */
307 len
= strlen(name
) + strlen(name_prefix
) + 2;
308 ls
->ls_device
.name
= kzalloc(len
, GFP_KERNEL
);
309 if (!ls
->ls_device
.name
)
312 snprintf((char *)ls
->ls_device
.name
, len
, "%s_%s", name_prefix
,
314 ls
->ls_device
.fops
= &device_fops
;
315 ls
->ls_device
.minor
= MISC_DYNAMIC_MINOR
;
317 error
= misc_register(&ls
->ls_device
);
/* presumably only reached when misc_register() fails - the guarding
   condition is not part of this listing */
319 kfree(ls
->ls_device
.name
);
/* Handle a purge request: look up the lockspace recorded in proc, call
   dlm_user_purge() with the requested nodeid and pid, then drop the
   lockspace reference. */
325 static int device_user_purge(struct dlm_user_proc
*proc
,
326 struct dlm_purge_params
*params
)
331 ls
= dlm_find_lockspace_local(proc
->lockspace
);
335 error
= dlm_user_purge(ls
, proc
, params
->nodeid
, params
->pid
);
337 dlm_put_lockspace(ls
);
/* Create a lockspace named params->name and its misc device.  Tests
   CAP_SYS_ADMIN, creates the lockspace with dlm_new_lockspace() using
   DLM_USER_LVB_LEN as the lvb length, then registers the device through
   create_misc_device().  The device minor is used as the result value. */
341 static int device_create_lockspace(struct dlm_lspace_params
*params
)
343 dlm_lockspace_t
*lockspace
;
347 if (!capable(CAP_SYS_ADMIN
))
350 error
= dlm_new_lockspace(params
->name
, strlen(params
->name
),
351 &lockspace
, 0, DLM_USER_LVB_LEN
);
/* get the struct dlm_ls for the new handle so the device can be set up */
355 ls
= dlm_find_lockspace_local(lockspace
);
359 error
= create_misc_device(ls
, params
->name
);
360 dlm_put_lockspace(ls
);
/* presumably the failure path of create_misc_device() - the guarding
   condition is not part of this listing */
363 dlm_release_lockspace(lockspace
, 0);
365 error
= ls
->ls_device
.minor
;
370 static int device_remove_lockspace(struct dlm_lspace_params
*params
)
372 dlm_lockspace_t
*lockspace
;
374 int error
, force
= 0;
376 if (!capable(CAP_SYS_ADMIN
))
379 ls
= dlm_find_lockspace_device(params
->minor
);
383 /* Deregister the misc device first, so we don't have
384 * a device that's not attached to a lockspace. If
385 * dlm_release_lockspace fails then we can recreate it. */
387 error
= misc_deregister(&ls
->ls_device
);
389 dlm_put_lockspace(ls
);
392 kfree(ls
->ls_device
.name
);
394 if (params
->flags
& DLM_USER_LSFLG_FORCEFREE
)
397 lockspace
= ls
->ls_local_handle
;
399 /* dlm_release_lockspace waits for references to go to zero,
400 so all processes will need to close their device for the ls
401 before the release will proceed */
403 dlm_put_lockspace(ls
);
404 error
= dlm_release_lockspace(lockspace
, force
);
406 create_misc_device(ls
, ls
->ls_name
);
411 /* Check the user's version matches ours */
/* The mismatch condition is: the major number (version[0]) differs from
   DLM_DEVICE_VERSION_MAJOR, or the majors are equal and the user's minor
   (version[1]) is greater than DLM_DEVICE_VERSION_MINOR.  On mismatch a
   KERN_DEBUG message with both versions is printed. */
412 static int check_version(struct dlm_write_request
*req
)
414 if (req
->version
[0] != DLM_DEVICE_VERSION_MAJOR
||
415 (req
->version
[0] == DLM_DEVICE_VERSION_MAJOR
&&
416 req
->version
[1] > DLM_DEVICE_VERSION_MINOR
)) {
418 printk(KERN_DEBUG
"dlm: process %s (%d) version mismatch "
419 "user (%d.%d.%d) kernel (%d.%d.%d)\n",
425 DLM_DEVICE_VERSION_MAJOR
,
426 DLM_DEVICE_VERSION_MINOR
,
427 DLM_DEVICE_VERSION_PATCH
);
437 * dlm_user_request -> request_lock
438 * dlm_user_convert -> convert_lock
441 * dlm_user_unlock -> unlock_lock
442 * dlm_user_cancel -> cancel_lock
444 * device_create_lockspace
447 * device_remove_lockspace
448 * dlm_release_lockspace
451 /* a write to a lockspace device is a lock or unlock request, a write
452 to the control device is to create/remove a lockspace */
/* Visible flow: the write size is compared with the request struct size
   (32-bit and native variants), the request is copied into a kmalloc'ed
   buffer with copy_from_user() and its version is checked.  Requests with
   is64bit clear are re-packed into a larger buffer through compat_input()
   and DLM_PROC_FLAGS_COMPAT is set on proc.  All signals are blocked
   while the command is passed to its handler and the previous mask is
   restored afterwards.
   NOTE(review): only lower bounds on count are visible before
   kmalloc(count) - confirm an upper bound is enforced. */
454 static ssize_t
device_write(struct file
*file
, const char __user
*buf
,
455 size_t count
, loff_t
*ppos
)
457 struct dlm_user_proc
*proc
= file
->private_data
;
458 struct dlm_write_request
*kbuf
;
459 sigset_t tmpsig
, allsigs
;
463 if (count
< sizeof(struct dlm_write_request32
))
465 if (count
< sizeof(struct dlm_write_request
))
469 kbuf
= kmalloc(count
, GFP_KERNEL
);
473 if (copy_from_user(kbuf
, buf
, count
)) {
478 if (check_version(kbuf
)) {
/* 32-bit request: allocate a buffer enlarged by the size difference
   between the two request layouts and convert into it */
484 if (!kbuf
->is64bit
) {
485 struct dlm_write_request32
*k32buf
;
486 k32buf
= (struct dlm_write_request32
*)kbuf
;
487 kbuf
= kmalloc(count
+ (sizeof(struct dlm_write_request
) -
488 sizeof(struct dlm_write_request32
)), GFP_KERNEL
);
493 set_bit(DLM_PROC_FLAGS_COMPAT
, &proc
->flags
);
494 compat_input(kbuf
, k32buf
);
499 /* do we really need this? can a write happen after a close? */
500 if ((kbuf
->cmd
== DLM_USER_LOCK
|| kbuf
->cmd
== DLM_USER_UNLOCK
) &&
501 test_bit(DLM_PROC_FLAGS_CLOSING
, &proc
->flags
))
/* block every signal for the duration of the command */
504 sigfillset(&allsigs
);
505 sigprocmask(SIG_BLOCK
, &allsigs
, &tmpsig
);
513 log_print("no locking on control device");
516 error
= device_user_lock(proc
, &kbuf
->i
.lock
);
519 case DLM_USER_UNLOCK
:
521 log_print("no locking on control device");
524 error
= device_user_unlock(proc
, &kbuf
->i
.lock
);
527 case DLM_USER_CREATE_LOCKSPACE
:
529 log_print("create/remove only on control device");
532 error
= device_create_lockspace(&kbuf
->i
.lspace
);
535 case DLM_USER_REMOVE_LOCKSPACE
:
537 log_print("create/remove only on control device");
540 error
= device_remove_lockspace(&kbuf
->i
.lspace
);
545 log_print("no locking on control device");
548 error
= device_user_purge(proc
, &kbuf
->i
.purge
);
552 log_print("Unknown command passed to DLM device : %d\n",
/* restore the caller's signal mask */
557 sigprocmask(SIG_SETMASK
, &tmpsig
, NULL
);
564 /* Every process that opens the lockspace device has its own "proc" structure
565 hanging off the open file that's used to keep track of locks owned by the
566 process and asts that need to be delivered to the process. */
/* Looks up the lockspace by the inode's minor number, allocates a zeroed
   struct dlm_user_proc, initialises its lists, spinlocks and wait queue,
   and stores it in file->private_data. */
568 static int device_open(struct inode
*inode
, struct file
*file
)
570 struct dlm_user_proc
*proc
;
573 ls
= dlm_find_lockspace_device(iminor(inode
));
577 proc
= kzalloc(sizeof(struct dlm_user_proc
), GFP_KERNEL
);
/* presumably the allocation-failure path - the guarding condition is not
   part of this listing */
579 dlm_put_lockspace(ls
);
583 proc
->lockspace
= ls
->ls_local_handle
;
584 INIT_LIST_HEAD(&proc
->asts
);
585 INIT_LIST_HEAD(&proc
->locks
);
586 INIT_LIST_HEAD(&proc
->unlocking
);
587 spin_lock_init(&proc
->asts_spin
);
588 spin_lock_init(&proc
->locks_spin
);
589 init_waitqueue_head(&proc
->wait
);
590 file
->private_data
= proc
;
/* Release handler for a lockspace device.  With all signals blocked it
   sets DLM_PROC_FLAGS_CLOSING on the proc, calls dlm_clear_proc_locks(),
   clears file->private_data, and drops two lockspace references: the one
   taken here and the one taken in device_open().  The signal mask is
   restored at the end. */
595 static int device_close(struct inode
*inode
, struct file
*file
)
597 struct dlm_user_proc
*proc
= file
->private_data
;
599 sigset_t tmpsig
, allsigs
;
601 ls
= dlm_find_lockspace_local(proc
->lockspace
);
605 sigfillset(&allsigs
);
606 sigprocmask(SIG_BLOCK
, &allsigs
, &tmpsig
);
/* device_write() and device_read() test this flag */
608 set_bit(DLM_PROC_FLAGS_CLOSING
, &proc
->flags
);
610 dlm_clear_proc_locks(ls
, proc
);
612 /* at this point no more lkb's should exist for this lockspace,
613 so there's no chance of dlm_user_add_ast() being called and
614 looking for lkb->ua->proc */
617 file
->private_data
= NULL
;
619 dlm_put_lockspace(ls
);
620 dlm_put_lockspace(ls
); /* for the find in device_open() */
622 /* FIXME: AUTOFREE: if this ls is no longer used do
623 device_remove_lockspace() */
625 sigprocmask(SIG_SETMASK
, &tmpsig
, NULL
);
/* Build a struct dlm_lock_result describing one ast for ua and copy it to
   the user buffer buf (of size count).  The result carries ua's lksb and
   user_lksb; for AST_BAST the blocking ast address/parameter and bmode
   are reported, and the completion ast address/parameter are the other
   visible assignment.  If an updated lvb is present and the buffer is
   large enough, the lvb is copied behind the struct and its offset is
   recorded in lvb_offset.  compat_output() produces the 32-bit layout
   (result32) - presumably when `compat` is set; the test itself is not
   part of this listing. */
631 static int copy_result_to_user(struct dlm_user_args
*ua
, int compat
, int type
,
632 int bmode
, char __user
*buf
, size_t count
)
635 struct dlm_lock_result32 result32
;
637 struct dlm_lock_result result
;
643 memset(&result
, 0, sizeof(struct dlm_lock_result
));
644 memcpy(&result
.lksb
, &ua
->lksb
, sizeof(struct dlm_lksb
));
645 result
.user_lksb
= ua
->user_lksb
;
647 /* FIXME: dlm1 provides for the user's bastparam/addr to not be updated
648 in a conversion unless the conversion is successful. See code
649 in dlm_user_convert() for updating ua from ua_tmp. OpenVMS, though,
650 notes that a new blocking AST address and parameter are set even if
651 the conversion fails, so maybe we should just do that. */
653 if (type
== AST_BAST
) {
654 result
.user_astaddr
= ua
->bastaddr
;
655 result
.user_astparam
= ua
->bastparam
;
656 result
.bast_mode
= bmode
;
658 result
.user_astaddr
= ua
->castaddr
;
659 result
.user_astparam
= ua
->castparam
;
/* len is the size of the result struct in the layout being returned */
664 len
= sizeof(struct dlm_lock_result32
);
667 len
= sizeof(struct dlm_lock_result
);
670 /* copy lvb to userspace if there is one, it's been updated, and
671 the user buffer has space for it */
673 if (ua
->update_user_lvb
&& ua
->lksb
.sb_lvbptr
&&
674 count
>= len
+ DLM_USER_LVB_LEN
) {
675 if (copy_to_user(buf
+len
, ua
->lksb
.sb_lvbptr
,
/* the lvb sits directly behind the struct, at offset len */
681 result
.lvb_offset
= len
;
682 len
+= DLM_USER_LVB_LEN
;
689 compat_output(&result
, &result32
);
690 resultptr
= &result32
;
694 if (copy_to_user(buf
, resultptr
, struct_len
))
702 /* a read returns a single ast described in a struct dlm_lock_result */
/* Visible flow: the read size is compared with the result struct size
   (32-bit and native variants) and DLM_PROC_FLAGS_CLOSING is tested.
   Under proc->asts_spin the proc->asts list is examined; when it is empty
   an O_NONBLOCK reader drops the lock, and other readers queue on
   proc->wait in TASK_INTERRUPTIBLE state.  The first queued lkb then
   yields one ast (completion before blocking), is unlinked once no ast
   bits remain, and the result is delivered via copy_result_to_user(). */
704 static ssize_t
device_read(struct file
*file
, char __user
*buf
, size_t count
,
707 struct dlm_user_proc
*proc
= file
->private_data
;
709 struct dlm_user_args
*ua
;
710 DECLARE_WAITQUEUE(wait
, current
);
711 int error
, type
=0, bmode
=0, removed
= 0;
714 if (count
< sizeof(struct dlm_lock_result32
))
716 if (count
< sizeof(struct dlm_lock_result
))
720 /* do we really need this? can a read happen after a close? */
721 if (test_bit(DLM_PROC_FLAGS_CLOSING
, &proc
->flags
))
724 spin_lock(&proc
->asts_spin
);
725 if (list_empty(&proc
->asts
)) {
726 if (file
->f_flags
& O_NONBLOCK
) {
727 spin_unlock(&proc
->asts_spin
);
/* wait for an ast or a signal; asts_spin is released and re-taken
   inside the wait */
731 add_wait_queue(&proc
->wait
, &wait
);
734 set_current_state(TASK_INTERRUPTIBLE
);
735 if (list_empty(&proc
->asts
) && !signal_pending(current
)) {
736 spin_unlock(&proc
->asts_spin
);
738 spin_lock(&proc
->asts_spin
);
741 set_current_state(TASK_RUNNING
);
742 remove_wait_queue(&proc
->wait
, &wait
);
744 if (signal_pending(current
)) {
745 spin_unlock(&proc
->asts_spin
);
750 if (list_empty(&proc
->asts
)) {
751 spin_unlock(&proc
->asts_spin
);
755 /* there may be both completion and blocking asts to return for
756 the lkb, don't remove lkb from asts list unless no asts remain */
758 lkb
= list_entry(proc
->asts
.next
, struct dlm_lkb
, lkb_astqueue
);
/* a pending completion ast is consumed before a blocking ast */
760 if (lkb
->lkb_ast_type
& AST_COMP
) {
761 lkb
->lkb_ast_type
&= ~AST_COMP
;
763 } else if (lkb
->lkb_ast_type
& AST_BAST
) {
764 lkb
->lkb_ast_type
&= ~AST_BAST
;
766 bmode
= lkb
->lkb_bastmode
;
769 if (!lkb
->lkb_ast_type
) {
770 list_del(&lkb
->lkb_astqueue
);
773 spin_unlock(&proc
->asts_spin
);
/* the copy to user space is done after asts_spin has been released */
775 ua
= (struct dlm_user_args
*)lkb
->lkb_astparam
;
776 error
= copy_result_to_user(ua
,
777 test_bit(DLM_PROC_FLAGS_COMPAT
, &proc
->flags
),
778 type
, bmode
, buf
, count
);
780 /* removes reference for the proc->asts lists added by
781 dlm_user_add_ast() and may result in the lkb being freed */
/* Poll handler for a lockspace device: registers the caller on
   proc->wait and reports POLLIN | POLLRDNORM when proc->asts is
   non-empty.  The list is inspected under asts_spin. */
788 static unsigned int device_poll(struct file
*file
, poll_table
*wait
)
790 struct dlm_user_proc
*proc
= file
->private_data
;
792 poll_wait(file
, &proc
->wait
, wait
);
794 spin_lock(&proc
->asts_spin
);
795 if (!list_empty(&proc
->asts
)) {
796 spin_unlock(&proc
->asts_spin
);
797 return POLLIN
| POLLRDNORM
;
799 spin_unlock(&proc
->asts_spin
);
/* Open handler of the control device: the file carries no per-open
   state, so private_data is set to NULL. */
803 static int ctl_device_open(struct inode
*inode
, struct file
*file
)
805 file
->private_data
= NULL
;
/* Release handler of the control device; no statements of its body are
   part of this listing. */
809 static int ctl_device_close(struct inode
*inode
, struct file
*file
)
/* File operations installed on each lockspace misc device by
   create_misc_device(). */
814 static const struct file_operations device_fops
= {
816 .release
= device_close
,
818 .write
= device_write
,
820 .owner
= THIS_MODULE
,
/* File operations of the control device registered by dlm_user_init();
   writes go through the same device_write() as the lockspace devices. */
823 static const struct file_operations ctl_device_fops
= {
824 .open
= ctl_device_open
,
825 .release
= ctl_device_close
,
826 .write
= device_write
,
827 .owner
= THIS_MODULE
,
/* Register the control device: named "dlm-control", using
   ctl_device_fops and a dynamically assigned minor.  A message is logged
   for a failed misc_register(). */
830 int dlm_user_init(void)
834 ctl_device
.name
= "dlm-control";
835 ctl_device
.fops
= &ctl_device_fops
;
836 ctl_device
.minor
= MISC_DYNAMIC_MINOR
;
838 error
= misc_register(&ctl_device
);
840 log_print("misc_register failed for control device");
/* Unregister the control device registered by dlm_user_init(). */
845 void dlm_user_exit(void)
847 misc_deregister(&ctl_device
);