/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
**
**  This copyrighted material is made available to anyone wishing to use,
**  modify, copy, or redistribute it subject to the terms and conditions
**  of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "recoverd.h"
#include "dir.h"
#include "lowcomms.h"
#include "config.h"
#include "memory.h"
#include "lock.h"
#include "recover.h"
#include "requestqueue.h"
#include "user.h"
#include "ast.h"
static struct mutex         ls_lock;
static struct list_head     lslist;
static spinlock_t           lslist_lock;
static struct task_struct * scand_task;
static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
{
        ssize_t ret = len;
        int n = simple_strtol(buf, NULL, 0);

        ls = dlm_find_lockspace_local(ls->ls_local_handle);
        if (!ls)
                return -EINVAL;

        switch (n) {
        case 0:
                dlm_ls_stop(ls);
                break;
        case 1:
                dlm_ls_start(ls);
                break;
        default:
                ret = -EINVAL;
        }
        dlm_put_lockspace(ls);
        return ret;
}
static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len)
{
        ls->ls_uevent_result = simple_strtol(buf, NULL, 0);
        set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags);
        wake_up(&ls->ls_uevent_wait);
        return len;
}
static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf)
{
        return snprintf(buf, PAGE_SIZE, "%u\n", ls->ls_global_id);
}

static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len)
{
        ls->ls_global_id = simple_strtoul(buf, NULL, 0);
        return len;
}

static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf)
{
        uint32_t status = dlm_recover_status(ls);
        return snprintf(buf, PAGE_SIZE, "%x\n", status);
}

static ssize_t dlm_recover_nodeid_show(struct dlm_ls *ls, char *buf)
{
        return snprintf(buf, PAGE_SIZE, "%d\n", ls->ls_recover_nodeid);
}
struct dlm_attr {
        struct attribute attr;
        ssize_t (*show)(struct dlm_ls *, char *);
        ssize_t (*store)(struct dlm_ls *, const char *, size_t);
};
static struct dlm_attr dlm_attr_control = {
        .attr  = {.name = "control", .mode = S_IWUSR},
        .store = dlm_control_store
};

static struct dlm_attr dlm_attr_event = {
        .attr  = {.name = "event_done", .mode = S_IWUSR},
        .store = dlm_event_store
};

static struct dlm_attr dlm_attr_id = {
        .attr  = {.name = "id", .mode = S_IRUGO | S_IWUSR},
        .show  = dlm_id_show,
        .store = dlm_id_store
};

static struct dlm_attr dlm_attr_recover_status = {
        .attr  = {.name = "recover_status", .mode = S_IRUGO},
        .show  = dlm_recover_status_show
};

static struct dlm_attr dlm_attr_recover_nodeid = {
        .attr  = {.name = "recover_nodeid", .mode = S_IRUGO},
        .show  = dlm_recover_nodeid_show
};
static struct attribute *dlm_attrs[] = {
        &dlm_attr_control.attr,
        &dlm_attr_event.attr,
        &dlm_attr_id.attr,
        &dlm_attr_recover_status.attr,
        &dlm_attr_recover_nodeid.attr,
        NULL,
};
static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr,
                             char *buf)
{
        struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
        struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
        return a->show ? a->show(ls, buf) : 0;
}
static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr,
                              const char *buf, size_t len)
{
        struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
        struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
        return a->store ? a->store(ls, buf, len) : len;
}
static void lockspace_kobj_release(struct kobject *k)
{
        struct dlm_ls *ls = container_of(k, struct dlm_ls, ls_kobj);
        kfree(ls);
}
static const struct sysfs_ops dlm_attr_ops = {
        .show  = dlm_attr_show,
        .store = dlm_attr_store,
};

static struct kobj_type dlm_ktype = {
        .default_attrs = dlm_attrs,
        .sysfs_ops     = &dlm_attr_ops,
        .release       = lockspace_kobj_release,
};

static struct kset *dlm_kset;
static int do_uevent(struct dlm_ls *ls, int in)
{
        int error;

        if (in)
                kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE);
        else
                kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);

        log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");

        /* dlm_controld will see the uevent, do the necessary group management
           and then write to sysfs to wake us */

        error = wait_event_interruptible(ls->ls_uevent_wait,
                        test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));

        log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);

        if (error)
                goto out;

        error = ls->ls_uevent_result;
 out:
        if (error)
                log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
                          error, ls->ls_uevent_result);
        return error;
}
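
/*
 * Illustrative sketch, not part of this file: do_uevent() blocks until the
 * userspace control daemon (dlm_controld) reports the result of the group
 * join or leave.  The daemon does that by writing an integer to this
 * lockspace's "event_done" sysfs file, which is handled by dlm_event_store()
 * above and wakes ls_uevent_wait.  Assuming the "dlm" kset created in
 * dlm_lockspace_init() appears under /sys/kernel/dlm, the write looks
 * roughly like:
 *
 *      # 0 means the join/leave succeeded; a negative value reports an error
 *      echo 0 > /sys/kernel/dlm/<lockspace-name>/event_done
 */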
static int dlm_uevent(struct kset *kset, struct kobject *kobj,
                      struct kobj_uevent_env *env)
{
        struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);

        add_uevent_var(env, "LOCKSPACE=%s", ls->ls_name);
        return 0;
}

static struct kset_uevent_ops dlm_uevent_ops = {
        .uevent = dlm_uevent,
};
int __init dlm_lockspace_init(void)
{
        mutex_init(&ls_lock);
        INIT_LIST_HEAD(&lslist);
        spin_lock_init(&lslist_lock);

        dlm_kset = kset_create_and_add("dlm", &dlm_uevent_ops, kernel_kobj);
        if (!dlm_kset) {
                printk(KERN_WARNING "%s: can not create kset\n", __func__);
                return -ENOMEM;
        }
        return 0;
}
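
/*
 * Rough sketch of the resulting sysfs layout (an assumption based on the
 * "dlm" kset above and on each lockspace kobject being registered under its
 * name in new_lockspace()); these files map one-to-one onto the dlm_attr
 * entries defined earlier:
 *
 *      /sys/kernel/dlm/<lockspace-name>/control          (write only)
 *      /sys/kernel/dlm/<lockspace-name>/event_done       (write only)
 *      /sys/kernel/dlm/<lockspace-name>/id               (read/write)
 *      /sys/kernel/dlm/<lockspace-name>/recover_status   (read only)
 *      /sys/kernel/dlm/<lockspace-name>/recover_nodeid   (read only)
 */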
void dlm_lockspace_exit(void)
{
        kset_unregister(dlm_kset);
}
static struct dlm_ls *find_ls_to_scan(void)
{
        struct dlm_ls *ls;

        spin_lock(&lslist_lock);
        list_for_each_entry(ls, &lslist, ls_list) {
                if (time_after_eq(jiffies, ls->ls_scan_time +
                                           dlm_config.ci_scan_secs * HZ)) {
                        spin_unlock(&lslist_lock);
                        return ls;
                }
        }
        spin_unlock(&lslist_lock);
        return NULL;
}
static int dlm_scand(void *data)
{
        struct dlm_ls *ls;

        while (!kthread_should_stop()) {
                ls = find_ls_to_scan();
                if (ls) {
                        if (dlm_lock_recovery_try(ls)) {
                                ls->ls_scan_time = jiffies;
                                dlm_scan_timeout(ls);
                                dlm_scan_waiters(ls);
                                dlm_unlock_recovery(ls);
                        } else {
                                ls->ls_scan_time += HZ;
                        }
                        continue;
                }
                schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
        }
        return 0;
}
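
/*
 * Note (assumption, not taken from this file): dlm_config.ci_scan_secs is one
 * of the cluster tunables exported through configfs by config.c, so the period
 * of the dlm_scand loop above can normally be adjusted at runtime (the entry
 * is expected to live under /sys/kernel/config/dlm/cluster/).  Each pass
 * checks lock timeouts and long-waiting requests via dlm_scan_timeout() and
 * dlm_scan_waiters().
 */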
static int dlm_scand_start(void)
{
        struct task_struct *p;
        int error = 0;

        p = kthread_run(dlm_scand, NULL, "dlm_scand");
        if (IS_ERR(p))
                error = PTR_ERR(p);
        else
                scand_task = p;
        return error;
}

static void dlm_scand_stop(void)
{
        kthread_stop(scand_task);
}
struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
{
        struct dlm_ls *ls;

        spin_lock(&lslist_lock);

        list_for_each_entry(ls, &lslist, ls_list) {
                if (ls->ls_global_id == id) {
                        ls->ls_count++;
                        goto out;
                }
        }
        ls = NULL;
 out:
        spin_unlock(&lslist_lock);
        return ls;
}

struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace)
{
        struct dlm_ls *ls;

        spin_lock(&lslist_lock);
        list_for_each_entry(ls, &lslist, ls_list) {
                if (ls->ls_local_handle == lockspace) {
                        ls->ls_count++;
                        goto out;
                }
        }
        ls = NULL;
 out:
        spin_unlock(&lslist_lock);
        return ls;
}

struct dlm_ls *dlm_find_lockspace_device(int minor)
{
        struct dlm_ls *ls;

        spin_lock(&lslist_lock);
        list_for_each_entry(ls, &lslist, ls_list) {
                if (ls->ls_device.minor == minor) {
                        ls->ls_count++;
                        goto out;
                }
        }
        ls = NULL;
 out:
        spin_unlock(&lslist_lock);
        return ls;
}
void dlm_put_lockspace(struct dlm_ls *ls)
{
        spin_lock(&lslist_lock);
        ls->ls_count--;
        spin_unlock(&lslist_lock);
}

static void remove_lockspace(struct dlm_ls *ls)
{
        for (;;) {
                spin_lock(&lslist_lock);
                if (ls->ls_count == 0) {
                        WARN_ON(ls->ls_create_count != 0);
                        list_del(&ls->ls_list);
                        spin_unlock(&lslist_lock);
                        return;
                }
                spin_unlock(&lslist_lock);
                ssleep(1);
        }
}
static int threads_start(void)
{
        int error;

        error = dlm_scand_start();
        if (error) {
                log_print("cannot start dlm_scand thread %d", error);
                goto fail;
        }

        /* Thread for sending/receiving messages for all lockspace's */
        error = dlm_lowcomms_start();
        if (error) {
                log_print("cannot start dlm lowcomms %d", error);
                goto scand_fail;
        }

        return 0;

 scand_fail:
        dlm_scand_stop();
 fail:
        return error;
}

static void threads_stop(void)
{
        dlm_scand_stop();
        dlm_lowcomms_stop();
}
static int new_lockspace(const char *name, const char *cluster,
                         uint32_t flags, int lvblen,
                         const struct dlm_lockspace_ops *ops, void *ops_arg,
                         int *ops_result, dlm_lockspace_t **lockspace)
{
        struct dlm_ls *ls;
        int i, size, error;
        int do_unreg = 0;
        int namelen = strlen(name);

        if (namelen > DLM_LOCKSPACE_LEN)
                return -EINVAL;

        if (!lvblen || (lvblen % 8))
                return -EINVAL;

        if (!try_module_get(THIS_MODULE))
                return -EINVAL;

        if (!dlm_user_daemon_available()) {
                log_print("dlm user daemon not available");
                error = -EUNATCH;
                goto out;
        }
        if (ops && ops_result) {
                if (!dlm_config.ci_recover_callbacks)
                        *ops_result = -EOPNOTSUPP;
                else
                        *ops_result = 0;
        }

        if (dlm_config.ci_recover_callbacks && cluster &&
            strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) {
                log_print("dlm cluster name %s mismatch %s",
                          dlm_config.ci_cluster_name, cluster);
                error = -EBADR;
                goto out;
        }

        error = 0;

        spin_lock(&lslist_lock);
        list_for_each_entry(ls, &lslist, ls_list) {
                WARN_ON(ls->ls_create_count <= 0);
                if (ls->ls_namelen != namelen)
                        continue;
                if (memcmp(ls->ls_name, name, namelen))
                        continue;
                if (flags & DLM_LSFL_NEWEXCL) {
                        error = -EEXIST;
                        break;
                }
                ls->ls_create_count++;
                *lockspace = ls;
                error = 1;
                break;
        }
        spin_unlock(&lslist_lock);

        if (error)
                goto out;
        error = -ENOMEM;

        ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_NOFS);
        if (!ls)
                goto out;
        memcpy(ls->ls_name, name, namelen);
        ls->ls_namelen = namelen;
        ls->ls_lvblen = lvblen;

        ls->ls_scan_time = jiffies;

        if (ops && dlm_config.ci_recover_callbacks) {
                ls->ls_ops = ops;
                ls->ls_ops_arg = ops_arg;
        }

        if (flags & DLM_LSFL_TIMEWARN)
                set_bit(LSFL_TIMEWARN, &ls->ls_flags);

        /* ls_exflags are forced to match among nodes, and we don't
           need to require all nodes to have some flags set */
        ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
                                    DLM_LSFL_NEWEXCL));
        size = dlm_config.ci_rsbtbl_size;
        ls->ls_rsbtbl_size = size;

        ls->ls_rsbtbl = vmalloc(sizeof(struct dlm_rsbtable) * size);
        if (!ls->ls_rsbtbl)
                goto out_lsfree;
        for (i = 0; i < size; i++) {
                ls->ls_rsbtbl[i].keep.rb_node = NULL;
                ls->ls_rsbtbl[i].toss.rb_node = NULL;
                spin_lock_init(&ls->ls_rsbtbl[i].lock);
        }

        idr_init(&ls->ls_lkbidr);
        spin_lock_init(&ls->ls_lkbidr_spin);

        size = dlm_config.ci_dirtbl_size;
        ls->ls_dirtbl_size = size;

        ls->ls_dirtbl = vmalloc(sizeof(struct dlm_dirtable) * size);
        if (!ls->ls_dirtbl)
                goto out_lkbfree;
        for (i = 0; i < size; i++) {
                INIT_LIST_HEAD(&ls->ls_dirtbl[i].list);
                spin_lock_init(&ls->ls_dirtbl[i].lock);
        }
        INIT_LIST_HEAD(&ls->ls_waiters);
        mutex_init(&ls->ls_waiters_mutex);
        INIT_LIST_HEAD(&ls->ls_orphans);
        mutex_init(&ls->ls_orphans_mutex);
        INIT_LIST_HEAD(&ls->ls_timeout);
        mutex_init(&ls->ls_timeout_mutex);

        INIT_LIST_HEAD(&ls->ls_new_rsb);
        spin_lock_init(&ls->ls_new_rsb_spin);

        INIT_LIST_HEAD(&ls->ls_nodes);
        INIT_LIST_HEAD(&ls->ls_nodes_gone);
        ls->ls_num_nodes = 0;
        ls->ls_low_nodeid = 0;
        ls->ls_total_weight = 0;
        ls->ls_node_array = NULL;

        memset(&ls->ls_stub_rsb, 0, sizeof(struct dlm_rsb));
        ls->ls_stub_rsb.res_ls = ls;

        ls->ls_debug_rsb_dentry = NULL;
        ls->ls_debug_waiters_dentry = NULL;

        init_waitqueue_head(&ls->ls_uevent_wait);
        ls->ls_uevent_result = 0;
        init_completion(&ls->ls_members_done);
        ls->ls_members_result = -1;

        mutex_init(&ls->ls_cb_mutex);
        INIT_LIST_HEAD(&ls->ls_cb_delay);
        ls->ls_recoverd_task = NULL;
        mutex_init(&ls->ls_recoverd_active);
        spin_lock_init(&ls->ls_recover_lock);
        spin_lock_init(&ls->ls_rcom_spin);
        get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
        ls->ls_recover_status = 0;
        ls->ls_recover_seq = 0;
        ls->ls_recover_args = NULL;
        init_rwsem(&ls->ls_in_recovery);
        init_rwsem(&ls->ls_recv_active);
        INIT_LIST_HEAD(&ls->ls_requestqueue);
        mutex_init(&ls->ls_requestqueue_mutex);
        mutex_init(&ls->ls_clear_proc_locks);
        ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
        if (!ls->ls_recover_buf)
                goto out_dirfree;

        ls->ls_num_slots = 0;
        ls->ls_slots_size = 0;
        ls->ls_slots = NULL;

        INIT_LIST_HEAD(&ls->ls_recover_list);
        spin_lock_init(&ls->ls_recover_list_lock);
        ls->ls_recover_list_count = 0;
        ls->ls_local_handle = ls;
        init_waitqueue_head(&ls->ls_wait_general);
        INIT_LIST_HEAD(&ls->ls_root_list);
        init_rwsem(&ls->ls_root_sem);
        down_write(&ls->ls_in_recovery);

        spin_lock(&lslist_lock);
        ls->ls_create_count = 1;
        list_add(&ls->ls_list, &lslist);
        spin_unlock(&lslist_lock);
        if (flags & DLM_LSFL_FS) {
                error = dlm_callback_start(ls);
                if (error) {
                        log_error(ls, "can't start dlm_callback %d", error);
                        goto out_delist;
                }
        }

        /* needs to find ls in lslist */
        error = dlm_recoverd_start(ls);
        if (error) {
                log_error(ls, "can't start dlm_recoverd %d", error);
                goto out_callback;
        }
        ls->ls_kobj.kset = dlm_kset;
        error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
                                     "%s", ls->ls_name);
        if (error)
                goto out_recoverd;
        kobject_uevent(&ls->ls_kobj, KOBJ_ADD);

        /* let kobject handle freeing of ls if there's an error */
        do_unreg = 1;
        /* This uevent triggers dlm_controld in userspace to add us to the
           group of nodes that are members of this lockspace (managed by the
           cluster infrastructure.)  Once it's done that, it tells us who the
           current lockspace members are (via configfs) and then tells the
           lockspace to start running (via sysfs) in dlm_ls_start(). */

        error = do_uevent(ls, 1);
        if (error)
                goto out_members;
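
        /*
         * Sketch of the expected userspace sequence (an assumption drawn from
         * the comment above, not from code in this file): dlm_controld reacts
         * to the online uevent, writes the member node ids for this lockspace
         * into configfs, and finally writes to the "control" sysfs file.  That
         * write lands in dlm_control_store(), which starts the lockspace via
         * dlm_ls_start(); only then does ls_members_done below complete.
         */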
        wait_for_completion(&ls->ls_members_done);
        error = ls->ls_members_result;
        if (error)
                goto out_members;

        dlm_create_debug_file(ls);

        log_debug(ls, "join complete");
        *lockspace = ls;
        return 0;

 out_members:
        do_uevent(ls, 0);
        dlm_clear_members(ls);
        kfree(ls->ls_node_array);
 out_recoverd:
        dlm_recoverd_stop(ls);
 out_callback:
        dlm_callback_stop(ls);
 out_delist:
        spin_lock(&lslist_lock);
        list_del(&ls->ls_list);
        spin_unlock(&lslist_lock);
        kfree(ls->ls_recover_buf);
 out_dirfree:
        vfree(ls->ls_dirtbl);
 out_lkbfree:
        idr_destroy(&ls->ls_lkbidr);
        vfree(ls->ls_rsbtbl);
 out_lsfree:
        if (do_unreg)
                kobject_put(&ls->ls_kobj);
        else
                kfree(ls);
 out:
        module_put(THIS_MODULE);
        return error;
}
int dlm_new_lockspace(const char *name, const char *cluster,
                      uint32_t flags, int lvblen,
                      const struct dlm_lockspace_ops *ops, void *ops_arg,
                      int *ops_result, dlm_lockspace_t **lockspace)
{
        int error;

        mutex_lock(&ls_lock);

        error = threads_start();
        if (error)
                goto out;

        error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg,
                              ops_result, lockspace);
        if (error > 0)
                error = 0;
 out:
        mutex_unlock(&ls_lock);
        return error;
}
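
/*
 * Minimal usage sketch (illustrative only, not part of this file): an
 * in-kernel user such as a cluster filesystem would create and later release
 * a lockspace roughly as follows.  The "example" names are made up.
 *
 *      dlm_lockspace_t *example_ls;
 *      int ops_result, error;
 *
 *      error = dlm_new_lockspace("example", "examplecluster",
 *                                DLM_LSFL_FS | DLM_LSFL_NEWEXCL, 64,
 *                                NULL, NULL, &ops_result, &example_ls);
 *      if (error)
 *              return error;
 *      ...
 *      dlm_release_lockspace(example_ls, 2);
 */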
static int lkb_idr_is_local(int id, void *p, void *data)
{
        struct dlm_lkb *lkb = p;

        if (!lkb->lkb_nodeid)
                return 1;
        return 0;
}

static int lkb_idr_is_any(int id, void *p, void *data)
{
        return 1;
}

static int lkb_idr_free(int id, void *p, void *data)
{
        struct dlm_lkb *lkb = p;

        if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
                dlm_free_lvb(lkb->lkb_lvbptr);

        dlm_free_lkb(lkb);
        return 0;
}
/* NOTE: We check the lkbidr here rather than the resource table.
   This is because there may be LKBs queued as ASTs that have been unlinked
   from their RSBs and are pending deletion once the AST has been delivered */

static int lockspace_busy(struct dlm_ls *ls, int force)
{
        int rv;

        spin_lock(&ls->ls_lkbidr_spin);
        if (force == 0) {
                rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls);
        } else if (force == 1) {
                rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls);
        } else {
                rv = 0;
        }
        spin_unlock(&ls->ls_lkbidr_spin);
        return rv;
}
static int release_lockspace(struct dlm_ls *ls, int force)
{
        struct dlm_rsb *rsb;
        struct rb_node *n;
        int i, busy, rv;

        busy = lockspace_busy(ls, force);

        spin_lock(&lslist_lock);
        if (ls->ls_create_count == 1) {
                if (busy) {
                        rv = -EBUSY;
                } else {
                        /* remove_lockspace takes ls off lslist */
                        ls->ls_create_count = 0;
                        rv = 0;
                }
        } else if (ls->ls_create_count > 1) {
                rv = --ls->ls_create_count;
        } else {
                rv = -EINVAL;
        }
        spin_unlock(&lslist_lock);

        if (rv) {
                log_debug(ls, "release_lockspace no remove %d", rv);
                return rv;
        }

        dlm_device_deregister(ls);

        if (force < 3 && dlm_user_daemon_available())
                do_uevent(ls, 0);

        dlm_recoverd_stop(ls);

        dlm_callback_stop(ls);

        remove_lockspace(ls);

        dlm_delete_debug_file(ls);

        kfree(ls->ls_recover_buf);
        /*
         * Free direntry structs.
         */

        vfree(ls->ls_dirtbl);

        /*
         * Free all lkb's in idr
         */

        idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
        idr_remove_all(&ls->ls_lkbidr);
        idr_destroy(&ls->ls_lkbidr);

        /*
         * Free all rsb's on rsbtbl[] lists
         */

        for (i = 0; i < ls->ls_rsbtbl_size; i++) {
                while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) {
                        rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
                        rb_erase(n, &ls->ls_rsbtbl[i].keep);
                        dlm_free_rsb(rsb);
                }

                while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) {
                        rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
                        rb_erase(n, &ls->ls_rsbtbl[i].toss);
                        dlm_free_rsb(rsb);
                }
        }

        vfree(ls->ls_rsbtbl);
        while (!list_empty(&ls->ls_new_rsb)) {
                rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb,
                                       res_hashchain);
                list_del(&rsb->res_hashchain);
                dlm_free_rsb(rsb);
        }

        /*
         * Free structures on any other lists
         */

        dlm_purge_requestqueue(ls);
        kfree(ls->ls_recover_args);
        dlm_clear_free_entries(ls);
        dlm_clear_members(ls);
        dlm_clear_members_gone(ls);
        kfree(ls->ls_node_array);
        log_debug(ls, "release_lockspace final free");
        kobject_put(&ls->ls_kobj);
        /* The ls structure will be freed when the kobject is done with */

        module_put(THIS_MODULE);
        return 0;
}
/*
 * Called when a system has released all its locks and is not going to use the
 * lockspace any longer.  We free everything we're managing for this lockspace.
 * Remaining nodes will go through the recovery process as if we'd died.  The
 * lockspace must continue to function as usual, participating in recoveries,
 * until this returns.
 *
 * Force has 4 possible values:
 * 0 - don't destroy lockspace if it has any LKBs
 * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
 * 2 - destroy lockspace regardless of LKBs
 * 3 - destroy lockspace as part of a forced shutdown
 */

int dlm_release_lockspace(void *lockspace, int force)
{
        struct dlm_ls *ls;
        int error;

        ls = dlm_find_lockspace_local(lockspace);
        if (!ls)
                return -EINVAL;
        dlm_put_lockspace(ls);

        mutex_lock(&ls_lock);
        error = release_lockspace(ls, force);
        mutex_unlock(&ls_lock);

        return error;
}
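
/*
 * Illustrative note tying the force values above to lockspace_busy(): force 0
 * refuses to tear down a lockspace that still holds any LKBs, force 1
 * tolerates remote LKBs but not local ones, and force 2 or 3 skips the busy
 * check entirely.  In release_lockspace(), force 3 additionally skips
 * notifying the user daemon, matching the "forced shutdown" case described in
 * the comment above.
 */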
void dlm_stop_lockspaces(void)
{
        struct dlm_ls *ls;

 restart:
        spin_lock(&lslist_lock);
        list_for_each_entry(ls, &lslist, ls_list) {
                if (!test_bit(LSFL_RUNNING, &ls->ls_flags))
                        continue;
                spin_unlock(&lslist_lock);
                log_error(ls, "no userland control daemon, stopping lockspace");
                dlm_ls_stop(ls);
                goto restart;
        }
        spin_unlock(&lslist_lock);
}