4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
31 #include <sys/types.h>
43 #include <libnvpair.h>
44 #include <fm/fmd_api.h>
45 #include <fm/fmd_fmri.h>
46 #include <sys/fm/protocol.h>
47 #include <sys/fm/io/disk.h>
48 #include <fm/libtopo.h>
50 #include "disk_monitor.h"
51 #include "hotplug_mgr.h"
53 #include "topo_gather.h"
54 #include "dm_platform.h"
56 #define THIS_FMD_MODULE_NAME "disk-monitor"
/*
 * Bit flags tracking how far module initialization has progressed.
 * Each flag is OR-ed into g_init_state after the matching
 * init_*_manager() call, and the same flags are tested later to decide
 * which cleanup_*_manager() routine to run.
 * NOTE(review): INIT_STATE_NONE, used as the initializer, has no visible
 * enumerator line in this excerpt -- confirm it is declared here.
 */
58 static enum disk_init_state
{
60 STATE_CHANGE_MGR_INITTED
= 2,
61 HOTPLUG_MGR_INITTED
= 4
62 } g_init_state
= INIT_STATE_NONE
;
70 * Global verbosity flag -- controls chattiness of debug messages and
71 * warnings. Its value is determined by the fmd property "log-level"
72 * settable in the DE's .conf file.
74 log_class_t g_verbose
= 0;
/*
 * Module-wide configuration. Within this file it is handed to the state
 * change manager init and cleanup routines, and its disk_list member is
 * counted during _fmd_init().
 */
75 cfgdata_t
*config_data
= NULL
;
/*
 * NOTE(review): presumably the fmd handle shared with the other files of
 * this module; nothing in this excerpt assigns it -- confirm where it is
 * set.
 */
76 fmd_hdl_t
*g_fm_hdl
= NULL
;
/*
 * Forward declaration of the property table. The initialized definition
 * appears further down in the file, and the table is passed to
 * config_get() from _fmd_init().
 */
78 static const fmd_prop_t fmd_props
[];
/*
 * Tear down the module managers: the hotplug manager first, then the
 * state change manager (which is handed the global config_data).
 * Called from _fmd_fini().
 * NOTE(review): the return type, braces and any further statements of
 * this function are not visible in this excerpt.
 */
81 diskmon_teardown_all(void)
83 cleanup_hotplug_manager();
84 cleanup_state_change_manager(config_data
);
/*
 * Walk the linked list of diskmon_t entries starting at disklistp,
 * following the next pointers until NULL is reached.
 * NOTE(review): the counter variable and the return statement are not
 * visible in this excerpt; the name and the caller comparing the result
 * against 0 suggest it returns the number of list entries -- confirm.
 */
89 count_disks(diskmon_t
*disklistp
)
93 while (disklistp
!= NULL
) {
95 disklistp
= disklistp
->next
;
/*
 * Body of the manager bring-up routine. The function header is not
 * visible in this excerpt.
 * NOTE(review): presumably this is diskmon_init(), which _fmd_init()
 * calls and compares against E_ERROR -- confirm.
 */
105 * Block the generation of state change events (generated by the
106 * hotplug manager thread) here; they will be unblocked after the
107 * state change manager thread is ready to accept state changes
108 * (shortly after it starts).
110 block_state_change_events();
/*
 * Bring-up order as written: platform layer, hotplug manager, then the
 * state change manager. The flag for each manager is recorded in
 * g_init_state so the cleanup code below knows what to undo.
 * NOTE(review): the statements taken when an init call returns non-zero
 * are not visible in this excerpt.
 */
112 if (dm_platform_init() != 0)
115 if (init_hotplug_manager() != 0)
118 g_init_state
|= HOTPLUG_MGR_INITTED
;
120 if (init_state_change_manager(config_data
) != 0)
123 g_init_state
|= STATE_CHANGE_MGR_INITTED
;
/*
 * Re-enable state change events (blocked above), then undo only the
 * managers whose flags were recorded in g_init_state.
 * NOTE(review): the lines that route control here are not visible;
 * presumably this is the failure path -- confirm.
 */
129 unblock_state_change_events();
132 * The cleanup order here does matter, due to dependencies between the
135 if (g_init_state
& HOTPLUG_MGR_INITTED
)
136 cleanup_hotplug_manager();
137 if (g_init_state
& STATE_CHANGE_MGR_INITTED
)
138 cleanup_state_change_manager(config_data
);
/*
 * Run the configured actions for a fault against one disk.
 *
 * The class of nvl is matched against three fault classes built from
 * DISK_ERROR_CLASS (over-temperature, self-test failure, SSM wearout);
 * each match selects the corresponding DISK_PROP_*ACTION property name.
 * The code then calls dm_fault_indicator_set(diskp, INDICATOR_ON), looks
 * the selected property up in diskp->props, and passes the resulting
 * string to dm_platform_indicator_execute(), logging a warning when that
 * call returns non-zero.
 *
 * hdl   - fmd handle, used for fmd_nvl_class_match()
 * diskp - disk whose indicator and properties are used
 * nvl   - fault nvlist whose class selects the action property
 *
 * NOTE(review): the return type and the lines between the visible
 * statements (for example any else keywords and the end of the lookup
 * condition) are not visible in this excerpt.
 */
145 dm_fault_execute_actions(fmd_hdl_t
*hdl
, diskmon_t
*diskp
, nvlist_t
*nvl
)
147 const char *action_prop
= NULL
;
148 const char *action_string
;
151 * The predictive failure action is the activation of the fault
154 if (fmd_nvl_class_match(hdl
, nvl
,
155 DISK_ERROR_CLASS
"." FM_FAULT_DISK_OVERTEMP
))
156 action_prop
= DISK_PROP_OTEMPACTION
;
158 if (fmd_nvl_class_match(hdl
, nvl
,
159 DISK_ERROR_CLASS
"." FM_FAULT_DISK_TESTFAIL
))
160 action_prop
= DISK_PROP_STFAILACTION
;
162 if (fmd_nvl_class_match(hdl
, nvl
,
163 DISK_ERROR_CLASS
"." FM_FAULT_SSM_WEAROUT
))
164 action_prop
= DISK_PROP_SSMWEAROUTACTION
;
166 dm_fault_indicator_set(diskp
, INDICATOR_ON
);
/*
 * action_prop stays NULL when none of the classes above matched, in
 * which case no property lookup is attempted.
 */
168 if (action_prop
!= NULL
&&
169 (action_string
= dm_prop_lookup(diskp
->props
, action_prop
))
172 if (dm_platform_indicator_execute(action_string
) != 0) {
173 log_warn("Fault action `%s' did not successfully "
174 "complete.\n", action_string
);
/*
 * Handle a list event reporting repaired (or updated) faults.
 *
 * Reads the case UUID (FM_SUSPECT_UUID) and the fault array
 * (FM_SUSPECT_FAULT_LIST) from nvl, accumulating the lookup results in
 * err. For a fault nvlist it looks up the resource FMRI
 * (FM_FAULT_RESOURCE), maps it to a diskmon with dm_fmri_to_diskmon(),
 * logs the repair, turns the fault indicator off and sends an
 * HPS_REPAIRED state change. fmd_case_uuresolved() is called with the
 * case UUID.
 *
 * hdl    - fmd handle
 * nvl    - list event nvlist
 * repair - diskmon_recv() passes 1 for the repaired class and 0 for the
 *          updated class
 *
 * NOTE(review): the loop over the fault array, the handling of failed
 * lookups and the way repair is consulted are not visible in this
 * excerpt -- confirm against the full source.
 */
180 diskmon_agent_repair(fmd_hdl_t
*hdl
, nvlist_t
*nvl
, int repair
)
190 err
|= nvlist_lookup_string(nvl
, FM_SUSPECT_UUID
, &uuid
);
191 err
|= nvlist_lookup_nvlist_array(nvl
, FM_SUSPECT_FAULT_LIST
,
200 if (nvlist_lookup_nvlist(fltnvl
, FM_FAULT_RESOURCE
, &fmri
)
204 if ((diskp
= dm_fmri_to_diskmon(hdl
, fmri
)) == NULL
)
207 log_msg(MM_MAIN
, "Disk %s repaired!\n",
210 dm_fault_indicator_set(diskp
, INDICATOR_OFF
);
212 dm_state_change(diskp
, HPS_REPAIRED
);
216 fmd_case_uuresolved(hdl
, uuid
);
/*
 * Handle a suspect list event.
 *
 * Reads the case UUID (FM_SUSPECT_UUID) and the fault array
 * (FM_SUSPECT_FAULT_LIST) from nvl, accumulating the lookup results in
 * err. The loop runs while faults remain (nvc counts down) and the case
 * is not yet closed. For each fault it looks up the resource FMRI, maps
 * it to a diskmon, runs dm_fault_execute_actions() and sends an
 * HPS_FAULTED state change. Afterwards the case is closed with
 * fmd_case_uuclose() if it is still open.
 *
 * hdl - fmd handle
 * nvl - list event nvlist
 *
 * NOTE(review): what happens when a lookup fails or no diskmon matches
 * is not visible in this excerpt.
 */
221 diskmon_agent_suspect(fmd_hdl_t
*hdl
, nvlist_t
*nvl
)
231 err
|= nvlist_lookup_string(nvl
, FM_SUSPECT_UUID
, &uuid
);
232 err
|= nvlist_lookup_nvlist_array(nvl
, FM_SUSPECT_FAULT_LIST
,
237 while (nvc
-- != 0 && !fmd_case_uuclosed(hdl
, uuid
)) {
241 if (nvlist_lookup_nvlist(fltnvl
, FM_FAULT_RESOURCE
, &fmri
) != 0)
244 if ((diskp
= dm_fmri_to_diskmon(hdl
, fmri
)) == NULL
)
247 /* Execute the actions associated with this fault */
248 dm_fault_execute_actions(hdl
, diskp
, fltnvl
);
251 * Send a state change event to the state change manager
253 dm_state_change(diskp
, HPS_FAULTED
);
/* Only close the case if nothing closed it in the meantime. */
256 if (!fmd_case_uuclosed(hdl
, uuid
)) {
258 fmd_case_uuclose(hdl
, uuid
);
/*
 * fmd receive entry point (installed as the first member of fmd_ops).
 *
 * When the MM_MAIN bit is set in g_verbose the incoming nvlist is
 * printed to stderr. The event class is then matched in this order:
 *   FM_LIST_REPAIRED_CLASS -> diskmon_agent_repair(hdl, nvl, 1)
 *   FM_LIST_UPDATED_CLASS  -> diskmon_agent_repair(hdl, nvl, 0)
 *   FM_LIST_SUSPECT_CLASS  -> diskmon_agent_suspect(hdl, nvl)
 *   FM_LIST_RESOLVED_CLASS -> no call visible in this excerpt
 * Events matching DISK_ERROR_CLASS ".*" have their resource FMRI mapped
 * to a diskmon, the fault actions executed, and an HPS_FAULTED state
 * change sent.
 *
 * hdl   - fmd handle
 * ep    - fmd event; not referenced by any visible line
 * nvl   - event payload
 * class - event class string; not referenced by any visible line
 *
 * NOTE(review): the return type and the handling of failed lookups are
 * not visible in this excerpt.
 */
264 diskmon_recv(fmd_hdl_t
*hdl
, fmd_event_t
*ep
, nvlist_t
*nvl
, const char *class)
269 if (g_verbose
& MM_MAIN
)
270 nvlist_print(stderr
, nvl
);
273 * Act on the fault suspect list or repaired list (embedded agent
276 if (fmd_nvl_class_match(hdl
, nvl
, FM_LIST_REPAIRED_CLASS
)) {
278 diskmon_agent_repair(hdl
, nvl
, 1);
281 } else if (fmd_nvl_class_match(hdl
, nvl
, FM_LIST_UPDATED_CLASS
)) {
283 diskmon_agent_repair(hdl
, nvl
, 0);
286 } else if (fmd_nvl_class_match(hdl
, nvl
, FM_LIST_SUSPECT_CLASS
)) {
288 diskmon_agent_suspect(hdl
, nvl
);
/* NOTE(review): the body of the resolved-class branch is not visible. */
290 } else if (fmd_nvl_class_match(hdl
, nvl
, FM_LIST_RESOLVED_CLASS
)) {
295 * If we get any replayed faults, set the diskmon's faulted
296 * flag for the appropriate fault, then change the diskmon's state
299 if (fmd_nvl_class_match(hdl
, nvl
, DISK_ERROR_CLASS
".*")) {
301 if (nvlist_lookup_nvlist(nvl
, FM_FAULT_RESOURCE
,
305 if ((diskp
= dm_fmri_to_diskmon(hdl
, fmri
)) == NULL
)
308 /* Execute the actions associated with this fault */
309 dm_fault_execute_actions(hdl
, diskp
, nvl
);
312 * If the fault wasn't generated by this module, send a
313 * state change event to the state change manager
315 dm_state_change(diskp
, HPS_FAULTED
);
/*
 * fmd entry-point table for this module. Only the receive callback
 * (diskmon_recv) is supplied; the timeout, close and stats slots are
 * NULL.
 * NOTE(review): any further members and the closing brace are not
 * visible in this excerpt.
 */
320 static const fmd_hdl_ops_t fmd_ops
= {
321 diskmon_recv
, /* fmdo_recv */
322 NULL
, /* fmdo_timeout */
323 NULL
, /* fmdo_close */
324 NULL
, /* fmdo_stats */
/*
 * Module property table, passed to config_get() from _fmd_init().
 * The one visible entry declares the GLOBAL_PROP_LOG_LEVEL property as
 * FMD_TYPE_UINT32 with the string 0 as its third field.
 * NOTE(review): presumably the third field is the default value, and a
 * terminating entry follows -- neither is established by this excerpt.
 */
328 static const fmd_prop_t fmd_props
[] = {
329 { GLOBAL_PROP_LOG_LEVEL
, FMD_TYPE_UINT32
, "0" },
/*
 * Module description handed to fmd_hdl_register() in _fmd_init().
 * NOTE(review): only the DISK_MONITOR_MODULE_VERSION member is visible
 * in this excerpt; the remaining initializers are not shown.
 */
333 static const fmd_hdl_info_t fmd_info
= {
335 DISK_MONITOR_MODULE_VERSION
,
/*
 * Module load entry point.
 *
 * Visible sequence:
 *   1. Register with fmd using fmd_info.
 *   2. On a configuration initialization failure, log an error and
 *      unregister.
 *   3. Fetch the configuration with config_get(hdl, fmd_props); on a
 *      non-zero result log an error and unregister.
 *   4. Count config_data->disk_list; unregister when the count is 0.
 *   5. Call diskmon_init(); unregister when it returns E_ERROR.
 *   6. Log the number of monitored disks.
 *   7. Walk every case with fmd_case_next() and solve any case that is
 *      not yet solved.
 *
 * hdl - fmd handle for this module
 *
 * NOTE(review): the return statements that follow each failure, and any
 * cleanup besides fmd_hdl_unregister(), are not visible in this excerpt.
 */
341 _fmd_init(fmd_hdl_t
*hdl
)
348 if (fmd_hdl_register(hdl
, FMD_API_VERSION
, &fmd_info
) != 0) {
/* NOTE(review): the condition guarding this error path is not visible. */
353 log_err("Could not initialize configuration!\n");
354 fmd_hdl_unregister(hdl
);
358 if (config_get(hdl
, fmd_props
)) {
360 log_err("Could not retrieve configuration from libtopo!\n");
361 fmd_hdl_unregister(hdl
);
366 * If there are no disks to monitor, bail out
368 if ((disk_count
= count_disks(config_data
->disk_list
)) == 0) {
370 fmd_hdl_unregister(hdl
);
374 if (diskmon_init() == E_ERROR
) {
376 fmd_hdl_unregister(hdl
);
380 log_msg(MM_MAIN
, "Monitoring %d disks.\n", disk_count
);
383 * Iterate over all active cases.
384 * Since we automatically solve all cases, these cases must have
385 * had the fault added, but the DE must have been interrupted
386 * before they were solved.
388 for (cp
= fmd_case_next(hdl
, NULL
);
389 cp
!= NULL
; cp
= fmd_case_next(hdl
, cp
)) {
391 if (!fmd_case_solved(hdl
, cp
))
392 fmd_case_solve(hdl
, cp
);
/*
 * Module unload entry point: delegates to diskmon_teardown_all().
 * The hdl argument is not referenced by any visible line.
 * NOTE(review): the return type, braces and any further statements are
 * not visible in this excerpt.
 */
398 _fmd_fini(fmd_hdl_t
*hdl
)
400 diskmon_teardown_all();