ctdb-scripts: Add default filesystem usage warnings
[Samba.git] / ctdb / config / events.d / 05.system
blob69fcec2e854332ba9ed763cfc2e31734b87c4d77
#!/bin/sh
# ctdb event script for checking local file system utilization and
# system memory/swap usage

# Work out CTDB_BASE from this script's location when the caller has
# not provided it: the parent of the directory containing the script.
# The inner command substitution is quoted so that an installation path
# containing whitespace is not word-split.
if [ -z "$CTDB_BASE" ] ; then
    CTDB_BASE=$(cd -P "$(dirname "$0")" ; dirname "$PWD")
    export CTDB_BASE
fi

. "$CTDB_BASE/functions"
loadconfig

# The functions below keep their cache files under ${service_state_dir}.
# NOTE(review): presumably set by this call - confirm in "functions".
ctdb_setup_service_state_dir "system-monitoring"
# Check that a value is an integer percentage in the range 0-100.
#
# Arguments:
#   $1 - candidate value
#   $2 - optional name of the check, only used in the warning text
#
# Returns 0 when $1 is valid.  Returns 1 otherwise: silently when $1 is
# empty, and after printing a WARNING on stdout for any other value.
validate_percentage ()
{
    case "$1" in
    "") return 1 ;;  # A failure that doesn't need a warning
    [0-9]|[0-9][0-9]|100) return 0 ;;
    *) echo "WARNING: ${1} is an invalid percentage${2:+ in \"}${2}${2:+\"} check"
       return 1
    esac
}
# Compare a utilization figure against warning/unhealthy thresholds.
#
# Arguments:
#   $1 - human-readable name of the thing being checked; used in the
#        messages and, with "/" and " " substituted, in the name of
#        the cache file
#   $2 - thresholds in the form <warn>[:<unhealthy>], each an integer
#        percentage
#   $3 - current utilization as an integer percentage
#   $4 - optional command string that is eval'ed when the unhealthy
#        threshold is reached, before exiting
#
# Globals read: service_state_dir (directory holding the cache file)
#
# When usage >= unhealthy threshold: prints an ERROR, runs the callout,
# records the usage in the cache file and exits the script with
# status 1.
# When usage >= warn threshold: prints a WARNING, but only if the usage
# differs from the cached value, then records the usage.
# When usage < warn threshold: prints a NOTICE if a cache file exists
# and removes the cache file.
check_thresholds ()
{
    _thing="$1"
    _thresholds="$2"
    _usage="$3"
    _unhealthy_callout="$4"

    case "$_thresholds" in
    *:*)
        _warn_threshold="${_thresholds%:*}"
        _unhealthy_threshold="${_thresholds#*:}"
        ;;
    *)
        _warn_threshold="$_thresholds"
        _unhealthy_threshold=""
    esac

    # Build a file-name-safe token from the thing's name
    _t=$(echo "$_thing" | sed -e 's@/@SLASH_@g' -e 's@ @_@g')
    _cache="${service_state_dir}/cache_${_t}"

    if validate_percentage "$_unhealthy_threshold" "$_thing" ; then
        if [ "$_usage" -ge "$_unhealthy_threshold" ] ; then
            echo "ERROR: ${_thing} utilization ${_usage}% >= threshold ${_unhealthy_threshold}%"
            eval "$_unhealthy_callout"
            echo "$_usage" >"$_cache"
            exit 1
        fi
    fi

    # Pass the name of the thing being checked so that a warning about
    # an invalid threshold says which check it belongs to
    if validate_percentage "$_warn_threshold" "$_thing" ; then
        if [ "$_usage" -ge "$_warn_threshold" ] ; then
            if [ -r "$_cache" ] ; then
                read -r _prev <"$_cache"
            else
                _prev=""
            fi
            # Only warn when the usage has changed since the last
            # recorded value, to avoid repeating the same message
            if [ "$_usage" != "$_prev" ] ; then
                echo "WARNING: ${_thing} utilization ${_usage}% >= threshold ${_warn_threshold}%"
                echo "$_usage" >"$_cache"
            fi
        else
            if [ -r "$_cache" ] ; then
                echo "NOTICE: ${_thing} utilization ${_usage}% < threshold ${_warn_threshold}%"
            fi
            rm -f "$_cache"
        fi
    fi
}
# Set CTDB_MONITOR_FILESYSTEM_USAGE to a default list of entries, one
# per filesystem holding a database directory, each warning at 90%.
#
# Globals read:    service_state_dir, CTDB_DBDIR, CTDB_DBDIR_PERSISTENT,
#                  CTDB_DBDIR_STATE, CTDB_VARDIR
# Globals written: CTDB_MONITOR_FILESYSTEM_USAGE
#
# The list is computed once and kept in a cache file under
# ${service_state_dir}; later calls only read that file.
set_monitor_filsystem_usage_defaults ()
{
    _fs_defaults_cache="${service_state_dir}/cache_monitor_filsystem_usage_defaults"

    if [ ! -r "$_fs_defaults_cache" ] ; then
        # Determine filesystem for each database directory, generate
        # an entry to warn at 90%, de-duplicate entries, put all items
        # on 1 line (so the read below gets everything)
        for _t in "${CTDB_DBDIR:-${CTDB_VARDIR}}" \
                  "${CTDB_DBDIR_PERSISTENT:-${CTDB_VARDIR}/persistent}" \
                  "${CTDB_DBDIR_STATE:-${CTDB_VARDIR}/state}" ; do
            # Field 6 of the 2nd line of "df -P" output is the mount point
            df -kP "$_t" | awk 'NR == 2 { printf "%s:90\n", $6 }'
        done | sort -u | xargs >"$_fs_defaults_cache"
    fi

    read -r CTDB_MONITOR_FILESYSTEM_USAGE <"$_fs_defaults_cache"
}
# Check utilization of every filesystem listed in
# CTDB_MONITOR_FILESYSTEM_USAGE against its thresholds.
#
# Globals read: CTDB_MONITOR_FILESYSTEM_USAGE - whitespace-separated
#               entries; when empty the defaults are filled in by
#               set_monitor_filsystem_usage_defaults
#
# Entries whose directory does not exist, or for which no utilization
# can be obtained from df, produce a WARNING and are skipped.  The
# threshold comparison itself is delegated to check_thresholds.
monitor_filesystem_usage ()
{
    if [ -z "$CTDB_MONITOR_FILESYSTEM_USAGE" ] ; then
        set_monitor_filsystem_usage_defaults
    fi

    # Check each specified filesystem, specified in format
    # <fs_mount>:<fs_warn_threshold>[:fs_unhealthy_threshold]
    # The variable is deliberately unquoted so it splits into entries.
    for _fs in $CTDB_MONITOR_FILESYSTEM_USAGE ; do
        _fs_mount="${_fs%%:*}"
        _fs_thresholds="${_fs#*:}"

        if [ ! -d "$_fs_mount" ]; then
            echo "WARNING: Directory ${_fs_mount} does not exist"
            continue
        fi

        # Get current utilization: the digits preceding "%" in df output
        _fs_usage=$(df -kP "$_fs_mount" | \
                    sed -n -e 's@.*[[:space:]]\([[:digit:]]*\)%.*@\1@p')
        if [ -z "$_fs_usage" ] ; then
            echo "WARNING: Unable to get FS utilization for ${_fs_mount}"
            continue
        fi

        check_thresholds "Filesystem ${_fs_mount}" \
                         "$_fs_thresholds" \
                         "$_fs_usage"
    done
}
# Emit memory diagnostics.  Passed by monitor_memory_usage as the
# unhealthy callout for the memory and swap checks.
#
# Prints the "meminfo" data obtained via get_proc, then a full process
# listing, then writes "m" to "sysrq-trigger" via set_proc.
# NOTE(review): presumably get_proc/set_proc wrap /proc/<name>, in which
# case the last step asks the kernel to log memory info - confirm.
dump_memory_info ()
{
    get_proc "meminfo"
    ps auxfww
    set_proc "sysrq-trigger" "m"
}
# Check system memory and swap utilization against their thresholds.
#
# Globals read/written:
#   CTDB_MONITOR_MEMORY_USAGE - <warn>[:<unhealthy>], defaults to 80
#   CTDB_MONITOR_SWAP_USAGE   - <warn>[:<unhealthy>], defaults to 25
#
# Usage percentages are derived from the "meminfo" data returned by
# get_proc.  Free memory is MemAvailable when that is reported and
# non-zero, otherwise MemFree + Cached + Buffers.  The comparison and
# reporting are delegated to check_thresholds, with dump_memory_info as
# the unhealthy callout.
monitor_memory_usage ()
{
    # Defaults
    if [ -z "$CTDB_MONITOR_MEMORY_USAGE" ] ; then
        CTDB_MONITOR_MEMORY_USAGE=80
    fi
    if [ -z "$CTDB_MONITOR_SWAP_USAGE" ] ; then
        CTDB_MONITOR_SWAP_USAGE=25
    fi

    _meminfo=$(get_proc "meminfo")
    # Intentionally unquoted: awk prints "<mem%> <swap%>" and the two
    # words become $1 and $2
    set -- $(echo "$_meminfo" | awk '
$1 == "MemAvailable:" { memavail += $2 }
$1 == "MemFree:"      { memfree += $2 }
$1 == "Cached:"       { memfree += $2 }
$1 == "Buffers:"      { memfree += $2 }
$1 == "MemTotal:"     { memtotal = $2 }
$1 == "SwapFree:"     { swapfree = $2 }
$1 == "SwapTotal:"    { swaptotal = $2 }
END {
    if (memavail != 0) { memfree = memavail ; }
    # A total of 0 (for example a system with no swap configured)
    # would be a fatal division by zero in awk, leaving both values
    # empty.  Report 0% usage for that resource instead.
    mem_pct = 0
    swap_pct = 0
    if (memtotal != 0) {
        mem_pct = int((memtotal - memfree) / memtotal * 100)
    }
    if (swaptotal != 0) {
        swap_pct = int((swaptotal - swapfree) / swaptotal * 100)
    }
    print mem_pct, swap_pct
}')
    _mem_usage="$1"
    _swap_usage="$2"

    check_thresholds "System memory" \
                     "$CTDB_MONITOR_MEMORY_USAGE" \
                     "$_mem_usage" \
                     dump_memory_info

    check_thresholds "System swap" \
                     "$CTDB_MONITOR_SWAP_USAGE" \
                     "$_swap_usage" \
                     dump_memory_info
}
# Event dispatch: $1 is the event name.  The "monitor" event runs the
# filesystem and memory checks; any other event is passed on to
# ctdb_standard_event_handler.  check_thresholds exits with status 1
# itself when an unhealthy threshold is reached, so getting past the
# case statement means the script exits with status 0.
case "$1" in
    monitor)
        monitor_filesystem_usage
        monitor_memory_usage
        ;;

    *)
        ctdb_standard_event_handler "$@"
        ;;
esac

exit 0