# VERSION: Bump version up to 4.10.12.
# [Samba.git] / ctdb / tools / ctdb_diagnostics
# blob ccaf85963cb8c9de680c45ad012c6851eb3d0093
#!/bin/sh
# A script to test the basic setup of a CTDB/Samba install
# tridge@samba.org September 2007
# martin@meltin.net August 2010
# Print the command-line synopsis to stderr and terminate the script
# with exit status 1.  Never returns.
usage ()
{
	cat >&2 <<EOF
Usage: ctdb_diagnostics [OPTION] ...
options:
-n <nodes> Comma separated list of nodes to operate on
-c Ignore comment lines (starting with '#') in file comparisons
-w Ignore whitespace in file comparisons
--no-ads Do not use commands that assume an Active Directory Server
EOF
	exit 1
}
# Default settings; parse_options may override nodes, diff_opts and no_ads.
#
# nodes:     whitespace separated list of nodes to examine, taken from the
#            second '|'-delimited field of "ctdb listnodes -X" output
#            (presumably the node number - confirm against ctdb's format)
# bad_nodes: comma separated list of nodes that could not be contacted
# diff_opts: extra options passed to diff when comparing files
# no_ads:    "true" to skip commands that need an Active Directory Server
nodes=$(ctdb listnodes -X | cut -d'|' -f2)
bad_nodes=""
diff_opts=
no_ads=false
# Parse the command line.
#
# Arguments: the script's arguments ("$@")
# Globals written:
#   nodes     - set from -n, with commas converted to spaces
#   diff_opts - extended by -c (ignore comment lines) and -w (ignore
#               whitespace)
#   no_ads    - set to "true" by --no-ads
# Calls usage (which exits) on -h/--help, on any parse error and if any
# non-option arguments remain.  Returns 0 otherwise.
parse_options ()
{
	temp=$(getopt -n "ctdb_diagnostics" -o "n:cwh" -l no-ads,help -- "$@")

	# No!  Checking the exit code afterwards is actually clearer...
	# shellcheck disable=SC2181
	[ $? -eq 0 ] || usage

	# getopt emits a shell-quoted, normalised argument list
	eval set -- "$temp"

	while true ; do
		case "$1" in
		-n) nodes=$(echo "$2" | sed -e 's@,@ @g') ; shift 2 ;;
		-c) diff_opts="${diff_opts} -I ^#.*" ; shift ;;
		-w) diff_opts="${diff_opts} -w" ; shift ;;
		--no-ads) no_ads=true ; shift ;;
		--) shift ; break ;;
		-h|--help|*) usage ;;
		esac
	done

	# Written as "|| usage" so that the function's own return status is
	# 0 when there are no stray arguments
	[ $# -eq 0 ] || usage
}
parse_options "$@"

# Use 5s ssh timeout if EXTRA_SSH_OPTS doesn't set a timeout.
case "$EXTRA_SSH_OPTS" in
*ConnectTimeout=*) : ;;
*)
	export EXTRA_SSH_OPTS="${EXTRA_SSH_OPTS} -o ConnectTimeout=5"
	;;
esac
# Filter nodes.  Remove any nodes we can't contact from $nodes and add
# them to $bad_nodes.
_nodes=""
for _i in $nodes ; do
	if onnode "$_i" true >/dev/null 2>&1 ; then
		# ${var:+sep} adds the separator only once the list is non-empty
		_nodes="${_nodes}${_nodes:+ }${_i}"
	else
		bad_nodes="${bad_nodes}${bad_nodes:+,}${_i}"
	fi
done
nodes="$_nodes"

# Comma separated form of $nodes, as passed to onnode by show_all
nodes_comma=$(echo "$nodes" | sed -e 's@[[:space:]]@,@g')

PATH="$PATH:/sbin:/usr/sbin:/usr/lpp/mmfs/bin"
# list of config files that must exist and that we check are the same
# on the nodes
#
# The ctdb and nfs settings files live in /etc/sysconfig when that
# directory exists, otherwise in /etc/default; everything else in the
# list is the same either way.
if [ -d /etc/sysconfig ] ; then
	_sysconf_dir="/etc/sysconfig"
else
	_sysconf_dir="/etc/default"
fi
CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /usr/local/etc/ctdb/nodes ${_sysconf_dir}/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth ${_sysconf_dir}/nfs /etc/exports /etc/vsftpd/vsftpd.conf"

# list of config files that may exist and should be checked that they
# are the same on the nodes
CONFIG_FILES_MAY="/usr/local/etc/ctdb/public_addresses /usr/local/etc/ctdb/static-routes"
# From here on send stderr to stdout so that error output from the
# commands run below ends up interleaved in the single report stream.
exec 2>&1

cat <<EOF
--------------------------------------------------------------------
ctdb_diagnostics starting. This script will gather information about
your ctdb cluster. You should send the output of this script along
with any ctdb or clustered Samba bug reports.
--------------------------------------------------------------------
EOF

date
# Report an error.
#
# Arguments: $1 - the error message
# Globals:   NUM_ERRORS (incremented), ERRORS (file appended to)
# Outputs:   "ERROR: <message>" on stdout; a numbered copy of the message
#            is appended to the file named by $ERRORS.
error() {
	msg="$1"
	# printf rather than echo: some /bin/sh echo builtins interpret
	# backslash sequences in the message
	printf 'ERROR: %s\n' "$msg"
	NUM_ERRORS=$((NUM_ERRORS + 1))
	printf ' ERROR[%s]: %s\n' "$NUM_ERRORS" "$msg" >> "$ERRORS"
}
# Print a local file, framed by separator lines.
#
# Arguments: $1 - path of the file to show
# Outputs:   the path, the "ls -l" details of the file and then the file
#            contents with each line prefixed by a space.  Errors from ls
#            and sed (e.g. for a missing file) appear in the output too.
show_file() {
	fname="$1"
	_fdetails=$(ls -l "$fname" 2>&1)
	echo " ================================"
	echo " File: $fname"
	echo " $_fdetails"
	sed 's/^/ /' "$fname" 2>&1
	echo " ================================"
}
# Run a command on all nodes in $nodes_comma via onnode and show the result.
#
# Arguments: $1 - the complete command line, as a single string.  Any
#                 further arguments are ignored, so a command with
#                 arguments must be quoted as one word.
# Outputs:   a "running ..." header, then for each node whatever onnode
#            prints for "hostname; date; <command>", with the command's
#            own output (stdout and stderr) prefixed by a space.
show_all() {
	echo "running $1 on nodes $nodes_comma"
	onnode "$nodes_comma" "hostname; date; $1 2>&1 | sed 's/^/ /'" 2>&1
}
# Show each given file as found on the first node in $nodes, then check
# that every other node has identical contents.
#
# Arguments: $1   - printf format used when a file is not readable on the
#                   first node; it receives the file name and the node
#            $2.. - paths of the files to check
# Globals:   nodes, tmpdir, diff_opts (read); error is called for each
#            problem found
# Outputs:   the file as seen on the first node, a progress line per
#            additional node and a unified diff for any node that differs.
#
# A file that is unreadable on the first node is reported once and not
# compared on the remaining nodes.
show_and_compare_files () {

	fmt="$1" ; shift

	for f ; do
		_bf=$(basename "$f")
		first=true
		# Reset per file so the cleanup below never sees a stale path
		fstf=""

		for n in $nodes ; do

			if $first ; then
				onnode "$n" [ -r "$f" ] || {
					# This function takes a format string
					# shellcheck disable=SC2059
					msg=$(printf "$fmt" "$f" "$n")
					error "$msg"
					# Skip to the next file
					continue 2;
				}

				# Reference copy that the other nodes are compared against
				fstf="${tmpdir}/${_bf}.node${n}"
				onnode "$n" cat "$f" >"$fstf" 2>&1

				_fdetails=$(onnode "$n" ls -l "$f" 2>&1)
				echo " ================================"
				echo " File (on node $n): $f"
				echo " $_fdetails"
				sed 's/^/ /' "$fstf"
				echo " ================================"
				first=false
			else
				echo "Testing for same config file $f on node $n"
				tmpf="${tmpdir}/${_bf}.node${n}"
				onnode "$n" cat "$f" >"$tmpf" 2>&1
				# Intentional multi-word splitting on diff_opts
				# shellcheck disable=SC2086
				diff $diff_opts "$fstf" "$tmpf" >/dev/null 2>&1 || {
					error "File $f is different on node $n"
					# shellcheck disable=SC2086
					diff -u $diff_opts "$fstf" "$tmpf"
				}
				rm -f "$tmpf"
			fi
		done

		[ -z "$fstf" ] || rm -f "$fstf"
	done
}
# Scratch directory for per-node file copies and the error summary
if ! tmpdir=$(mktemp -d) ; then
	echo "Unable to create a temporary directory"
	exit 1
fi
# Make sure the scratch directory is removed when the script exits,
# including exits before the normal cleanup is reached
trap 'rm -rf "$tmpdir"' EXIT

# File collecting a numbered copy of every error, and the error count
ERRORS="${tmpdir}/diag_err"
NUM_ERRORS=0
# Report which nodes are being examined and which were skipped
cat <<EOF
Diagnosis started on these nodes:
$nodes_comma
EOF

if [ -n "$bad_nodes" ] ; then
	cat <<EOF

NOT RUNNING DIAGNOSTICS on these uncontactable nodes:
$bad_nodes
EOF
fi

cat <<EOF

For reference, here is the nodes file on the current node...
EOF

show_file /usr/local/etc/ctdb/nodes
cat <<EOF
--------------------------------------------------------------------
Comparing critical config files on nodes $nodes_comma
EOF

# The node is formatted with %s rather than %d: nodes given via -n are
# passed through as-is and need not be plain numbers.

# Intentional multi-word splitting on CONFIG_FILES_MUST
# shellcheck disable=SC2086
show_and_compare_files \
	"%s is missing on node %s" \
	$CONFIG_FILES_MUST

# Intentional multi-word splitting on CONFIG_FILES_MAY
# shellcheck disable=SC2086
show_and_compare_files \
	"Optional file %s is not present on node %s" \
	$CONFIG_FILES_MAY
cat <<EOF
--------------------------------------------------------------------
Checking for clock drift
EOF

# Compare the clock of every node in $nodes with the local clock and
# report an error (via error) for any node that is more than 30 seconds
# off, or whose time could not be obtained.
check_clock_drift ()
{
	for i in $nodes ; do
		# Sample the local clock per node so that time spent talking
		# to earlier nodes is not counted as drift
		t=$(date +%s)
		t2=$(onnode "$i" date +%s)
		case "$t2" in
		""|*[!0-9]*)
			# No usable timestamp: report that instead of a
			# meaningless difference
			error "unable to get time from node $i"
			continue
			;;
		esac
		d=$((t2 - t))
		if [ "$d" -gt 30 ] || [ "$d" -lt -30 ] ; then
			error "time on node $i differs by $d seconds"
		fi
	done
}

check_clock_drift
cat <<EOF
--------------------------------------------------------------------
Showing software versions
EOF

show_all "uname -a"

# Query whichever package manager is installed on the local node
if [ -x /bin/rpm ] ; then
	# grep -E instead of the obsolescent egrep
	show_all "rpm -qa | grep -E 'samba|ctdb|gpfs'"
fi
if [ -x /usr/bin/dpkg-query ] ; then
	show_all "/usr/bin/dpkg-query --show 'ctdb'"
	show_all "/usr/bin/dpkg-query --show 'samba'"
	#show_all "/usr/bin/dpkg-query --show 'gpfs'"
fi
cat <<EOF
--------------------------------------------------------------------
Showing ctdb status and recent log entries
EOF

show_all "ctdb status; ctdb ip"
show_all "ctdb statistics"
show_all "ctdb uptime"
show_all "ctdb listvars"
show_all "ctdb getdbmap"
# Per-database statistics: take the third '|'-separated field of each
# getdbmap line after the first and run "ctdb dbstatistics" on it.
# \$3 is escaped so that it reaches awk instead of being expanded here.
show_all "ctdb -X getdbmap | awk -F'|' 'NR > 1 {print \$3}' | sort | xargs -n 1 ctdb dbstatistics"

echo "Showing log.ctdb"
# "tail -n N" is the standard spelling of the obsolescent "tail -N"
show_all "test -f /usr/local/var/log/log.ctdb && tail -n 100 /usr/local/var/log/log.ctdb"

show_all "tail -n 200 /var/log/messages"
show_all "ls -lRs /usr/local/var/lib/ctdb"
show_all "ls -lRs /usr/local/etc/ctdb"
cat <<EOF
--------------------------------------------------------------------
Showing system and process status
EOF

show_all "df"
show_all "df -i"
show_all "mount"
show_all "w"
show_all "ps axfwu"
show_all "dmesg"
show_all "/sbin/lspci"
show_all "dmidecode"
show_all "cat /proc/partitions"
show_all "cat /proc/cpuinfo"
show_all "cat /proc/scsi/scsi"
show_all "/sbin/ifconfig -a"
show_all "/sbin/ip addr list"
show_all "/sbin/route -n"
show_all "netstat -s"
show_all "free"
show_all "crontab -l"
show_all "sysctl -a"
show_all "iptables -L -n"
show_all "iptables -L -n -t nat"
show_all "/usr/sbin/rpcinfo -p"
show_all "/usr/sbin/showmount -a"
show_all "/usr/sbin/showmount -e"
show_all "/usr/sbin/nfsstat -v"

# Optional tools: only queried when present on the local node
if [ -x /sbin/multipath ] ; then
	show_all "/sbin/multipath -ll"
fi
if [ -x /sbin/chkconfig ] ; then
	show_all "/sbin/chkconfig --list"
fi
if [ -x /usr/sbin/getenforce ] ; then
	show_all "/usr/sbin/getenforce"
fi
if [ -d /proc/net/bonding ] ; then
	# The list of bonding files comes from the local node
	for f in /proc/net/bonding/*; do
		show_all "cat $f"
	done
fi
cat <<EOF
--------------------------------------------------------------------
Showing Samba status
EOF

show_all "smbstatus -n -B"
if $no_ads ; then
	echo
	echo "Skipping \"net ads testjoin\" as requested"
	echo
else
	show_all "net ads testjoin"
fi
show_all "net conf list"
show_all "lsof -n | grep smbd"
show_all "lsof -n | grep ctdbd"
show_all "netstat -tan"
if $no_ads ; then
	echo
	echo "Skipping \"net ads info\" as requested"
	echo
else
	show_all "net ads info"
fi
show_all "date"
show_all "smbclient -U% -L 127.0.0.1"
WORKGROUP=$(testparm -s --parameter-name=WORKGROUP 2> /dev/null)
# show_all only looks at its first argument, so the whole command must
# be passed as a single string
show_all "id '$WORKGROUP/Administrator'"
show_all "wbinfo -p"
show_all "wbinfo --online-status"
show_all "smbd -b"
date
echo "Diagnostics finished with $NUM_ERRORS errors"

# Repeat all recorded errors as a summary at the end of the report
if [ -r "$ERRORS" ] ; then
	cat "$ERRORS"
	rm -f "$ERRORS"
fi

rm -rf "$tmpdir"

# The exit status is the number of errors, capped at 255: an exit status
# only carries 8 bits, so e.g. 256 errors would otherwise look like
# success
_rc=${NUM_ERRORS:-0}
[ "$_rc" -le 255 ] || _rc=255
exit "$_rc"