torture: convert torture_comment() -> torture_result() so we can knownfail flapping...
[Samba/wip.git] / ctdb / tools / ctdb_diagnostics
blob2a51e1bac25205d0f1427ada12bfbcbb3c636543
#!/bin/sh
# a script to test the basic setup of a CTDB/Samba install
# tridge@samba.org September 2007
# martin@meltin.net August 2010
# Print the command-line help on stderr and terminate the script with
# exit status 1.  Never returns to the caller.
usage ()
{
    cat >&2 <<EOF
Usage: ctdb_diagnostics [OPTION] ...
options:
-n <nodes> Comma separated list of nodes to operate on
-c Ignore comment lines (starting with '#') in file comparisons
-w Ignore whitespace in file comparisons
--no-ads Do not use commands that assume an Active Directory Server
EOF
    exit 1
}
# Defaults that the command-line options may override:
#   nodes     - nodes to examine; field 2 of each "ctdb listnodes -Y" line
#   bad_nodes - comma separated list of nodes that could not be contacted
#   diff_opts - extra options passed to diff when comparing files
#   no_ads    - "true" when the "net ads" commands are to be skipped
nodes=$(ctdb listnodes -Y | cut -d: -f2)
bad_nodes=""
diff_opts=""
no_ads=false
# Parse the command line and update the globals nodes, diff_opts and
# no_ads accordingly.  usage (which exits) is called for -h/--help, for
# options getopt rejects and when non-option arguments are left over.
# Arguments: the script's own arguments ("$@")
# Returns:   0 when the options were accepted
parse_options ()
{
    # A non-zero status from getopt means the options were not valid.
    temp=$(getopt -n "ctdb_diagnostics" -o "n:cwh" -l no-ads,help -- "$@") || usage

    # Replace the positional parameters with getopt's normalised output.
    eval set -- "$temp"

    while true ; do
        case "$1" in
            -n)
                # Comma separated on the command line, space separated
                # everywhere else in the script.
                nodes=$(echo "$2" | sed -e 's@,@ @g')
                shift 2
                ;;
            -c)
                diff_opts="${diff_opts} -I ^#.*"
                shift
                ;;
            -w)
                diff_opts="${diff_opts} -w"
                shift
                ;;
            --no-ads)
                no_ads=true
                shift
                ;;
            --)
                shift
                break
                ;;
            -h|--help|*)
                usage
                ;;
        esac
    done

    # Anything left after "--" is a stray non-option argument.
    if [ $# -ne 0 ] ; then
        usage
    fi
}
# Apply the command-line options given to the script.
parse_options "$@"
# Use 5s ssh timeout if EXTRA_SSH_OPTS doesn't set a timeout.
case "$EXTRA_SSH_OPTS" in
    *ConnectTimeout=*)
        # A timeout is already present; leave the value untouched.
        :
        ;;
    *)
        EXTRA_SSH_OPTS="${EXTRA_SSH_OPTS} -o ConnectTimeout=5"
        export EXTRA_SSH_OPTS
        ;;
esac
# Filter nodes.  Remove any nodes we can't contact from $nodes and add
# them to $bad_nodes.  $nodes_comma receives the remaining nodes joined
# with commas.
filter_nodes ()
{
    _nodes=""
    nodes_comma=""
    for _i in $nodes ; do
        # A node counts as contactable when onnode can run "true" on it.
        if onnode "$_i" true >/dev/null 2>&1 ; then
            _nodes="${_nodes}${_nodes:+ }${_i}"
            nodes_comma="${nodes_comma}${nodes_comma:+,}${_i}"
        else
            bad_nodes="${bad_nodes}${bad_nodes:+,}${_i}"
        fi
    done
    nodes="$_nodes"
}

filter_nodes
PATH="$PATH:/sbin:/usr/sbin:/usr/lpp/mmfs/bin"

# list of config files that must exist and that we check are the same
# on the nodes.  The ctdb and nfs settings files are looked for in
# /etc/sysconfig when that directory exists and in /etc/default
# otherwise; the rest of the list is the same either way.
if [ -d /etc/sysconfig ] ; then
    _confdir=/etc/sysconfig
else
    _confdir=/etc/default
fi
CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /etc/ctdb/nodes ${_confdir}/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth ${_confdir}/nfs /etc/exports /etc/vsftpd/vsftpd.conf"

# list of config files that may exist and should be checked that they
# are the same on the nodes
CONFIG_FILES_MAY="/etc/ctdb/public_addresses /etc/ctdb/static-routes"
# From here on send stderr to wherever stdout goes, so that error
# messages from the commands below land in the report.  A redirection
# written without a command does not alter the shell's own descriptors;
# "exec" is what makes it take effect for the rest of the script.
exec 2>&1
# Opening banner of the report, followed by the current date and time.
cat <<EOF
--------------------------------------------------------------------
ctdb_diagnostics starting. This script will gather information about
your ctdb cluster. You should send the output of this script along
with any ctdb or clustered Samba bug reports.
--------------------------------------------------------------------
EOF

date
# Record a problem: print it on stdout, increment the global NUM_ERRORS
# counter and append a numbered copy to the file named by $ERRORS.
# Arguments: $1 - message text
error() {
    msg="$1"
    echo "ERROR: $msg"
    NUM_ERRORS=$((NUM_ERRORS + 1))
    echo " ERROR[$NUM_ERRORS]: $msg" >> "$ERRORS"
}
# Print a local file framed by separator lines: its name, its "ls -l"
# entry and its contents with every line indented.  Errors from ls and
# cat (for example a missing file) are included in the output.
# Arguments: $1 - path of the file to show
show_file() {
    fname="$1"
    echo " ================================"
    echo " File: $fname"
    # Quoted so that a path containing spaces is listed as one file.
    echo " $(ls -l "$fname" 2>&1)"
    cat "$fname" 2>&1 | sed 's/^/ /'
    echo " ================================"
}
# Announce a command and hand it to onnode for the nodes listed in
# $nodes_comma.  The command is prefixed with "hostname; date;" and its
# output, stderr included, is indented by sed.
# Arguments: the command to run; several arguments are joined with
#            spaces so that "show_all id name" behaves like
#            "show_all 'id name'" instead of dropping all but the first
show_all() {
    _sa_cmd="$*"
    echo "running $_sa_cmd on nodes $nodes_comma"
    onnode "$nodes_comma" "hostname; date; $_sa_cmd 2>&1 | sed 's/^/ /'" 2>&1
}
# Show each named file as found on the first node in $nodes and compare
# the copies on the remaining nodes against it.
#
# For every file: if it is not readable on the first node, an error
# built from the format is recorded and the file is skipped.  Otherwise
# its contents are fetched into $tmpdir and printed, and each further
# node's copy is fetched and diffed (using $diff_opts); a difference is
# recorded as an error and shown as a unified diff.
#
# Arguments: $1   - printf format taking the file name (%s) and the
#                   node number (%d), used when the file is unreadable
#            $2.. - files to examine
# Globals:   nodes, tmpdir, diff_opts (read); errors go through error()
show_and_compare_files () {
    _fmt="$1" ; shift

    for _file ; do
        _base=$(basename "$_file")
        # Empty until the reference copy from the first node exists.
        _ref=""

        for _node in $nodes ; do
            if [ -z "$_ref" ] ; then
                if ! onnode "$_node" [ -r "$_file" ] ; then
                    msg=$(printf "$_fmt" "$_file" "$_node")
                    error "$msg"
                    # Nothing to compare against: go on to the next file.
                    continue 2
                fi

                _ref="${tmpdir}/${_base}.node${_node}"
                onnode "$_node" cat "$_file" > "$_ref" 2>&1

                echo " ================================"
                echo " File (on node $_node): $_file"
                echo " $(onnode "$_node" ls -l "$_file" 2>&1)"
                sed 's/^/ /' "$_ref"
                echo " ================================"
            else
                echo "Testing for same config file $_file on node $_node"
                _copy="${tmpdir}/${_base}.node${_node}"
                onnode "$_node" cat "$_file" > "$_copy" 2>&1
                # $diff_opts is deliberately unquoted: it holds several
                # separate options.
                if ! diff $diff_opts "$_ref" "$_copy" >/dev/null 2>&1 ; then
                    error "File $_file is different on node $_node"
                    diff -u $diff_opts "$_ref" "$_copy"
                fi
                rm -f "$_copy"
            fi
        done

        if [ -n "$_ref" ] ; then
            rm -f "$_ref"
        fi
    done
}
# Scratch directory for the copies of the files being compared and for
# the error log.
if ! tmpdir=$(mktemp -d) ; then
    echo "Unable to create a temporary directory"
    exit 1
fi

# Remove the scratch directory when the script exits.  The signals are
# turned into an ordinary exit so the same clean-up runs on interruption.
trap 'rm -rf "$tmpdir"' EXIT
trap 'exit 1' HUP INT TERM

# ERRORS names the file collecting the numbered error messages;
# NUM_ERRORS counts them.
ERRORS="${tmpdir}/diag_err"
NUM_ERRORS=0
# Say which nodes are being examined, which ones had to be left out,
# and show the local nodes file for reference.
cat <<EOF
Diagnosis started on these nodes:
$nodes_comma
EOF

if [ -n "$bad_nodes" ] ; then
    cat <<EOF

NOT RUNNING DIAGNOSTICS on these uncontactable nodes:
$bad_nodes
EOF
fi

cat <<EOF

For reference, here is the nodes file on the current node...
EOF

show_file /etc/ctdb/nodes
cat <<EOF
--------------------------------------------------------------------
Comparing critical config files on nodes $nodes_comma
EOF

# The file lists are deliberately unquoted so that each path becomes a
# separate argument.  The first argument is the message format used when
# a file cannot be read on the first node.
show_and_compare_files \
    "%s is missing on node %d" \
    $CONFIG_FILES_MUST

show_and_compare_files \
    "Optional file %s is not present on node %d" \
    $CONFIG_FILES_MAY
cat <<EOF
--------------------------------------------------------------------
Checking for clock drift
EOF

# Compare the clock of every node in $nodes with the local clock and
# record an error when they are more than 30 seconds apart, or when the
# node's reply is not a plain number of seconds.
check_clock_drift ()
{
    for i in $nodes ; do
        # Sample the local clock right before asking the node, so that
        # time spent querying earlier nodes is not counted as drift.
        t=$(date +%s)
        t2=$(onnode "$i" date +%s)

        case "$t2" in
            ""|*[!0-9]*)
                error "unable to read the time on node $i"
                continue
                ;;
        esac

        d=$((t2 - t))
        if [ "$d" -gt 30 ] || [ "$d" -lt -30 ] ; then
            error "time on node $i differs by $d seconds"
        fi
    done
}

check_clock_drift
cat <<EOF
--------------------------------------------------------------------
Showing software versions
EOF

show_all "uname -a"

# Query whichever package manager is installed on this node.
if [ -x /bin/rpm ] ; then
    show_all "rpm -qa | grep -E 'samba|ctdb|gpfs'"
fi
if [ -x /usr/bin/dpkg-query ] ; then
    show_all "/usr/bin/dpkg-query --show 'ctdb'"
    show_all "/usr/bin/dpkg-query --show 'samba'"
    #show_all "/usr/bin/dpkg-query --show 'gpfs'"
fi
cat <<EOF
--------------------------------------------------------------------
Showing ctdb status and recent log entries
EOF

show_all "ctdb status; ctdb ip"
show_all "ctdb statistics"
show_all "ctdb uptime"
show_all "ctdb listvars"
show_all "ctdb getdbmap"

# Tail of the ctdb log, on nodes where the file exists.
echo "Showing log.ctdb"
show_all "test -f /var/log/log.ctdb && tail -100 /var/log/log.ctdb"

show_all "tail -200 /var/log/messages"
show_all "tail -200 /etc/ctdb/state/vacuum.log"
show_all "ls -lRs /var/ctdb"
show_all "ls -lRs /etc/ctdb"
cat <<EOF
--------------------------------------------------------------------
Showing system and process status
EOF

# Commands that are run unconditionally, in this order.
for _c in \
    "df" \
    "df -i" \
    "mount" \
    "w" \
    "ps axfwu" \
    "dmesg" \
    "/sbin/lspci" \
    "dmidecode" \
    "cat /proc/partitions" \
    "cat /proc/cpuinfo" \
    "cat /proc/scsi/scsi" \
    "/sbin/ifconfig -a" \
    "/sbin/ip addr list" \
    "/sbin/route -n" \
    "netstat -s" \
    "free" \
    "crontab -l" \
    "sysctl -a" \
    "iptables -L -n" \
    "iptables -L -n -t nat" \
    "/usr/sbin/rpcinfo -p" \
    "/usr/sbin/showmount -a" \
    "/usr/sbin/showmount -e" \
    "/usr/sbin/nfsstat -v"
do
    show_all "$_c"
done

# Commands that are only run when the tool is present on this node.
if [ -x /sbin/multipath ] ; then
    show_all "/sbin/multipath -ll"
fi
if [ -x /sbin/chkconfig ] ; then
    show_all "/sbin/chkconfig --list"
fi
if [ -x /usr/sbin/getenforce ] ; then
    show_all "/usr/sbin/getenforce"
fi

# One "cat" per bonding status file found on this node.
if [ -d /proc/net/bonding ] ; then
    for f in /proc/net/bonding/* ; do
        show_all "cat $f"
    done
fi
cat <<EOF
--------------------------------------------------------------------
Showing Samba status
EOF

show_all "smbstatus -n -B"

# The "net ads" commands are skipped when --no-ads was given.
if $no_ads ; then
    echo
    echo "Skipping \"net ads testjoin\" as requested"
    echo
else
    show_all "net ads testjoin"
fi

show_all "net conf list"
show_all "lsof -n | grep smbd"
show_all "lsof -n | grep ctdbd"
show_all "netstat -tan"

if $no_ads ; then
    echo
    echo "Skipping \"net ads info\" as requested"
    echo
else
    show_all "net ads info"
fi

show_all "date"
show_all "smbclient -U% -L 127.0.0.1"

# Look up the workgroup's Administrator account.  The account name is
# placed inside the single command string so that it actually reaches
# "id" rather than being passed as a separate argument.
WORKGROUP=$(testparm -s --parameter-name=WORKGROUP 2> /dev/null)
show_all "id '$WORKGROUP/Administrator'"

show_all "wbinfo -p"
show_all "wbinfo --online-status"
show_all "smbd -b"
# Closing summary: current date, the error count and the numbered error
# messages collected during the run.
date
echo "Diagnostics finished with $NUM_ERRORS errors"

# Quoted so that an empty $ERRORS fails the test instead of leaving
# "cat" reading from stdin.
if [ -r "$ERRORS" ] ; then
    cat "$ERRORS"
    rm -f "$ERRORS"
fi

rm -rf "$tmpdir"

# The exit status is the number of errors.  Statuses are taken modulo
# 256, so clamp the value to keep a large count from wrapping to 0.
if [ "${NUM_ERRORS:-0}" -gt 255 ] ; then
    exit 255
fi
exit "${NUM_ERRORS:-0}"