ctdb/config/functions

   1 # Hey Emacs, this is a -*- shell-script -*- !!!
   2
   3 # utility functions for ctdb event scripts
   4
   5 [ -z "$CTDB_VARDIR" ] && {
   6     if [ -d "/var/lib/ctdb" ] ; then
   7         export CTDB_VARDIR="/var/lib/ctdb"
   8     else
   9         export CTDB_VARDIR="/var/ctdb"
  10     fi
  11 }
  12 [ -z "$CTDB_ETCDIR" ] && {
  13     export CTDB_ETCDIR="/etc"
  14 }
  15
  16 #######################################
  17 # pull in a system config file, if any
  18 _loadconfig() {
  19
  20     if [ -z "$1" ] ; then
  21         foo="${service_config:-${service_name}}"
  22         if [ -n "$foo" ] ; then
  23             loadconfig "$foo"
  24             return
  25         fi
  26     fi
  27
  28     if [ "$1" != "ctdb" ] ; then
  29         loadconfig "ctdb"
  30     fi
  31
  32     if [ -z "$1" ] ; then
  33         return
  34     fi
  35
  36     if [ -f $CTDB_ETCDIR/sysconfig/$1 ]; then
  37         . $CTDB_ETCDIR/sysconfig/$1
  38     elif [ -f $CTDB_ETCDIR/default/$1 ]; then
  39         . $CTDB_ETCDIR/default/$1
  40     elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
  41         . $CTDB_BASE/sysconfig/$1
  42     fi
  43
  44     if [ "$1" = "ctdb" ] ; then
  45         _config="${CTDB_BASE}/ctdbd.conf"
  46         if [ -r "$_config" ] ; then
  47             . "$_config"
  48         fi
  49     fi
  50 }
  51
  52 loadconfig () {
  53     _loadconfig "$@"
  54 }
  55
  56 ##############################################################
  57
  58 # CTDB_SCRIPT_DEBUGLEVEL can be overwritten by setting it in a
  59 # configuration file.
  60 debug ()
  61 {
  62     if [ ${CTDB_SCRIPT_DEBUGLEVEL:-2} -ge 4 ] ; then
  63         # If there are arguments then echo them.  Otherwise expect to
  64         # use stdin, which allows us to pass lots of debug using a
  65         # here document.
  66         if [ -n "$1" ] ; then
  67             echo "DEBUG: $*"
  68         else
  69             sed -e 's@^@DEBUG: @'
  70         fi
  71     else
  72         if [ -z "$1" ] ; then
  73             cat >/dev/null
  74         fi
  75     fi
  76 }
  77
  78 die ()
  79 {
  80     _msg="$1"
  81     _rc="${2:-1}"
  82
  83     echo "$_msg"
  84     exit $_rc
  85 }
  86
  87 # Log given message or stdin to either syslog or a CTDB log file
  88 # $1 is the tag passed to logger if syslog is in use.
  89 script_log ()
  90 {
  91     _tag="$1" ; shift
  92
  93     case "$CTDB_LOGGING" in
  94         file:*|"")
  95             if [ -n "$CTDB_LOGGING" ] ; then
  96                 _file="${CTDB_LOGGING#file:}"
  97             else
  98                 _file="/var/log/log.ctdb"
  99             fi
 100             {
 101                 if [ -n "$*" ] ; then
 102                     echo "$*"
 103                 else
 104                     cat
 105                 fi
 106             } >>"$_file"
 107             ;;
 108         *)
 109             # Handle all syslog:* variants here too.  There's no tool to do
 110             # the lossy things, so just use logger.
 111             logger -t "ctdbd: ${_tag}" $*
 112             ;;
 113     esac
 114 }
 115
 116 # When things are run in the background in an eventscript then logging
 117 # output might get lost.  This is the "solution".  :-)
 118 background_with_logging ()
 119 {
 120     (
 121         "$@" 2>&1 </dev/null |
 122         script_log "${script_name}&"
 123     )&
 124
 125     return 0
 126 }
 127
 128 ##############################################################
 129 # check number of args for different events
 130 ctdb_check_args ()
 131 {
 132     case "$1" in
 133         takeip|releaseip)
 134             if [ $# != 4 ]; then
 135                 echo "ERROR: must supply interface, IP and maskbits"
 136                 exit 1
 137             fi
 138             ;;
 139         updateip)
 140             if [ $# != 5 ]; then
 141                 echo "ERROR: must supply old interface, new interface, IP and maskbits"
 142                 exit 1
 143             fi
 144             ;;
 145     esac
 146 }
 147
 148 ##############################################################
 149 # determine on what type of system (init style) we are running
 150 detect_init_style()
 151 {
 152     # only do detection if not already set:
 153     [ -z "$CTDB_INIT_STYLE" ] || return
 154
 155     if [ -x /sbin/startproc ]; then
 156         CTDB_INIT_STYLE="suse"
 157     elif [ -x /sbin/start-stop-daemon ]; then
 158         CTDB_INIT_STYLE="debian"
 159     else
 160         CTDB_INIT_STYLE="redhat"
 161     fi
 162 }
 163
 164 ######################################################
 165 # simulate /sbin/service on platforms that don't have it
 166 # _service() makes it easier to hook the service() function for
 167 # testing.
 168 _service ()
 169 {
 170   _service_name="$1"
 171   _op="$2"
 172
 173   # do nothing, when no service was specified
 174   [ -z "$_service_name" ] && return
 175
 176   if [ -x /sbin/service ]; then
 177       $_nice /sbin/service "$_service_name" "$_op"
 178   elif [ -x /usr/sbin/service ]; then
 179       $_nice /usr/sbin/service "$_service_name" "$_op"
 180   elif [ -x $CTDB_ETCDIR/init.d/$_service_name ]; then
 181       $_nice $CTDB_ETCDIR/init.d/$_service_name "$_op"
 182   elif [ -x $CTDB_ETCDIR/rc.d/init.d/$_service_name ]; then
 183       $_nice $CTDB_ETCDIR/rc.d/init.d/$_service_name "$_op"
 184   fi
 185 }
 186
 187 service()
 188 {
 189     _nice=""
 190     _service "$@"
 191 }
 192
 193 ######################################################
 194 # simulate /sbin/service (niced) on platforms that don't have it
 195 nice_service()
 196 {
 197     _nice="nice"
 198     _service "$@"
 199 }
 200
 201 ######################################################
 202 # Cached retrieval of PNN from local node.  This never changes so why
 203 # open a client connection to the server each time this is needed?
 204 # This sets $pnn - this avoid an unnecessary subprocess.
 205 ctdb_get_pnn ()
 206 {
 207     _pnn_file="$CTDB_VARDIR/state/my-pnn"
 208     if [ ! -f "$_pnn_file" ] ; then
 209         ctdb pnn | sed -e 's@.*:@@' >"$_pnn_file"
 210     fi
 211
 212     read pnn <"$_pnn_file"
 213 }
 214
 215 ######################################################
 216 # wrapper around /proc/ settings to allow them to be hooked
 217 # for testing
 218 # 1st arg is relative path under /proc/, 2nd arg is value to set
 219 set_proc ()
 220 {
 221     echo "$2" >"/proc/$1"
 222 }
 223
 224 set_proc_maybe ()
 225 {
 226     if [ -w "/proc/$1" ] ; then
 227         set_proc "$1" "$2"
 228     fi
 229 }
 230
 231 ######################################################
 232 # wrapper around getting file contents from /proc/ to allow
 233 # this to be hooked for testing
 234 # 1st arg is relative path under /proc/
 235 get_proc ()
 236 {
 237     cat "/proc/$1"
 238 }
 239
 240 ######################################################
 241 # Print up to $_max kernel stack traces for processes named $_program
 242 program_stack_traces ()
 243 {
 244     _prog="$1"
 245     _max="${2:-1}"
 246
 247     _count=1
 248     for _pid in $(pidof "$_prog") ; do
 249         [ $_count -le $_max ] || break
 250
 251         # Do this first to avoid racing with process exit
 252         _stack=$(get_proc "${_pid}/stack" 2>/dev/null)
 253         if [ -n "$_stack" ] ; then
 254             echo "Stack trace for ${_prog}[${_pid}]:"
 255             echo "$_stack"
 256             _count=$(($_count + 1))
 257         fi
 258     done
 259 }
 260
 261 ######################################################
 262 # Check that an RPC service is healthy -
 263 # this includes allowing a certain number of failures
 264 # before marking the NFS service unhealthy.
 265 #
 266 # usage: nfs_check_rpc_service SERVICE_NAME [ triple ...]
 267 #
 268 # each triple is a set of 3 arguments: an operator, a
 269 # fail count limit and an action string.
 270 #
 271 # For example:
 272 #
 273 #       nfs_check_rpc_service "lockd" \
 274 #           -ge 15 "verbose restart unhealthy" \
  275 #           -eq 10 "restart:b"
 276 #
 277 # says that if lockd is down for 15 iterations then do
 278 # a verbose restart of lockd and mark the node unhealthy.
 279 # Before this, after 10 iterations of failure, the
 280 # service is restarted silently in the background.
 281 # Order is important: the number of failures need to be
 282 # specified in reverse order because processing stops
 283 # after the first condition that is true.
 284 ######################################################
 285 nfs_check_rpc_service ()
 286 {
 287     _prog_name="$1" ; shift
 288
 289     if _nfs_check_rpc_common "$_prog_name" ; then
 290         return
 291     fi
 292
 293     while [ -n "$3" ] ; do
 294         if _nfs_check_rpc_action "$1" "$2" "$3" ; then
 295             break
 296         fi
 297         shift 3
 298     done
 299 }
 300
 301 # The new way of doing things...
 302 nfs_check_rpc_services ()
 303 {
 304     # Files must end with .check - avoids editor backups, RPM fu, ...
 305     for _f in "${CTDB_BASE}/nfs-rpc-checks.d/"[0-9][0-9].*.check ; do
 306         _t="${_f%.check}"
 307         _prog_name="${_t##*/[0-9][0-9].}"
 308
 309         # If $_prog_name contains '@' then the bit after it is the
 310         # address family.
 311         _family="${_prog_name#*@}"
 312         if [ "$_family" = "$_prog_name" ] ; then
 313             _family=""
 314         else
 315             _prog_name="${_prog_name%@*}"
 316         fi
 317
 318         if _nfs_check_rpc_common "$_prog_name" "$_family" ; then
 319             # This RPC service is up, check next service...
 320             continue
 321         fi
 322
 323         # Check each line in the file in turn until one of the limit
 324         # checks is hit...
 325         while read _cmp _lim _rest ; do
 326             # Skip comments
 327             case "$_cmp" in
 328                 \#*) continue ;;
 329             esac
 330
 331             if _nfs_check_rpc_action "$_cmp" "$_lim" "$_rest" ; then
 332                 # Limit was hit on this line, no further checking...
 333                 break
 334             fi
 335         done <"$_f"
 336     done
 337 }
 338
 339 _nfs_check_rpc_common ()
 340 {
 341     _prog_name="$1"
 342     _family="$2"
 343
 344     # Some platforms don't have separate programs for all services.
 345     case "$_prog_name" in
 346         statd)
 347             type "rpc.${_prog_name}" >/dev/null 2>&1 || return 0
 348     esac
 349
 350     case "$_prog_name" in
 351         nfsd)
 352             _rpc_prog=nfs
 353             _version=3
 354             ;;
 355         mountd)
 356             _rpc_prog=mountd
 357             _version=1
 358             ;;
 359         rquotad)
 360             _rpc_prog=rquotad
 361             _version=1
 362             ;;
 363         lockd)
 364             _rpc_prog=nlockmgr
 365             _version=4
 366             ;;
 367         statd)
 368             _rpc_prog=status
 369             _version=1
 370             ;;
 371         *)
 372             echo "Internal error: unknown RPC program \"$_prog_name\"."
 373             exit 1
 374     esac
 375
 376     _service_name="nfs_${_prog_name}${_family:+_}${_family}"
 377
 378     if ctdb_check_rpc "$_rpc_prog" "$_version" "$_family" >/dev/null ; then
 379         ctdb_counter_init "$_service_name"
 380         return 0
 381     fi
 382
 383     ctdb_counter_incr "$_service_name"
 384
 385     return 1
 386 }
 387
 388 _nfs_check_rpc_action ()
 389 {
 390     _cmp="$1"
 391     _limit="$2"
 392     _actions="$3"
 393
 394     if ctdb_check_counter "quiet" "$_cmp" "$_limit" "$_service_name" ; then
 395         return 1
 396     fi
 397
 398     for _action in $_actions ; do
 399         case "$_action" in
 400             verbose)
 401                 echo "ERROR: $ctdb_check_rpc_out"
 402                 ;;
 403             restart)
 404                 _nfs_restart_rpc_service "$_prog_name"
 405                 ;;
 406             restart:b)
 407                 _nfs_restart_rpc_service "$_prog_name" true
 408                 ;;
 409             unhealthy)
 410                 exit 1
 411                 ;;
 412             *)
 413                 echo "Internal error: unknown action \"$_action\"."
 414                 exit 1
 415         esac
 416     done
 417
 418     return 0
 419 }
 420
 421 _nfs_restart_rpc_service ()
 422 {
 423     _prog_name="$1"
 424     _background="${2:-false}"
 425
 426     if $_background ; then
 427         _maybe_background="background_with_logging"
 428     else
 429         _maybe_background=""
 430     fi
 431
 432     _p="rpc.${_prog_name}"
 433
 434     case "$_prog_name" in
 435         nfsd)
 436             echo "Trying to restart NFS service"
 437             $_maybe_background startstop_nfs restart
 438             ;;
 439         mountd)
 440             echo "Trying to restart $_prog_name [${_p}]"
 441             killall -q -9 "$_p"
 442             nfs_dump_some_threads "$_p"
 443             $_maybe_background $_p $RPCMOUNTDOPTS \
 444                                ${MOUNTD_PORT:+-p} $MOUNTD_PORT
 445             ;;
 446         rquotad)
 447             echo "Trying to restart $_prog_name [${_p}]"
 448             killall -q -9 "$_p"
 449             nfs_dump_some_threads "$_p"
 450             $_maybe_background $_p ${RQUOTAD_PORT:+-p} $RQUOTAD_PORT
 451             ;;
 452         lockd)
 453             echo "Trying to restart lock manager service"
 454             $_maybe_background startstop_nfslock restart
 455             ;;
 456         statd)
 457             echo "Trying to restart $_prog_name [${_p}]"
 458             killall -q -9 "$_p"
 459             nfs_dump_some_threads "$_p"
 460             $_maybe_background $_p \
 461                 ${STATD_HOSTNAME:+-n} $STATD_HOSTNAME \
 462                 ${STATD_PORT:+-p} $STATD_PORT \
 463                 ${STATD_OUTGOING_PORT:+-o} $STATD_OUTGOING_PORT
 464             ;;
 465         *)
 466             echo "Internal error: unknown RPC program \"$_prog_name\"."
 467             exit 1
 468     esac
 469 }
 470
 471 ######################################################
 472 # Check the health of NFS services
 473 #
 474 # Use .check files in given directory.
 475 # Default is "${CTDB_BASE}/nfs-checks.d/"
 476 ######################################################
 477 nfs_check_services ()
 478 {
 479     _dir="${1:-${CTDB_NFS_CHECKS_DIR:-${CTDB_BASE}/nfs-checks.d}}"
 480
 481     # Files must end with .check - avoids editor backups, RPM fu, ...
 482     for _f in "$_dir"/[0-9][0-9].*.check ; do
 483         _t="${_f%.check}"
 484         _progname="${_t##*/[0-9][0-9].}"
 485
 486         nfs_check_service "$_progname" <"$_f"
 487     done
 488 }
 489
 490 ######################################################
 491 # Check the health of an NFS service
 492 #
 493 # $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
 494 #
 495 # Reads variables from stdin
 496 #
 497 # Variables are:
 498 #
 499 # * family             - "tcp" or "udp" or space separated list
 500 #                        default: tcp
 501 # * version            - optional, RPC service version number
 502 #                        default is to omit to check for any version
 503 # * unhealthy_after    - number of check fails before unhealthy
 504 #                        default: 1
 505 # * restart_every      - number of check fails before restart
 506 #                        default: 0, meaning no restart
 507 # * service_stop_cmd   - command to stop service
 508 #                        default: no default, must be provided if
 509 #                                 restart_every > 0
 510 # * service_start_cmd  - command to start service
 511 #                        default: no default, must be provided if
 512 #                                 restart_every > 0
 513 # * service_debug_cmd  - command to debug a service after trying to stop it;
 514 #                        for example, it can be useful to print stack
 515 #                        traces of threads that have not exited, since
 516 #                        they may be stuck doing I/O;
 517 #                        no default, see also function program_stack_traces()
 518 #
 519 # Quoting in values is not preserved
 520 #
 521 ######################################################
 522 nfs_check_service ()
 523 {
 524     _progname="$1"
 525
 526     (
 527         # Subshell to restrict scope variables...
 528
 529         # Defaults
 530         family="tcp"
 531         version=""
 532         unhealthy_after=1
 533         restart_every=0
 534         service_stop_cmd=""
 535         service_start_cmd=""
 536         service_debug_cmd=""
 537
 538         # Eval line-by-line.  Expands variable references in values.
 539         # Also allows variable name checking, which seems useful.
 540         while read _line ; do
 541             case "$_line" in
 542                 \#*|"") : ;; # Ignore comments, blank lines
 543
 544                 family=*|version=*|\
 545                 unhealthy_after=*|restart_every=*|\
 546                 service_stop_cmd=*|service_start_cmd=*|\
 547                 service_debug_cmd=*)
 548
 549                     eval "$_line"
 550                     ;;
 551                 *)
 552                     echo "ERROR: Unknown variable for ${_progname}: ${_line}"
 553                     exit 1
 554             esac
 555         done
 556
 557         _service_name="nfs_${_progname}"
 558
 559         if nfs_check_rpcinfo \
 560                "$_progname" "$version" "$family" >/dev/null ; then
 561             if [ $unhealthy_after -ne 1 -o $restart_every -ne 0 ] ; then
 562                 ctdb_counter_init "$_service_name"
 563             fi
 564             exit 0
 565         fi
 566
 567         ctdb_counter_incr "$_service_name"
 568         _failcount=$(ctdb_counter_get "$_service_name")
 569
 570         _unhealthy=false
 571         if [ $unhealthy_after -gt 0 ] ; then
 572             if [ $_failcount -ge $unhealthy_after ] ; then
 573                 _unhealthy=true
 574                 echo "ERROR: $ctdb_check_rpc_out"
 575             fi
 576         fi
 577
 578         if [ $restart_every -gt 0 ] ; then
 579             if [ $(($_failcount % $restart_every)) -eq 0 ] ; then
 580                 if ! $_unhealthy ; then
 581                     echo "WARNING: $ctdb_check_rpc_out"
 582                 fi
 583                 nfs_restart_service
 584             fi
 585         fi
 586
 587         if $_unhealthy ; then
 588             exit 1
 589         fi
 590
 591         return 0
 592     ) || exit 1
 593 }
 594
 595 # Uses: stop_service, start_service, debug_stuck_threads
 596 nfs_restart_service ()
 597 {
 598     if [ -z "$service_stop_cmd" -o -z "$service_start_cmd" ] ; then
 599         die "ERROR: Can not restart service \"${_progname}\" without corresponding service_start_cmd/service_stop_cmd settings"
 600     fi
 601
 602     echo "Trying to restart service \"${_progname}\"..."
 603     # Using eval means variables can contain semicolon separated commands
 604     eval "$service_stop_cmd"
 605     if [ -n "$service_debug_cmd" ] ; then
 606         eval "$service_debug_cmd"
 607     fi
 608     background_with_logging eval "$service_start_cmd"
 609 }
 610
 611 ######################################################
 612 # Check an RPC service with rpcinfo
 613 ######################################################
 614 ctdb_check_rpc ()
 615 {
 616     _progname="$1"        # passed to rpcinfo (looked up in /etc/rpc)
 617     _version="$2"         # optional, not passed if empty/unset
 618     _family="${3:-tcp}"   # optional, default is "tcp"
 619
 620     _localhost="${CTDB_RPCINFO_LOCALHOST:-127.0.0.1}"
 621
 622     if ! ctdb_check_rpc_out=$(rpcinfo -T $_family $_localhost \
 623                                       $_progname $_version 2>&1) ; then
 624         ctdb_check_rpc_out="$_progname failed RPC check:
 625 $ctdb_check_rpc_out"
 626         echo "$ctdb_check_rpc_out"
 627         return 1
 628     fi
 629 }
 630
 631 nfs_check_rpcinfo ()
 632 {
 633     _progname="$1"        # passed to rpcinfo (looked up in /etc/rpc)
 634     _versions="$2"        # optional, space separated, not passed if empty/unset
 635     _families="${3:-tcp}" # optional, space separated, default is "tcp"
 636
 637     for _family in $_families ; do
 638         if [ -n "$_versions" ] ; then
 639             for _version in $_versions ; do
 640                 ctdb_check_rpc $_progname $_version $_family || return $?
 641             done
 642         else
 643             ctdb_check_rpc $_progname "" $_family || return $?
 644         fi
 645     done
 646 }
 647
 648 ######################################################
 649 # Ensure $service_name is set
 650 assert_service_name ()
 651 {
 652     [ -n "$service_name" ] || die "INTERNAL ERROR: \$service_name not set"
 653 }
 654
 655 ######################################################
 656 # check a set of directories is available
 657 # return 1 on a missing directory
 658 # directories are read from stdin
 659 ######################################################
 660 ctdb_check_directories_probe()
 661 {
 662     while IFS="" read d ; do
 663         case "$d" in
 664             *%*)
 665                 continue
 666                 ;;
 667             *)
 668                 [ -d "${d}/." ] || return 1
 669         esac
 670     done
 671 }
 672
 673 ######################################################
 674 # check a set of directories is available
 675 # directories are read from stdin
 676 ######################################################
 677 ctdb_check_directories()
 678 {
 679     ctdb_check_directories_probe || {
 680         echo "ERROR: $service_name directory \"$d\" not available"
 681         exit 1
 682     }
 683 }
 684
 685 ######################################################
 686 # check a set of tcp ports
 687 # usage: ctdb_check_tcp_ports <ports...>
 688 ######################################################
 689
 690 # This flag file is created when a service is initially started.  It
 691 # is deleted the first time TCP port checks for that service succeed.
 692 # Until then ctdb_check_tcp_ports() prints a more subtle "error"
 693 # message if a port check fails.
 694 _ctdb_check_tcp_common ()
 695 {
 696     assert_service_name
 697     _ctdb_service_started_file="$ctdb_fail_dir/$service_name.started"
 698 }
 699
 700 ctdb_check_tcp_init ()
 701 {
 702     _ctdb_check_tcp_common
 703     mkdir -p "${_ctdb_service_started_file%/*}" # dirname
 704     touch "$_ctdb_service_started_file"
 705 }
 706
 707 # Check whether something is listening on all of the given TCP ports
 708 # using the "ctdb checktcpport" command.
 709 ctdb_check_tcp_ports()
 710 {
 711     if [ -z "$1" ] ; then
 712         echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
 713         exit 1
 714     fi
 715
 716     for _p ; do  # process each function argument (port)
 717         _cmd="ctdb checktcpport $_p"
 718         _out=$($_cmd 2>&1)
 719         _ret=$?
 720         case "$_ret" in
 721             0)
 722                 _ctdb_check_tcp_common
 723                 if [ ! -f "$_ctdb_service_started_file" ] ; then
 724                     echo "ERROR: $service_name tcp port $_p is not responding"
 725                     debug "\"ctdb checktcpport $_p\" was able to bind to port"
 726                 else
 727                     echo "INFO: $service_name tcp port $_p is not responding"
 728                 fi
 729
 730                 return 1
 731                 ;;
 732             98)
 733                 # Couldn't bind, something already listening, next port...
 734                 continue
 735                 ;;
 736             *)
 737                 echo "ERROR: unexpected error running \"ctdb checktcpport\""
 738                 debug <<EOF
 739 ctdb checktcpport (exited with $_ret) with output:
 740 $_out"
 741 EOF
 742                 return $_ret
 743         esac
 744     done
 745
 746     # All ports listening
 747     _ctdb_check_tcp_common
 748     rm -f "$_ctdb_service_started_file"
 749     return 0
 750 }
 751
 752 ######################################################
 753 # check a unix socket
 754 # usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
 755 ######################################################
 756 ctdb_check_unix_socket() {
 757     socket_path="$1"
 758     [ -z "$socket_path" ] && return
 759
 760     if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
 761         echo "ERROR: $service_name socket $socket_path not found"
 762         return 1
 763     fi
 764 }
 765
 766 ######################################################
 767 # check a command returns zero status
 768 # usage: ctdb_check_command <command>
 769 ######################################################
 770 ctdb_check_command ()
 771 {
 772     _out=$("$@" 2>&1) || {
 773         echo "ERROR: $* returned error"
 774         echo "$_out" | debug
 775         exit 1
 776     }
 777 }
 778
 779 ################################################
 780 # kill off any TCP connections with the given IP
 781 ################################################
 782 kill_tcp_connections ()
 783 {
 784     _ip="$1"
 785
 786     _oneway=false
 787     if [ "$2" = "oneway" ] ; then
 788         _oneway=true
 789     fi
 790
 791     get_tcp_connections_for_ip "$_ip" | {
 792         _killcount=0
 793         _connections=""
 794         _nl="
 795 "
 796         while read _dst _src; do
 797             _destport="${_dst##*:}"
 798             __oneway=$_oneway
 799             case $_destport in
 800                 # we only do one-way killtcp for CIFS
 801                 139|445) __oneway=true ;;
 802             esac
 803
 804             echo "Killing TCP connection $_src $_dst"
 805             _connections="${_connections}${_nl}${_src} ${_dst}"
 806             if ! $__oneway ; then
 807                 _connections="${_connections}${_nl}${_dst} ${_src}"
 808             fi
 809
 810             _killcount=$(($_killcount + 1))
 811         done
 812
 813         if [ $_killcount -eq 0 ] ; then
 814             return
 815         fi
 816
 817         echo "$_connections" | ctdb killtcp || {
 818             echo "Failed to send killtcp control"
 819             return
 820         }
 821
 822         _count=0
 823         while : ; do
 824             _remaining=$(get_tcp_connections_for_ip $_ip | wc -l)
 825
 826             if [ $_remaining -eq 0 ] ; then
 827                 echo "Killed $_killcount TCP connections to released IP $_ip"
 828                 return
 829             fi
 830
 831             _count=$(($_count + 1))
 832             if [ $_count -gt 3 ] ; then
 833                 echo "Timed out killing tcp connections for IP $_ip ($_remaining remaining)"
 834                 return
 835             fi
 836
 837             echo "Waiting for $_remaining connections to be killed for IP $_ip"
 838             sleep 1
 839         done
 840     }
 841 }
 842
 843 ##################################################################
 844 # kill off the local end for any TCP connections with the given IP
 845 ##################################################################
 846 kill_tcp_connections_local_only ()
 847 {
 848     kill_tcp_connections "$1" "oneway"
 849 }
 850
 851 ##################################################################
 852 # tickle any TCP connections with the given IP
 853 ##################################################################
 854 tickle_tcp_connections ()
 855 {
 856     _ip="$1"
 857
 858     get_tcp_connections_for_ip "$_ip" |
 859     {
 860         _failed=false
 861
 862         while read dest src; do
 863             echo "Tickle TCP connection $src $dest"
 864             ctdb tickle $src $dest >/dev/null 2>&1 || _failed=true
 865             echo "Tickle TCP connection $dest $src"
 866             ctdb tickle $dest $src >/dev/null 2>&1 || _failed=true
 867         done
 868
 869         if $_failed ; then
 870             echo "Failed to send tickle control"
 871         fi
 872     }
 873 }
 874
 875 get_tcp_connections_for_ip ()
 876 {
 877     _ip="$1"
 878
 879     netstat -tn | awk -v ip=$_ip \
 880         'index($1, "tcp") == 1 && \
 881          (index($4, ip ":") == 1 || index($4, "::ffff:" ip ":") == 1) \
 882          && $6 == "ESTABLISHED" \
 883          {print $4" "$5}'
 884 }
 885
 886 ##################################################################
 887 # use statd-callout to update NFS lock info
 888 ##################################################################
 889 nfs_update_lock_info ()
 890 {
 891     if [ -x "$CTDB_BASE/statd-callout" ] ; then
 892         "$CTDB_BASE/statd-callout" update
 893     fi
 894 }
 895
 896 ########################################################
 897 # start/stop the Ganesha nfs service
 898 ########################################################
 899 startstop_ganesha()
 900 {
 901     _service_name="nfs-ganesha-$CTDB_CLUSTER_FILESYSTEM_TYPE"
 902     case "$1" in
 903         start)
 904             service "$_service_name" start
 905             ;;
 906         stop)
 907             service "$_service_name" stop
 908             ;;
 909         restart)
 910             service "$_service_name" stop
 911             nfs_dump_some_threads "rpc.statd"
 912             service "$_service_name" start
 913             ;;
 914     esac
 915 }
 916
 917 ########################################################
 918 # start/stop the nfs service on different platforms
 919 ########################################################
 920 startstop_nfs() {
 921         PLATFORM="unknown"
 922         [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
 923                 PLATFORM="sles"
 924         }
 925         [ -x $CTDB_ETCDIR/init.d/nfslock -o \
 926             -r /usr/lib/systemd/system/nfs-lock.service ] && {
 927                 PLATFORM="rhel"
 928         }
 929
 930         case $PLATFORM in
 931         sles)
 932                 case $1 in
 933                 start)
 934                         service nfsserver start
 935                         ;;
 936                 stop)
 937                         service nfsserver stop > /dev/null 2>&1
 938                         ;;
 939                 restart)
 940                         set_proc "fs/nfsd/threads" 0
 941                         service nfsserver stop > /dev/null 2>&1
 942                         pkill -9 nfsd
 943                         nfs_dump_some_threads
 944                         service nfsserver start
 945                         ;;
 946                 esac
 947                 ;;
 948         rhel)
 949                 case $1 in
 950                 start)
 951                         service nfslock start
 952                         service nfs start
 953                         ;;
 954                 stop)
 955                         service nfs stop
 956                         service nfslock stop
 957                         ;;
 958                 restart)
 959                         set_proc "fs/nfsd/threads" 0
 960                         service nfs stop > /dev/null 2>&1
 961                         service nfslock stop > /dev/null 2>&1
 962                         pkill -9 nfsd
 963                         nfs_dump_some_threads
 964                         service nfslock start
 965                         service nfs start
 966                         ;;
 967                 esac
 968                 ;;
 969         *)
 970                 echo "Unknown platform. NFS is not supported with ctdb"
 971                 exit 1
 972                 ;;
 973         esac
 974 }
 975
 976 # Dump up to the configured number of nfsd thread backtraces.
 977 nfs_dump_some_threads ()
 978 {
 979     _prog="${1:-nfsd}"
 980
 981     _num="${CTDB_NFS_DUMP_STUCK_THREADS:-5}"
 982     [ $_num -gt 0 ] || return 0
 983
 984     program_stack_traces "$_prog" $_num
 985 }
 986
 987 ########################################################
 988 # start/stop the nfs lockmanager service on different platforms
 989 ########################################################
 990 startstop_nfslock() {
 991         PLATFORM="unknown"
 992         [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
 993                 PLATFORM="sles"
 994         }
 995         [ -x $CTDB_ETCDIR/init.d/nfslock -o \
 996             -r /usr/lib/systemd/system/nfs-lock.service ] && {
 997                 PLATFORM="rhel"
 998         }
 999
1000         case $PLATFORM in
1001         sles)
1002                 # for sles there is no service for lockmanager
1003                 # so we instead just shutdown/restart nfs
1004                 case $1 in
1005                 start)
1006                         service nfsserver start
1007                         ;;
1008                 stop)
1009                         service nfsserver stop > /dev/null 2>&1
1010                         ;;
1011                 restart)
1012                         service nfsserver stop > /dev/null 2>&1
1013                         service nfsserver start
1014                         ;;
1015                 esac
1016                 ;;
1017         rhel)
1018                 case $1 in
1019                 start)
1020                         service nfslock start
1021                         ;;
1022                 stop)
1023                         service nfslock stop > /dev/null 2>&1
1024                         ;;
1025                 restart)
1026                         service nfslock stop > /dev/null 2>&1
1027                         service nfslock start
1028                         ;;
1029                 esac
1030                 ;;
1031         *)
1032                 echo "Unknown platform. NFS locking is not supported with ctdb"
1033                 exit 1
1034                 ;;
1035         esac
1036 }
1037
1038 ########################################################
1039
# Add an IP address to an interface, bringing the interface up first.
#
# $1 - interface name
# $2 - IP address (IPv4 or IPv6)
# $3 - number of mask bits
#
# Returns 1, after printing a message, if the address can not be added
# or (IPv6 only) is still tentative or failed duplicate address
# detection.  Calls die if the interface can not be brought up.
add_ip_to_iface ()
{
    _iface=$1
    _ip=$2
    _maskbits=$3

    # Ensure interface is up
    ip link set "$_iface" up || \
        die "Failed to bringup interface $_iface"

    # Only need to define broadcast for IPv4.  The test has to be made
    # on $_ip, the address that was passed in: testing any other
    # variable treats every address as IPv4 and disables the IPv6
    # handling below.
    case "$_ip" in
        *:*) _bcast=""      ;;
        *)   _bcast="brd +" ;;
    esac

    # $_bcast is deliberately unquoted: it is either empty or 2 words.
    ip addr add "$_ip/$_maskbits" $_bcast dev "$_iface" || {
        echo "Failed to add $_ip/$_maskbits on dev $_iface"
        return 1
    }

    # Wait up to 5 seconds (10 x 0.5s) for IPv6 addresses to stop
    # being tentative...
    if [ -z "$_bcast" ] ; then
        for _x in $(seq 1 10) ; do
            ip addr show to "${_ip}/128" | grep -q "tentative" || break
            sleep 0.5
        done

        # If the address was a duplicate then it won't be on the
        # interface so flag an error.
        _t=$(ip addr show to "${_ip}/128")
        case "$_t" in
            "")
                echo "Failed to add $_ip/$_maskbits on dev $_iface"
                return 1
                ;;
            *tentative*|*dadfailed*)
                # Still unusable: remove it again so it is not left
                # behind in a broken state.
                echo "Failed to add $_ip/$_maskbits on dev $_iface"
                ip addr del "$_ip/$_maskbits" dev "$_iface"
                return 1
                ;;
        esac
    fi
}
1084
# Remove an IP address from an interface.
#
# $1 - interface name
# $2 - IP address
# $3 - number of mask bits
#
# Returns 1, after printing a message, if the address can not be
# removed.
delete_ip_from_iface()
{
    _iface=$1
    _ip=$2
    _maskbits=$3

    # Enable promotion of secondary addresses on this interface before
    # deleting.  Setting this globally would also work but limiting it
    # to interfaces that carry CTDB public addresses avoids surprises.
    # There is no better place to do it, so it is simply set on every
    # call - that is far cheaper than remembering and re-adding
    # secondaries.
    set_proc "sys/net/ipv4/conf/${_iface}/promote_secondaries" 1

    if ! ip addr del "$_ip/$_maskbits" dev "$_iface" ; then
        echo "Failed to del $_ip on dev $_iface"
        return 1
    fi
}
1103
# If the given IP is hosted then print 3 items: maskbits, iface and
# address family ("inet" or "inet6").  Print nothing otherwise.
#
# $1 - IP address to look up
ip_maskbits_iface ()
{
    _addr="$1"

    # An address containing a colon is taken to be IPv6.
    if [ "${_addr#*:}" != "$_addr" ] ; then
        _family="inet6" ; _bits=128
    else
        _family="inet"  ; _bits=32
    fi

    ip addr show to "${_addr}/${_bits}" 2>/dev/null |
    awk -v family="${_family}" '
        # The first line carries the interface name in field 2,
        # followed by ":" and optionally preceded by "@<parent>".
        NR == 1 {
            iface = $2
            sub(":$", "", iface)
            sub("@.*", "", iface)
        }
        # Address lines carry "<address>/<maskbits>" in field 2.
        $1 ~ /inet/ {
            mask = $2
            sub(".*/", "", mask)
            print mask, iface, family
        }'
}
1121
# Remove an address from the interface that currently hosts it.
#
# $1 - IP address, optionally followed by "/<maskbits>"
#
# Does nothing if the address is not hosted.  Output of the removal
# itself is discarded.
drop_ip ()
{
    _addr="${1%/*}"  # Remove optional maskbits

    # Positional parameters become: maskbits, iface, family
    set -- $(ip_maskbits_iface $_addr)

    # Nothing printed means the address is not hosted here.
    [ -n "$1" ] || return 0

    _maskbits="$1"
    _iface="$2"
    echo "Removing public address $_addr/$_maskbits from device $_iface"
    delete_ip_from_iface $_iface $_addr $_maskbits >/dev/null 2>&1
}
1134
# Call drop_ip for the first field of every line in the file named by
# $CTDB_PUBLIC_ADDRESSES.  With that variable unset or empty /dev/null
# is read, so nothing is dropped.
drop_all_public_ips ()
{
    _addresses_file="${CTDB_PUBLIC_ADDRESSES:-/dev/null}"

    # Anything after the first field of a line is ignored.
    while read _ip _x ; do
        drop_ip "$_ip"
    done <"$_addresses_file"
}
1141
# Request a route cache flush for IPv4 and then IPv6 by writing 1 to
# the corresponding "route/flush" proc entries via set_proc_maybe.
flush_route_cache ()
{
    for _ipv in ipv4 ipv6 ; do
        set_proc_maybe sys/net/${_ipv}/route/flush 1
    done
}
1147
########################################################
# Simple counters

# Common setup for the counter functions.
#
# $1 - optional counter name; falls back to $service_name and then to
#      $script_name
#
# Sets $_service_name and $_counter_file and makes sure that the
# directory containing the counter file exists.
_ctdb_counter_common () {
    if [ -n "$1" ] ; then
        _service_name="$1"
    elif [ -n "$service_name" ] ; then
        _service_name="$service_name"
    else
        _service_name="$script_name"
    fi

    _counter_file="$ctdb_fail_dir/$_service_name"

    # The name may itself contain a "/", so create the directory part
    # of the complete path.
    mkdir -p "${_counter_file%/*}" # dirname
}
# Reset a counter to zero by truncating (or creating) its file.
#
# $1 - optional counter name, see _ctdb_counter_common
ctdb_counter_init () {
    _ctdb_counter_common "$1"

    : >"$_counter_file"
}
# Increment a counter by one.
#
# $1 - optional counter name, see _ctdb_counter_common
#
# Counting is unary: the value of a counter is the size of its file
# in bytes, so incrementing appends exactly one byte.
ctdb_counter_incr () {
    _ctdb_counter_common "$1"

    # unary counting!
    # printf rather than "echo -n": the handling of -n by echo is not
    # portable and a shell that prints it literally (plus a newline)
    # would add more than one byte per increment.
    printf '1' >> "$_counter_file"
}
# Print the current value of a counter.
#
# $1 - optional counter name, see _ctdb_counter_common
#
# Counting is unary, so the value is the size of the counter file.
# Prints 0 if the size can not be determined (e.g. no such file).
ctdb_counter_get () {
    _ctdb_counter_common "$1"

    # unary counting!
    if ! stat -c "%s" "$_counter_file" 2>/dev/null ; then
        echo 0
    fi
}
# Compare a counter against a limit.
#
# $1 - "error" (default) to print a message and exit 1 when the limit
#      is hit; anything else just returns 1 silently
# $2 - an integer operator supported by test (default "-ge"), or "%"
#      to hit whenever the counter is a multiple of the limit
# $3 - the limit (default $service_fail_limit)
# $4 - optional counter name, see _ctdb_counter_common
#
# All arguments are optional.  Returns 0 if the limit is not hit.
ctdb_check_counter () {
    _msg="${1:-error}"  # "error"  - anything else is silent on fail
    _op="${2:--ge}"  # an integer operator supported by test
    _limit="${3:-${service_fail_limit}}"

    # Discard the arguments handled above.  "shift 3" fails when fewer
    # than 3 were given, which either aborts the script or leaves $1
    # set to the message type, depending on the shell.
    if [ $# -ge 3 ] ; then
        shift 3
    else
        shift $#
    fi

    # Resolve the counter name in this shell.  The command substitution
    # below runs in a subshell, so the $_service_name it sets would
    # not be available for the error message.
    _ctdb_counter_common "$1"
    _size=$(ctdb_counter_get "$1")

    _hit=false
    if [ "$_op" != "%" ] ; then
        if [ $_size $_op $_limit ] ; then
            _hit=true
        fi
    else
        if [ $(($_size $_op $_limit)) -eq 0 ] ; then
            _hit=true
        fi
    fi
    if $_hit ; then
        if [ "$_msg" = "error" ] ; then
            echo "ERROR: $_size consecutive failures for $_service_name, marking node unhealthy"
            exit 1
        else
            return 1
        fi
    fi
}
1198
1199 ########################################################
1200
# Directory for per-script status files (see ctdb_setstatus) and
# directory for the failure counter files (see _ctdb_counter_common).
ctdb_status_dir="$CTDB_VARDIR/state/service_status"
ctdb_fail_dir="$CTDB_VARDIR/state/failcount"

# Create the state directory of a service and record its name in
# $service_state_dir.
#
# $1 - optional service name (defaults to $service_name)
#
# Prints a message and exits with status 1 if the directory can not be
# created.
ctdb_setup_service_state_dir ()
{
    service_state_dir="$CTDB_VARDIR/state/service_state/${1:-${service_name}}"

    if ! mkdir -p "$service_state_dir" ; then
        echo "Error creating state dir \"$service_state_dir\""
        exit 1
    fi
}
1212
########################################################
# Managed status history, for auto-start/stop
#
# A service counts as "previously managed" while a file named after
# it exists in $ctdb_managed_dir.

ctdb_managed_dir="$CTDB_VARDIR/state/managed_history"

# Set $_ctdb_managed_file to the history file of $service_name.
_ctdb_managed_common ()
{
    _ctdb_managed_file="${ctdb_managed_dir}/${service_name}"
}
1222
# Record that $service_name is managed by creating (or touching) its
# history file, creating the history directory first if necessary.
ctdb_service_managed ()
{
    _ctdb_managed_common

    mkdir -p "$ctdb_managed_dir"
    touch "$_ctdb_managed_file"
}
1229
# Record that $service_name is no longer managed by removing its
# history file.  A missing file is not an error.
ctdb_service_unmanaged ()
{
    _ctdb_managed_common

    rm -f "$_ctdb_managed_file"
}
1235
# Succeed if the history file of $service_name exists as a regular
# file, i.e. the service has been marked as managed.
is_ctdb_previously_managed_service ()
{
    _ctdb_managed_common

    test -f "$_ctdb_managed_file"
}
1241
1242 ########################################################
1243 # Check and set status
1244
# Print a one line report of a recorded status.
#
# $1 - status name, e.g. "unhealthy" or "banned"
# $2 - file containing the description of the problem
log_status_cat ()
{
    # $2 is quoted so that a path containing whitespace or glob
    # characters is read as a single file.
    echo "node is \"$1\", \"${script_name}\" reports problem: $(cat "$2")"
}
1249
# Report the status recorded for $script_name.
#
# Returns 1 if a readable "unhealthy" file exists, 2 if a readable
# "banned" file exists ("unhealthy" is checked first) and 0 otherwise.
# For a non-zero status the recorded problem is printed using
# log_status_cat.
ctdb_checkstatus ()
{
    _status_dir="$ctdb_status_dir/$script_name"

    if [ -r "$_status_dir/unhealthy" ] ; then
        log_status_cat "unhealthy" "$_status_dir/unhealthy"
        return 1
    fi

    if [ -r "$_status_dir/banned" ] ; then
        log_status_cat "banned" "$_status_dir/banned"
        return 2
    fi

    return 0
}
1262
# Record or clear the status of $script_name.
#
# $1 - "unhealthy" or "banned" to record a problem; anything else
#      clears both
# $2 - file whose contents describe the problem (only read when
#      recording)
ctdb_setstatus ()
{
    d="$ctdb_status_dir/$script_name"

    if [ "$1" = "unhealthy" ] || [ "$1" = "banned" ] ; then
        # Keep a copy of the description under the status name.
        mkdir -p "$d"
        cat "$2" >"$d/$1"
    else
        for i in "banned" "unhealthy" ; do
            rm -f "$d/$i"
        done
    fi
}
1278
1279 ##################################################################
1280 # Reconfigure a service on demand
1281
# Common setup for the reconfigure functions.
#
# Sets $_d to the status directory of $service_name, creates that
# directory and sets $_ctdb_service_reconfigure_flag to the path of
# the "reconfigure" flag file inside it.
_ctdb_service_reconfigure_common ()
{
    _d="${ctdb_status_dir}/${service_name}"
    _ctdb_service_reconfigure_flag="${_d}/reconfigure"
    mkdir -p "$_d"
}
1288
# Succeed if the reconfigure flag file of $service_name exists.
ctdb_service_needs_reconfigure ()
{
    _ctdb_service_reconfigure_common

    test -e "$_ctdb_service_reconfigure_flag"
}
1294
# Flag $service_name as needing a reconfigure by creating (or
# truncating) its reconfigure flag file.
ctdb_service_set_reconfigure ()
{
    _ctdb_service_reconfigure_common

    : >"$_ctdb_service_reconfigure_flag"
}
1300
# Clear the reconfigure flag of $service_name by removing its flag
# file.  A missing file is not an error.
ctdb_service_unset_reconfigure ()
{
    _ctdb_service_reconfigure_common

    rm -f "$_ctdb_service_reconfigure_flag"
}
1306
# Reconfigure $service_name now.
#
# The reconfigure flag is cleared before service_reconfigure is
# called.  If service_reconfigure fails its status is returned,
# otherwise the failure counter is reset.
ctdb_service_reconfigure ()
{
    echo "Reconfiguring service \"${service_name}\"..."
    ctdb_service_unset_reconfigure

    if service_reconfigure ; then
        ctdb_counter_init
    else
        # $? still holds the status of service_reconfigure here.
        return $?
    fi
}
1314
# Default service_reconfigure() function: does nothing and succeeds.
# ctdb_service_reconfigure calls whatever function has this name.
service_reconfigure ()
{
    return 0
}
1320
# Try to take the reconfigure lock of $service_name.
#
# The lock file holds the name of the event that took the lock on its
# first line, followed by the PID(s) of the holder(s).  Returns 0 if
# the lock was taken (the file then names $event_name and this shell's
# PID) and 1 if another process listed in the file is still alive.
ctdb_reconfigure_take_lock ()
{
    _ctdb_service_reconfigure_common
    _lock="${_d}/reconfigure_lock"
    mkdir -p "${_lock%/*}" # dirname
    touch "$_lock"

    # The subshell reads the lock file on stdin; flock on descriptor 0
    # serialises concurrent callers while the file is examined and
    # rewritten.
    (
        flock 0

        # Reading a list of PIDs is overkill at the moment, but it
        # keeps working if this is ever extended so that certain
        # events (e.g. takeip) may run several times in parallel and
        # each record their PID in the file.
        read _locker_event
        if [ -n "$_locker_event" ] ; then
            while read _pid ; do
                # Ignore blank lines and our own PID.
                [ -n "$_pid" ] || continue
                [ "$_pid" != $$ ] || continue
                # A live holder means that the lock is taken.
                if kill -0 "$_pid" 2>/dev/null ; then
                    exit 1
                fi
            done
        fi

        printf "%s\n%s\n" "$event_name" $$ >"$_lock"
        exit 0
    ) <"$_lock"
}
1347
# Release the reconfigure lock of $service_name by removing the lock
# file.  A missing file is not an error.
ctdb_reconfigure_release_lock ()
{
    _ctdb_service_reconfigure_common

    _lock="${_d}/reconfigure_lock"
    rm -f "$_lock"
}
1355
# Print the output of the previous "monitor" run of $script_name, as
# reported by "ctdb scriptstatus -X", and exit with its return code.
#
# This function does not return: it always ends in exit.
ctdb_replay_monitor_status ()
{
    echo "Replaying previous status for this script due to reconfigure..."

    # The line of interest looks like this:
    # |monitor|60.nfs|1|ERROR|1314764004.030861|1314764004.035514|foo bar|
    # Some versions leave out the leading separator ('|'), so it is
    # optional in the pattern.
    _out=$(ctdb scriptstatus -X | grep -E "^\|?monitor\|${script_name}\|")

    # Splitting on '|' and re-splitting the result on whitespace is
    # the cheapest way of getting at fields in the middle.
    set -- $(IFS="|" ; echo $_out)
    _code="$3"
    _status="$4"

    # The output field is cut out of the complete line, rather than
    # taken from the words above, so that its contents are preserved.
    # The weak match at the start of the pattern works with and
    # without the leading '|'.
    _out="${_out%|}"
    _err_out="${_out#*monitor|${script_name}|*|*|*|*|}"

    # TIMEDOUT and DISABLED can not be replayed exactly because we
    # can't exit with the correct negative number.  Anything else (OK,
    # ERROR) is replayed unchanged.
    if [ "$_status" = "TIMEDOUT" ] ; then
        # Recast this as an error.
        _code=1
        _err_out="[Replay of TIMEDOUT scriptstatus - note incorrect return code.] ${_err_out}"
    elif [ "$_status" = "DISABLED" ] ; then
        # Recast this as an OK.
        _code=0
        _err_out="[Replay of DISABLED scriptstatus - note incorrect return code.] ${_err_out}"
    fi

    if [ -n "$_err_out" ] ; then
        echo "$_err_out"
    fi
    exit $_code
}
1393
# Handle a pending or requested reconfigure of $service_name for the
# current event.
#
# Only the "monitor", "ipreallocated" and "reconfigure" events are of
# interest, for any other event this returns 0 straight away.  For a
# "reconfigure" event the script always exits here; for "monitor" it
# exits if the lock is held elsewhere.
ctdb_service_check_reconfigure ()
{
    assert_service_name

    case "$event_name" in
        monitor|ipreallocated|reconfigure) : ;;
        *) return 0 ;;
    esac

    if ! ctdb_reconfigure_take_lock ; then
        # Somebody else is running an event we don't want to collide
        # with.  We proceed with caution.
        case "$event_name" in
            reconfigure)
                # Tell whoever called us to retry.
                exit 2
                ;;
            ipreallocated)
                # Defer any scheduled reconfigure and just run the
                # rest of the ipreallocated event, as per the
                # eventscript.  There's an assumption here that the
                # event doesn't depend on any scheduled reconfigure.
                # This is true in the current code.
                return 0
                ;;
            monitor)
                # There is most likely a reconfigure in progress so
                # the service is possibly unstable.  As above, any
                # scheduled reconfigure is deferred.  The previous
                # monitor status is replayed since that's the best
                # information we have.
                ctdb_replay_monitor_status
                ;;
        esac
        return
    fi

    # We hold the lock: no other event covered by this function is
    # running, so it is safe to reconfigure.
    if [ "$event_name" = "reconfigure" ] ; then
        (ctdb_service_reconfigure)
        exit $?
    fi

    if [ "$event_name" = "ipreallocated" ] && \
        ctdb_service_needs_reconfigure ; then
        ctdb_service_reconfigure
    fi

    ctdb_reconfigure_release_lock
}
1448
1449 ##################################################################
1450 # Does CTDB manage this service? - and associated auto-start/stop
1451
# Backward compatibility helper for the CTDB_MANAGES_* variables.
#
# $1 - value of a CTDB_MANAGES_* variable
# $2 - service name that the variable stands for
#
# Appends $2 to $CTDB_MANAGED_SERVICES if $1 is "yes" and $2 is the
# current $service_name.
ctdb_compat_managed_service ()
{
    if [ "$1" = "yes" ] && [ "$2" = "$service_name" ] ; then
        CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2"
    fi
}
1458
# Succeed if $service_name is listed in $CTDB_MANAGED_SERVICES, either
# directly or after the CTDB_MANAGES_* compatibility variables have
# been folded into that list.
is_ctdb_managed_service ()
{
    assert_service_name

    # $t pads the list with a leading and a trailing space so that a
    # complete word can be matched as "<space>$service_name<space>".
    t=" $CTDB_MANAGED_SERVICES "
    case "$t" in
        *" "${service_name}" "*) return 0 ;;
    esac

    # No match, so update $CTDB_MANAGED_SERVICES for backward
    # compatibility and try again.
    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"  "vsftpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"   "samba"
    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND" "winbind"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"   "apache2"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"   "httpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"   "iscsi"
    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"   "clamd"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"     "nfs"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"     "nfs-ganesha-gpfs"

    t=" $CTDB_MANAGED_SERVICES "
    case "$t" in
        *" "${service_name}" "*) return 0 ;;
        *) return 1 ;;
    esac
}
1489
# Start or stop $service_name as required by the current event.
#
# The "service-start" and "service-stop" pseudo-events start/stop the
# service explicitly and then exit.  Otherwise, if
# $CTDB_SERVICE_AUTOSTARTSTOP is "yes", a "monitor" event starts a
# service that has become managed or stops one that is no longer
# managed, and exits with the status of that operation.
ctdb_start_stop_service ()
{
    assert_service_name

    # The pseudo-events are only allowed when we're not
    # auto-starting/stopping and the service is not managed.
    case "$event_name" in
        service-start)
            is_ctdb_managed_service && \
                die 'service-start event not permitted when service is managed'
            [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] && \
                die 'service-start event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
            ctdb_service_start
            exit $?
            ;;
        service-stop)
            is_ctdb_managed_service && \
                die 'service-stop event not permitted when service is managed'
            [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] && \
                die 'service-stop event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
            ctdb_service_stop
            exit $?
            ;;
    esac

    # Do nothing unless configured to, and only ever for "monitor".
    if [ "$CTDB_SERVICE_AUTOSTARTSTOP" != "yes" ] ; then
        return 0
    fi
    if [ "$event_name" != "monitor" ] ; then
        return 0
    fi

    # Compare the configured state with the recorded state and act
    # only when they differ.
    _managed_now=false
    if is_ctdb_managed_service ; then
        _managed_now=true
    fi
    _managed_before=false
    if is_ctdb_previously_managed_service ; then
        _managed_before=true
    fi

    case "${_managed_now}:${_managed_before}" in
        true:false)
            echo "Starting service \"$service_name\" - now managed"
            background_with_logging ctdb_service_start
            exit $?
            ;;
        false:true)
            echo "Stopping service \"$service_name\" - no longer managed"
            background_with_logging ctdb_service_stop
            exit $?
            ;;
    esac
}
1539
# Start $service_name using service_start.
#
# If service_start fails its status is returned.  Otherwise the
# failure counter is reset and ctdb_check_tcp_init is called.
ctdb_service_start ()
{
    # Mark the service as managed as soon as a start is attempted,
    # whether or not the start then succeeds.
    ctdb_service_managed

    if service_start ; then
        ctdb_counter_init
        ctdb_check_tcp_init
    else
        # $? still holds the status of service_start here.
        return $?
    fi
}
1550
# Stop $service_name: mark it as no longer managed, then call
# service_stop and return its status.
ctdb_service_stop ()
{
    ctdb_service_unmanaged

    service_stop
}
1556
# Default service_start() and service_stop() functions.
#
# These may be overridden in an eventscript.

# Start the service named by $service_name via the "service" command.
service_start ()
{
    service "${service_name}" start
}
1564
# Stop the service named by $service_name via the "service" command.
# Like service_start, this is the default implementation.
service_stop ()
{
    service "${service_name}" stop
}
1569
1570 ##################################################################
1571
# Handle the events that are implemented here for every script.
#
# $1    - event name
# $2... - arguments of the event
#
# "status" exits with the result of ctdb_checkstatus and "setstatus"
# exits after passing the remaining arguments to ctdb_setstatus.  For
# any other event this returns without doing anything.
ctdb_standard_event_handler ()
{
    if [ "$1" = "status" ] ; then
        ctdb_checkstatus
        exit
    elif [ "$1" = "setstatus" ] ; then
        shift
        ctdb_setstatus "$@"
        exit
    fi
}
1586
# Run iptables or ip6tables under a lock.
#
# $1    - address family: "inet6" selects ip6tables, anything else
#         selects iptables
# $2... - arguments passed on to the selected command
iptables_wrapper ()
{
    _family="$1" ; shift

    case "$_family" in
        inet6) _iptables_cmd="ip6tables" ;;
        *)     _iptables_cmd="iptables"  ;;
    esac

    # iptables doesn't like being re-entered, so every call is
    # serialised via flock, waiting up to 30 seconds for the lock.
    flock -w 30 "${CTDB_VARDIR}/iptables-ctdb.flock" "$_iptables_cmd" "$@"
}
1599
# AIX (and perhaps others?) doesn't have mktemp
if ! type mktemp >/dev/null 2>&1 ; then
    # Minimal replacement for mktemp.
    #
    # $1 - optional "-d" to create a directory instead of a file
    #
    # Creates a file or directory with a random name under $TMPDIR
    # (default /tmp), accessible only by the owner, and prints its
    # name.  Returns 1 without printing anything if it can not be
    # created.
    mktemp ()
    {
        _dir=false
        if [ "$1" = "-d" ] ; then
            _dir=true
            shift
        fi
        _d="${TMPDIR:-/tmp}"
        # First 10 hex digits of a checksum of some random data.
        _hex10=$(dd if=/dev/urandom count=20 2>/dev/null | \
            md5sum | \
            sed -e 's@\(..........\).*@\1@')
        _t="${_d}/tmp.${_hex10}"
        (
            umask 077
            if $_dir ; then
                mkdir "$_t"
            else
                # With noclobber set the redirection fails if the
                # name already exists, instead of truncating an
                # existing file or writing through a symlink.
                set -C
                : >"$_t"
            fi
        ) || return 1
        echo "$_t"
    }
fi
1625
########################################################
# tickle handling
########################################################

# Bring the tickles registered with ctdb for a port in line with the
# TCP connections currently established to that port on the public
# addresses held by this node.
#
# $1 - TCP port
update_tickles ()
{
        _port="$1"

        tickledir="$CTDB_VARDIR/state/tickles"
        mkdir -p "$tickledir"

        # Both lists go via temporary files, start without stale ones
        _my_connections="${tickledir}/${_port}.connections"
        _my_tickles="${tickledir}/${_port}.tickles"
        rm -f "$_my_connections"
        rm -f "$_my_tickles"

        ctdb_get_pnn

        # What public IPs do I hold?
        _ips=$(ctdb -X ip | awk -F'|' -v pnn=$pnn '$3 == pnn {print $2}')

        # IPs as a regexp choice: "|" between addresses, dots escaped
        _ipsregex=$(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g')
        _ipschoice="(${_ipsregex})"

        # Record connections to our public IPs, as "<source> <dest>"
        netstat -tn |
        awk -v destpat="^${_ipschoice}:${_port}\$" \
          '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
        sort >"$_my_connections"

        # Record our current tickles in the same format.  The first
        # line of the gettickles output is skipped.
        for _i in $_ips ; do
                ctdb -X gettickles $_i $_port |
                awk -F'|' 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
        done |
        sort >"$_my_tickles"

        # Connections without a tickle need one added
        comm -23 "$_my_connections" "$_my_tickles" |
        while read _src _dst ; do
                ctdb addtickle $_src $_dst
        done

        # Tickles without a connection are stale, remove them
        comm -13 "$_my_connections" "$_my_tickles" |
        while read _src _dst ; do
                ctdb deltickle $_src $_dst
        done

        rm -f "$_my_connections" "$_my_tickles"
}
1676
########################################################
# load a site local config file
########################################################

# An explicitly configured file, if it is executable.
if [ -n "$CTDB_RC_LOCAL" ] && [ -x "$CTDB_RC_LOCAL" ] ; then
        . "$CTDB_RC_LOCAL"
fi

# The standard file, if it is executable.
if [ -x $CTDB_BASE/rc.local ] ; then
        . $CTDB_BASE/rc.local
fi

# Every executable file in the standard directory.
if [ -d $CTDB_BASE/rc.local.d ] ; then
        for i in $CTDB_BASE/rc.local.d/* ; do
                if [ -x "$i" ] ; then
                        . "$i"
                fi
        done
fi

# Defaults describing the script that sourced this file.  They are set
# after the site local files have been loaded.
script_name="${0##*/}"       # basename
service_fail_limit=1
event_name="$1"