1 # Hey Emacs, this is a -*- shell-script -*- !!!
3 # utility functions for ctdb event scripts
# Top-level setup: complain when $CTDB_BASE is unset, then establish
# default paths.  Each variable guarded by a "-z" test is only assigned
# when it is empty or unset, so a value provided by the caller wins.
5 if [ -z "$CTDB_BASE" ] ; then
6 echo 'CTDB_BASE unset in CTDB functions file'
11 # CTDB_VARDIR is used elsewhere
12 # shellcheck disable=SC2034
13 CTDB_VARDIR="/usr/local/var/lib/ctdb"
14 ctdb_rundir="/usr/local/var/run/ctdb"
# Command used to invoke the ctdb tool; an existing non-empty $CTDB is kept
16 CTDB="${CTDB:-/usr/local/bin/ctdb}"
18 # Only (and always) override these variables in test code
# Directory under which helpers in this file keep their state files
20 if [ -z "$CTDB_SCRIPT_VARDIR" ] ; then
21 CTDB_SCRIPT_VARDIR="/usr/local/var/lib/ctdb/state"
# Root of the system configuration tree (sysconfig/, default/, init.d/)
24 if [ -z "$CTDB_SYS_ETCDIR" ] ; then
25 CTDB_SYS_ETCDIR="/etc"
# Directory containing helper binaries such as ctdb_killtcp
28 if [ -z "$CTDB_HELPER_BINDIR" ] ; then
29 CTDB_HELPER_BINDIR="/usr/local/libexec/ctdb"
32 #######################################
33 # pull in a system config file, if any
# Rewrite $CTDB_DBDIR when it requests a tmpfs and derive the matching
# mount options in $CTDB_DBDIR_TMPFS_OPTIONS.
# NOTE(review): the branch structure (case/else/fi lines) is not visible
# in this excerpt; the comments below describe only the visible statements.
35 rewrite_ctdb_options ()
# Mount options that are always placed first in the option list
39 _opts_defaults="mode=700"
40 # Get any extra options specified after colon
41 if [ "$CTDB_DBDIR" = "tmpfs" ] ; then
# Strip a leading "tmpfs:" so only the user-supplied options remain
44 _opts="${CTDB_DBDIR#tmpfs:}"
46 # It is OK to repeat mount options - last value wins.
# The "," separator is only emitted when $_opts is non-empty.
47 # CTDB_DBDIR_TMPFS_OPTIONS is used by ctdbd_wrapper
48 # shellcheck disable=SC2034
49 CTDB_DBDIR_TMPFS_OPTIONS="${_opts_defaults}${_opts:+,}${_opts}"
# Point the database directory at a fixed location under $ctdb_rundir
51 CTDB_DBDIR="${ctdb_rundir}/CTDB_DBDIR"
# presumably the non-tmpfs branch: no tmpfs options - TODO confirm
54 # shellcheck disable=SC2034
55 CTDB_DBDIR_TMPFS_OPTIONS=""
62 foo="${service_config:-${service_name}}"
63 if [ -n "$foo" ] ; then
69 if [ "$1" != "ctdb" ] ; then
77 if [ -f "${CTDB_SYS_ETCDIR}/sysconfig/$1" ]; then
78 . "${CTDB_SYS_ETCDIR}/sysconfig/$1"
79 elif [ -f "${CTDB_SYS_ETCDIR}/default/$1" ]; then
80 . "${CTDB_SYS_ETCDIR}/default/$1"
81 elif [ -f "${CTDB_BASE}/sysconfig/$1" ]; then
82 . "${CTDB_BASE}/sysconfig/$1"
85 if [ "$1" = "ctdb" ] ; then
86 _config="${CTDBD_CONF:-${CTDB_BASE}/ctdbd.conf}"
87 if [ -r "$_config" ] ; then
98 ##############################################################
109 # Log given message or stdin to either syslog or a CTDB log file
110 # $1 is the tag passed to logger if syslog is in use.
115 case "$CTDB_LOGGING" in
117 if [ -n "$CTDB_LOGGING" ] ; then
118 _file="${CTDB_LOGGING#file:}"
120 _file="/usr/local/var/log/log.ctdb"
123 if [ -n "$*" ] ; then
131 # Handle all syslog:* variants here too. There's no tool to do
132 # the lossy things, so just use logger.
133 logger -t "ctdbd: ${_tag}" "$*"
138 # When things are run in the background in an eventscript then logging
139 # output might get lost. This is the "solution". :-)
#
# Usage: background_with_logging <command> [<arg>...]
# The command is run with stdin taken from /dev/null and stderr merged
# into stdout; the combined output is piped to script_log using the tag
# "${script_name}&".
140 background_with_logging ()
143 "$@" 2>&1 </dev/null |
144 script_log "${script_name}&"
150 ##############################################################
151 # check number of args for different events
157 echo "ERROR: must supply interface, IP and maskbits"
163 echo "ERROR: must supply old interface, new interface, IP and maskbits"
170 ##############################################################
171 # determine on what type of system (init style) we are running
174 # only do detection if not already set:
175 [ -z "$CTDB_INIT_STYLE" ] || return
177 if [ -x /sbin/startproc ]; then
178 CTDB_INIT_STYLE="suse"
179 elif [ -x /sbin/start-stop-daemon ]; then
180 CTDB_INIT_STYLE="debian"
182 CTDB_INIT_STYLE="redhat"
186 ######################################################
187 # simulate /sbin/service on platforms that don't have it
188 # _service() makes it easier to hook the service() function for testing.
195 # do nothing, when no service was specified
196 [ -z "$_service_name" ] && return
198 if [ -x /sbin/service ]; then
199 $_nice /sbin/service "$_service_name" "$_op"
200 elif [ -x /usr/sbin/service ]; then
201 $_nice /usr/sbin/service "$_service_name" "$_op"
202 elif [ -x /bin/systemctl ]; then
203 $_nice /bin/systemctl "$_op" "$_service_name"
204 elif [ -x "${CTDB_SYS_ETCDIR}/init.d/${_service_name}" ]; then
205 $_nice "${CTDB_SYS_ETCDIR}/init.d/${_service_name}" "$_op"
206 elif [ -x "${CTDB_SYS_ETCDIR}/rc.d/init.d/${_service_name}" ]; then
207 $_nice "${CTDB_SYS_ETCDIR}/rc.d/init.d/${_service_name}" "$_op"
217 ######################################################
218 # simulate /sbin/service (niced) on platforms that don't have it
225 ######################################################
226 # Cached retrieval of PNN from local node. This never changes so why
227 # open a client connection to the server each time this is needed?
230 _pnn_file="${CTDB_SCRIPT_VARDIR}/my-pnn"
231 if [ ! -f "$_pnn_file" ] ; then
232 $CTDB pnn >"$_pnn_file"
238 # Cached retrieval of private IP address from local node. This never changes.
# Look up this node's address once and cache it in a file under
# $CTDB_SCRIPT_VARDIR so later calls do not need to run "$CTDB" again.
240 ctdb_get_ip_address ()
# Cache file for the address
242 _ip_addr_file="${CTDB_SCRIPT_VARDIR}/my-ip-address"
# Only query ctdb when the cache file does not exist yet
243 if [ ! -f "$_ip_addr_file" ] ; then
# Machine-readable (-X) output is '|'-separated; take the 3rd field of
# the 2nd line and store it in the cache file
244 $CTDB -X nodestatus |
245 awk -F '|' 'NR == 2 { print $3 }' >"$_ip_addr_file"
248 # ip_address is used by caller
249 # shellcheck disable=SC2034
253 ######################################################
254 # wrapper around /proc/ settings to allow them to be hooked
256 # 1st arg is relative path under /proc/, 2nd arg is value to set
259 echo "$2" >"/proc/$1"
264 if [ -w "/proc/$1" ] ; then
269 ######################################################
270 # wrapper around getting file contents from /proc/ to allow
271 # this to be hooked for testing
272 # 1st arg is relative path under /proc/
278 ######################################################
279 # Print up to $_max kernel stack traces for processes named $_prog
# Print kernel stack traces for running instances of $_prog.
# NOTE(review): the assignments of $_prog, $_max and the initial $_count
# are not visible in this excerpt.
280 program_stack_traces ()
# Visit every PID that pidof reports for the program name
286 for _pid in $(pidof "$_prog") ; do
# Stop as soon as the counter exceeds the limit
287 [ "$_count" -le "$_max" ] || break
289 # Do this first to avoid racing with process exit
# Read <pid>/stack under /proc via get_proc, discarding error output
290 _stack=$(get_proc "${_pid}/stack" 2>/dev/null)
# Only report (and count) processes for which a stack was obtained
291 if [ -n "$_stack" ] ; then
292 echo "Stack trace for ${_prog}[${_pid}]:"
294 _count=$((_count + 1))
299 ######################################################
300 # Ensure $service_name is set
301 assert_service_name ()
303 [ -n "$service_name" ] || die "INTERNAL ERROR: \$service_name not set"
306 ######################################################
307 # check a set of directories is available
308 # return 1 on a missing directory
309 # directories are read from stdin
310 ######################################################
# Read directory names from stdin, one per line, and return 1 at the
# first one that fails the directory test.  The name is left in $d,
# which ctdb_check_directories uses in its error message.
311 ctdb_check_directories_probe()
# IFS="" keeps leading/trailing whitespace in the line that is read.
# NOTE(review): read is used without -r, so backslashes in a directory
# name are interpreted rather than kept literally.
313 while IFS="" read d ; do
# Testing "<dir>/." rather than "<dir>" itself
319 [ -d "${d}/." ] || return 1
324 ######################################################
325 # check a set of directories is available
326 # directories are read from stdin
327 ######################################################
# Run ctdb_check_directories_probe on stdin and, if it fails, print an
# error naming $service_name and the offending directory.
328 ctdb_check_directories()
# $d is set by ctdb_check_directories_probe to the directory that failed
330 ctdb_check_directories_probe || {
331 echo "ERROR: $service_name directory \"$d\" not available"
336 ######################################################
337 # check a set of tcp ports
338 # usage: ctdb_check_tcp_ports <ports...>
339 ######################################################
341 # Check whether something is listening on all of the given TCP ports
342 # using the "ctdb checktcpport" command.
# Arguments: one or more TCP port numbers.
# NOTE(review): the lines that run $_cmd and set $_out / $_ret, and the
# case statement that dispatches on the result, are not visible in this
# excerpt; only the visible statements are described.
343 ctdb_check_tcp_ports()
# Calling this function without any port is a programming error
345 if [ -z "$1" ] ; then
346 echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
350 for _p ; do # process each function argument (port)
# Command line used to probe this port; also quoted in error messages
351 _cmd="$CTDB checktcpport $_p"
356 echo "$service_name not listening on TCP port $_p"
360 # Couldn't bind, something already listening, next port
# Any other result is reported together with the command that was run
364 echo "unexpected error (${_ret}) running \"${_cmd}\""
365 if [ -n "$_out" ] ; then
373 # All ports listening
377 ######################################################
378 # check a unix socket
379 # usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
380 ######################################################
# Check that a listening unix socket with the given path appears in the
# output of "netstat --unix -a -n"; print an error if it does not.
# NOTE(review): the assignment of $socket_path is not visible here.
381 ctdb_check_unix_socket() {
# Nothing to check when no socket path was given
383 [ -z "$socket_path" ] && return
# NOTE(review): $socket_path is inserted into the grep pattern unescaped,
# so regex metacharacters in the path (e.g. ".") are treated as regex.
385 if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
386 echo "ERROR: $service_name socket $socket_path not found"
391 ################################################
392 # kill off any TCP connections with the given IP
393 ################################################
# Build a list of the established TCP connections for $_ip, hand it to
# the ctdb_killtcp helper and report how many connections remain.
# A 3rd argument of "oneway" is recognised.
# NOTE(review): the assignments of $_iface, $_ip, $_nl and the initial
# values of $_killcount / $__oneway are not visible in this excerpt.
394 kill_tcp_connections ()
400 if [ "$3" = "oneway" ] ; then
# The whole loop runs inside a { } group fed by the pipe, so the
# variables it accumulates stay available to the statements that follow
# inside the group.
404 get_tcp_connections_for_ip "$_ip" | {
# Each input line holds two endpoints: first is read as _dst, second as _src
409 while read _dst _src; do
# Port is whatever follows the last ":" of the first endpoint
410 _destport="${_dst##*:}"
413 # we only do one-way killtcp for CIFS
414 139|445) __oneway=true ;;
417 echo "Killing TCP connection $_src $_dst"
# Always queue the src->dst direction ...
418 _connections="${_connections}${_nl}${_src} ${_dst}"
# ... and the reverse direction too unless one-way was selected
419 if ! $__oneway ; then
420 _connections="${_connections}${_nl}${_dst} ${_src}"
423 _killcount=$((_killcount + 1))
426 if [ $_killcount -eq 0 ] ; then
# Feed the accumulated connection list to the helper on stdin
430 echo "$_connections" | \
431 "${CTDB_HELPER_BINDIR}/ctdb_killtcp" "$_iface" || {
432 echo "Failed to kill TCP connections"
# Re-count the established connections to see whether any survived
436 _remaining=$(get_tcp_connections_for_ip "$_ip" | wc -l)
438 if [ "$_remaining" -eq 0 ] ; then
439 echo "Killed $_killcount TCP connections to released IP $_ip"
# Otherwise report "<remaining>/<attempted>"
443 _t="${_remaining}/${_killcount}"
444 echo "Failed to kill TCP connections for IP $_ip (${_t} remaining)"
448 ##################################################################
449 # kill off the local end for any TCP connections with the given IP
450 ##################################################################
# Thin wrapper: forwards all of its arguments to kill_tcp_connections
# and appends "oneway" as an extra final argument.
# NOTE(review): kill_tcp_connections tests "$3" for "oneway", so this
# presumably expects exactly two arguments from the caller - confirm.
451 kill_tcp_connections_local_only ()
453 kill_tcp_connections "$@" "oneway"
456 ##################################################################
457 # tickle any TCP connections with the given IP
458 ##################################################################
# Log and tickle every established TCP connection for $_ip, in both
# directions.
# NOTE(review): the assignment of $_ip is not visible in this excerpt.
459 tickle_tcp_connections ()
463 # Get connections, both directions
# awk emits each endpoint pair twice: as given and with the two swapped
464 _conns=$(get_tcp_connections_for_ip "$_ip" | \
465 awk '{ print $1, $2 ; print $2, $1 }')
# One log line per connection/direction
467 echo "$_conns" | awk '{ print "Tickle TCP connection", $1, $2 }'
# NOTE(review): this calls bare "ctdb" from $PATH, whereas other
# functions in this file use "$CTDB" - confirm this is intended.
468 echo "$_conns" | ctdb tickle
# Print one line per established TCP connection whose source address
# matches $_ip, consisting of the 3rd and 4th columns of the ss output.
# NOTE(review): the assignment of $_ip is not visible in this excerpt.
471 get_tcp_connections_for_ip ()
# "NR > 1" skips the first (header) line of the ss output
475 ss -tn state established "src [$_ip]" | awk 'NR > 1 {print $3, $4}'
478 ########################################################
486 # Ensure interface is up
487 ip link set "$_iface" up || \
488 die "Failed to bringup interface $_iface"
490 # Only need to define broadcast for IPv4
496 # Intentionally unquoted multi-word value here
497 # shellcheck disable=SC2086
498 ip addr add "$_ip/$_maskbits" $_bcast dev "$_iface" || {
499 echo "Failed to add $_ip/$_maskbits on dev $_iface"
503 # Wait 5 seconds for IPv6 addresses to stop being tentative...
504 if [ -z "$_bcast" ] ; then
505 for _x in $(seq 1 10) ; do
506 ip addr show to "${_ip}/128" | grep -q "tentative" || break
510 # If the address was a duplicate then it won't be on the
511 # interface so flag an error.
512 _t=$(ip addr show to "${_ip}/128")
515 echo "Failed to add $_ip/$_maskbits on dev $_iface"
518 *tentative*|*dadfailed*)
519 echo "Failed to add $_ip/$_maskbits on dev $_iface"
520 ip addr del "$_ip/$_maskbits" dev "$_iface"
# Remove address $_ip/$_maskbits from interface $_iface after enabling
# promote_secondaries for that interface.
# NOTE(review): the assignments of $_iface, $_ip and $_maskbits are not
# visible in this excerpt.
527 delete_ip_from_iface()
533 # This could be set globally for all interfaces but it is probably
534 # better to avoid surprises, so limit it to the interfaces where CTDB
535 # has public IP addresses. There isn't anywhere else convenient
536 # to do this so just set it each time. This is much cheaper than
537 # remembering and re-adding secondaries.
538 set_proc "sys/net/ipv4/conf/${_iface}/promote_secondaries" 1
# Report a failed deletion
540 ip addr del "$_ip/$_maskbits" dev "$_iface" || {
541 echo "Failed to del $_ip on dev $_iface"
546 # If the given IP is hosted then print 2 items: maskbits and iface
555 ip addr show to "${_addr}/${_bits}" 2>/dev/null | \
556 awk 'NR == 1 { iface = $2; sub(":$", "", iface) ;
557 sub("@.*", "", iface) }
558 $1 ~ /inet/ { mask = $2; sub(".*/", "", mask);
564 _addr="${1%/*}" # Remove optional maskbits
566 # Intentional word splitting here
567 # shellcheck disable=SC2046
568 set -- $(ip_maskbits_iface "$_addr")
569 if [ -n "$1" ] ; then
572 echo "Removing public address $_addr/$_maskbits from device $_iface"
573 delete_ip_from_iface "$_iface" "$_addr" "$_maskbits" >/dev/null 2>&1
# Iterate over the entries of the public addresses file.
# NOTE(review): the loop body is not visible in this excerpt.
577 drop_all_public_ips ()
579 # _x is intentionally ignored
580 # shellcheck disable=SC2034
# First word of each line goes to _ip, the rest of the line to _x
581 while read _ip _x ; do
# With $CTDB_PUBLIC_ADDRESSES unset or empty the loop reads /dev/null,
# i.e. it runs zero times
583 done <"${CTDB_PUBLIC_ADDRESSES:-/dev/null}"
588 set_proc_maybe sys/net/ipv4/route/flush 1
589 set_proc_maybe sys/net/ipv6/route/flush 1
592 ########################################################
593 # Interface monitoring
595 # If the interface is a virtual one (e.g. VLAN) then get the
596 # underlying interface
# Work out the underlying interface from "ip link show" output.  The
# visible caller passes that output as the 1st argument and captures
# this function's stdout as the real interface name.
# NOTE(review): most statements of this function are not visible in this
# excerpt; the existing comments below are kept as they are.
597 interface_get_real ()
599 # Output of "ip link show <iface>"
602 # Extract the full interface description to see if it is a VLAN
603 _t=$(echo "$_iface_info" |
604 awk 'NR == 1 { iface = $2; sub(":$", "", iface) ;
608 # VLAN: use the underlying interface, after the '@'
612 # Not a regular VLAN. For backward compatibility, assume
613 # there is some other sort of VLAN that doesn't have the
614 # '@' in the output and only use what is before a '.'. If
615 # there is no '.' then this will be the whole interface
621 # Check whether an interface is operational
626 _iface_info=$(ip link show "$_iface" 2>&1) || {
627 echo "ERROR: Monitored interface ${_iface} does not exist"
632 # If the interface is a virtual one (e.g. VLAN) then get the
633 # underlying interface.
634 _realiface=$(interface_get_real "$_iface_info")
636 if _bi=$(get_proc "net/bonding/${_realiface}" 2>/dev/null) ; then
637 # This is a bond: various monitoring strategies
638 echo "$_bi" | grep -q 'Currently Active Slave: None' && {
639 echo "ERROR: No active slaves for bond device ${_realiface}"
642 echo "$_bi" | grep -q '^MII Status: up' || {
643 echo "ERROR: public network interface ${_realiface} is down"
646 echo "$_bi" | grep -q '^Bonding Mode: IEEE 802.3ad Dynamic link aggregation' && {
647 # This works around a bug in the driver where the
648 # overall bond status can be up but none of the actual
649 # physical interfaces have a link.
650 echo "$_bi" | grep 'MII Status:' | tail -n +2 | grep -q '^MII Status: up' || {
651 echo "ERROR: No active slaves for 802.ad bond device ${_realiface}"
661 # loopback is always working
665 # we don't know how to test ib links
669 ethtool "$_iface" | grep -q 'Link detected: yes' || {
670 # On some systems, this is not successful when a
671 # cable is plugged but the interface has not been
672 # brought up previously. Bring the interface up
674 ip link set "$_iface" up
675 ethtool "$_iface" | grep -q 'Link detected: yes' || {
676 echo "ERROR: No link on the public network interface ${_iface}"
686 ########################################################
# Shared setup for the ctdb_counter_* functions.
# $1 (optional): service name; falls back to $service_name and then to
#                $script_name when empty or unset.
# Sets $_service_name and $_counter_file and makes sure the directory
# that holds the counter file exists.
688 _ctdb_counter_common () {
689 _service_name="${1:-${service_name:-${script_name}}}"
690 _counter_file="${CTDB_SCRIPT_VARDIR}/failcount/${_service_name}"
691 mkdir -p "${_counter_file%/*}" # dirname
# $1 (optional): service name, passed through to _ctdb_counter_common.
# NOTE(review): the statement that follows the common setup is not
# visible in this excerpt.
693 # Some code passes an argument
694 # shellcheck disable=SC2120
695 ctdb_counter_init () {
696 _ctdb_counter_common "$1"
# Increment the counter for a service.
# $1 (optional): service name, passed through to _ctdb_counter_common.
700 ctdb_counter_incr () {
701 _ctdb_counter_common "$1"
703 # unary counting using newlines!
# Each increment appends exactly one newline to the counter file
704 echo >>"$_counter_file"
# Print the current counter value for a service.
# $1 (optional): service name, passed through to _ctdb_counter_common.
706 ctdb_counter_get () {
707 _ctdb_counter_common "$1"
# The counter value is the size of the counter file in bytes
# (ctdb_counter_incr appends one newline per increment).  If stat fails,
# e.g. because the file does not exist, print 0 instead.
709 stat -c "%s" "$_counter_file" 2>/dev/null || echo 0
712 ########################################################
# Create the state directory for a service and print its path.
# $1 (optional): service name; defaults to $service_name.
# Calls die if the directory cannot be created.
714 ctdb_setup_service_state_dir ()
716 _s="${1:-${service_name}}"
718 _service_state_dir="${CTDB_SCRIPT_VARDIR}/service_state/${_s}"
719 mkdir -p "$_service_state_dir" ||
720 die "Error creating state dir \"${_service_state_dir}\""
# The path is written to stdout
722 echo "$_service_state_dir"
725 ##################################################################
726 # Reconfigure a service on demand
# Shared setup for the ctdb_service_*reconfigure functions: computes the
# per-service status directory ($_d) and the path of the "reconfigure"
# flag file inside it ($_ctdb_service_reconfigure_flag).
728 _ctdb_service_reconfigure_common ()
730 _d="${CTDB_SCRIPT_VARDIR}/service_status/${service_name}"
732 _ctdb_service_reconfigure_flag="$_d/reconfigure"
# Test whether the "reconfigure" flag file for $service_name exists.
735 ctdb_service_needs_reconfigure ()
737 _ctdb_service_reconfigure_common
# The exit status of this test is the last visible statement
738 [ -e "$_ctdb_service_reconfigure_flag" ]
# Mark $service_name as needing reconfiguration.
741 ctdb_service_set_reconfigure ()
743 _ctdb_service_reconfigure_common
# A bare redirection creates the flag file (or truncates an existing one)
744 >"$_ctdb_service_reconfigure_flag"
# Clear the "needs reconfiguration" mark for $service_name.
747 ctdb_service_unset_reconfigure ()
749 _ctdb_service_reconfigure_common
# -f: no error if the flag file is already absent
750 rm -f "$_ctdb_service_reconfigure_flag"
# Reconfigure $service_name: announce it, clear the reconfigure flag and
# call service_reconfigure.
753 ctdb_service_reconfigure ()
755 echo "Reconfiguring service \"${service_name}\"..."
# The flag is cleared before the reconfigure attempt is made
756 ctdb_service_unset_reconfigure
# Propagate a failure status from service_reconfigure to the caller
757 service_reconfigure || return $?
758 # Intentionally have this use $service_name as default
759 # shellcheck disable=SC2119
763 # Default service_reconfigure() function does nothing.
764 service_reconfigure ()
769 ##################################################################
770 # Does CTDB manage this service?
# $1: value of a CTDB_MANAGES_* variable
# $2: service name associated with that variable
# When $1 is "yes" and $2 equals $service_name, append $2 to the
# space-separated list in $CTDB_MANAGED_SERVICES.
772 ctdb_compat_managed_service ()
# NOTE(review): "-a" inside "[" is marked obsolescent by POSIX and is
# flagged by ShellCheck (SC2166); "[ ... ] && [ ... ]" is the usual
# replacement.
774 if [ "$1" = "yes" -a "$2" = "$service_name" ] ; then
775 CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2"
# Succeed if $service_name appears in $CTDB_MANAGED_SERVICES, either
# directly or after adding entries derived from the CTDB_MANAGES_*
# variables.
779 is_ctdb_managed_service ()
783 # $t is used just for readability and to allow more accurate
784 # matching via leading/trailing spaces
785 t=" $CTDB_MANAGED_SERVICES "
787 # Return 0 if "<space>$service_name<space>" appears in $t
# "${t#* name }" strips the shortest prefix ending in " name "; the
# result differs from $t only when that pattern matched.
788 if [ "${t#* ${service_name} }" != "${t}" ] ; then
792 # If above didn't match then update $CTDB_MANAGED_SERVICES for
793 # backward compatibility and try again.
# Each call appends its 2nd argument only when the variable is "yes" and
# the name equals $service_name.  CTDB_MANAGES_HTTPD is checked against
# both "apache2" and "httpd".
794 ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD" "vsftpd"
795 ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA" "samba"
796 ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND" "winbind"
797 ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD" "apache2"
798 ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD" "httpd"
799 ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI" "iscsi"
800 ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD" "clamd"
801 ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs"
# Rebuild the padded list now that it may have been extended
803 t=" $CTDB_MANAGED_SERVICES "
805 # Return 0 if "<space>$service_name<space>" appears in $t
806 [ "${t#* ${service_name} }" != "${t}" ]
809 # Default service_start() and service_stop() functions.
811 # These may be overridden in an eventscript.
814 service "$service_name" start
819 service "$service_name" stop
822 ##################################################################
824 # This exists only for backward compatibility with 3rd party scripts
826 ctdb_standard_event_handler ()
834 if [ "$_family" = "inet6" ] ; then
835 _iptables_cmd="ip6tables"
837 _iptables_cmd="iptables"
840 # iptables doesn't like being re-entered, so flock-wrap it.
841 flock -w 30 "${CTDB_SCRIPT_VARDIR}/iptables.flock" "$_iptables_cmd" "$@"
844 # AIX (and perhaps others?) doesn't have mktemp
845 # type is commonly supported and more portable than which(1)
846 # shellcheck disable=SC2039
847 if ! type mktemp >/dev/null 2>&1 ; then
851 if [ "$1" = "-d" ] ; then
856 _hex10=$(dd if=/dev/urandom count=20 2>/dev/null | \
858 sed -e 's@\(..........\).*@\1@')
859 _t="${_d}/tmp.${_hex10}"
872 ######################################################################
873 # NFS callout handling
879 if [ -z "$CTDB_NFS_CALLOUT" ] ; then
880 CTDB_NFS_CALLOUT="${CTDB_BASE}/nfs-linux-kernel-callout"
882 # Always export, for statd callout
883 export CTDB_NFS_CALLOUT
885 # If the callout wants to use this then it must create it
886 export CTDB_NFS_CALLOUT_STATE_DIR="${_state_dir}/callout-state"
888 # Export, if set, for use by clustered NFS callouts
889 if [ -n "$CTDB_NFS_STATE_FS_TYPE" ] ; then
890 export CTDB_NFS_STATE_FS_TYPE
892 if [ -n "$CTDB_NFS_STATE_MNT" ] ; then
893 export CTDB_NFS_STATE_MNT
896 nfs_callout_cache="${_state_dir}/nfs_callout_cache"
897 nfs_callout_cache_callout="${nfs_callout_cache}/CTDB_NFS_CALLOUT"
898 nfs_callout_cache_ops="${nfs_callout_cache}/ops"
# Rebuild the on-disk cache of operations supported by the NFS callout.
# NOTE(review): the branch structure around the two "touch" statements
# is not visible in this excerpt.
901 nfs_callout_register ()
# Start from an empty ops directory
903 mkdir -p "$nfs_callout_cache_ops"
904 rm -f "$nfs_callout_cache_ops"/*
# Remember which callout this cache was built for
906 echo "$CTDB_NFS_CALLOUT" >"$nfs_callout_cache_callout"
# Ask the callout what it supports.  eval is used, so $CTDB_NFS_CALLOUT
# is re-parsed by the shell and may carry arguments of its own.
908 _t=$(eval "$CTDB_NFS_CALLOUT" "register")
909 if [ -n "$_t" ] ; then
# One marker file is created per line that is read
911 while IFS="" read _op ; do
912 touch "${nfs_callout_cache_ops}/${_op}"
# "ALL" marker; presumably used when the callout listed nothing - TODO confirm
915 touch "${nfs_callout_cache_ops}/ALL"
921 # Re-run registration if $CTDB_NFS_CALLOUT has changed
923 if [ -r "$nfs_callout_cache_callout" ] ; then
924 read _prev <"$nfs_callout_cache_callout"
926 if [ "$CTDB_NFS_CALLOUT" != "$_prev" ] ; then
930 # Run the operation if it is registered...
931 if [ -e "${nfs_callout_cache_ops}/${1}" ] || \
932 [ -e "${nfs_callout_cache_ops}/ALL" ]; then
933 eval "$CTDB_NFS_CALLOUT" "$@"
937 ########################################################
939 ########################################################
945 tickledir="${CTDB_SCRIPT_VARDIR}/tickles"
946 mkdir -p "$tickledir"
948 # What public IPs do I hold?
950 _ips=$($CTDB -X ip | awk -F'|' -v pnn="$_pnn" '$3 == pnn {print $2}')
952 # IPs and port as ss filters
954 for _ip in $_ips ; do
955 _ip_filter="${_ip_filter}${_ip_filter:+ || }src [${_ip}]"
957 _port_filter="sport == :${_port}"
959 # Record connections to our public IPs in a temporary file.
960 # This temporary file is in CTDB's private state directory and
961 # $$ is used to avoid a very rare race involving CTDB's script
962 # debugging. No security issue, nothing to see here...
963 _my_connections="${tickledir}/${_port}.connections.$$"
964 # Parentheses are needed around the filters for precedence but
965 # the parentheses can't be empty!
966 ss -tn state established \
967 "${_ip_filter:+( ${_ip_filter} )}" \
968 "${_port_filter:+( ${_port_filter} )}" |
969 awk 'NR > 1 {print $4, $3}' |
970 sort >"$_my_connections"
972 # Record our current tickles in a temporary file
973 _my_tickles="${tickledir}/${_port}.tickles.$$"
975 $CTDB -X gettickles "$_i" "$_port" |
976 awk -F'|' 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
980 # Add tickles for connections that we haven't already got tickles for
981 comm -23 "$_my_connections" "$_my_tickles" | \
984 # Remove tickles for connections that are no longer there
985 comm -13 "$_my_connections" "$_my_tickles" | \
988 rm -f "$_my_connections" "$_my_tickles"
990 # Remove stale files from killed scripts
991 # Files can't have spaces in name, more portable than -print0/-0
992 # shellcheck disable=SC2038
993 (cd "$tickledir" && find . -type f -mmin +10 | xargs -r rm)
996 ########################################################
997 # load a site local config file
998 ########################################################
# Site-local hooks are sourced (".") into the current shell, so they can
# define or override variables and functions from this file.  Only files
# or directories that pass the tests below are considered.
# NOTE(review): "-a" inside "[" is marked obsolescent by POSIX and is
# flagged by ShellCheck (SC2166).
1000 [ -n "$CTDB_RC_LOCAL" -a -x "$CTDB_RC_LOCAL" ] && {
# An executable rc.local directly under $CTDB_BASE
1004 [ -x "${CTDB_BASE}/rc.local" ] && {
1005 . "${CTDB_BASE}/rc.local"
# Every executable entry in $CTDB_BASE/rc.local.d, in glob order
1008 [ -d "${CTDB_BASE}/rc.local.d" ] && {
1009 for i in "${CTDB_BASE}/rc.local.d"/* ; do
1010 [ -x "$i" ] && . "$i"
1014 script_name="${0##*/}" # basename