1 # Hey Emacs, this is a -*- shell-script -*- !!!
3 # utility functions for ctdb event scripts
5 if [ -z "$CTDB_BASE" ]; then
6 echo 'CTDB_BASE unset in CTDB functions file'
11 # CTDB_VARDIR is used elsewhere
12 # shellcheck disable=SC2034
13 CTDB_VARDIR="/usr/local/var/lib/ctdb"
15 CTDB="${CTDB:-/usr/local/bin/ctdb}"
17 # Only (and always) override these variables in test code
19 if [ -z "$CTDB_SCRIPT_VARDIR" ]; then
20 CTDB_SCRIPT_VARDIR="/usr/local/var/lib/ctdb/scripts"
23 if [ -z "$CTDB_SYS_ETCDIR" ]; then
24 CTDB_SYS_ETCDIR="/etc"
27 if [ -z "$CTDB_HELPER_BINDIR" ]; then
28 CTDB_HELPER_BINDIR="/usr/local/libexec/ctdb"
31 #######################################
32 # pull in a system config file, if any
38 if [ -f "${CTDB_SYS_ETCDIR}/sysconfig/${_i}" ]; then
39 . "${CTDB_SYS_ETCDIR}/sysconfig/${_i}"
41 elif [ -f "${CTDB_SYS_ETCDIR}/default/${_i}" ]; then
42 . "${CTDB_SYS_ETCDIR}/default/${_i}"
48 # load_script_options [ component script ]
49 # script is an event script name relative to a component
50 # component is currently ignored
55 elif [ $# -eq 0 ]; then
58 die "usage: load_script_options [ component script ]"
61 _options="${CTDB_BASE}/script.options"
63 if [ -r "$_options" ]; then
67 if [ -n "$_script" ]; then
68 _s="${CTDB_BASE}/events/legacy/${_script}"
72 _options="${_s}.options"
74 if [ -r "$_options" ]; then
79 ##############################################################
90 # Log given message or stdin to either syslog or a CTDB log file
91 # $1 is the tag passed to logger if syslog is in use.
97 case "$CTDB_LOGGING" in
106 if [ -n "$CTDB_LOGGING" ]; then
107 _file="${CTDB_LOGGING#file:}"
109 _file="/usr/local/var/log/log.ctdb"
120 # Handle all syslog:* variants here too. There's no tool to do
121 # the lossy things, so just use logger.
122 logger -t "ctdbd: ${_tag}" "$@"
127 # When things are run in the background in an eventscript then logging
128 # output might get lost. This is the "solution". :-)
129 background_with_logging()
132 "$@" 2>&1 </dev/null |
133 script_log "${script_name}&"
139 ##############################################################
140 # check number of args for different events
146 echo "ERROR: must supply interface, IP and maskbits"
152 echo "ERROR: must supply old interface, new interface, IP and maskbits"
159 ##############################################################
160 # determine on what type of system (init style) we are running
163 # only do detection if not already set:
164 if [ -n "$CTDB_INIT_STYLE" ]; then
168 if [ -x /sbin/startproc ]; then
169 CTDB_INIT_STYLE="suse"
170 elif [ -x /sbin/start-stop-daemon ]; then
171 CTDB_INIT_STYLE="debian"
173 CTDB_INIT_STYLE="redhat"
177 ######################################################
178 # simulate /sbin/service on platforms that don't have it
179 # _service() makes it easier to hook the service() function for
186 # do nothing, when no service was specified
187 [ -z "$_service_name" ] && return
189 if [ -x /sbin/service ]; then
190 $_nice /sbin/service "$_service_name" "$_op"
191 elif [ -x /usr/sbin/service ]; then
192 $_nice /usr/sbin/service "$_service_name" "$_op"
193 elif [ -x /bin/systemctl ]; then
194 $_nice /bin/systemctl "$_op" "$_service_name"
195 elif [ -x "${CTDB_SYS_ETCDIR}/init.d/${_service_name}" ]; then
196 $_nice "${CTDB_SYS_ETCDIR}/init.d/${_service_name}" "$_op"
197 elif [ -x "${CTDB_SYS_ETCDIR}/rc.d/init.d/${_service_name}" ]; then
198 $_nice "${CTDB_SYS_ETCDIR}/rc.d/init.d/${_service_name}" "$_op"
208 ######################################################
209 # simulate /sbin/service (niced) on platforms that don't have it
216 ######################################################
217 # Cached retrieval of PNN from local node. This never changes so why
218 # open a client connection to the server each time this is needed?
221 _pnn_file="${CTDB_SCRIPT_VARDIR}/my-pnn"
222 if [ ! -f "$_pnn_file" ]; then
223 $CTDB pnn >"$_pnn_file"
229 # Cached retrieval of private IP address from local node. This never
231 ctdb_get_ip_address()
233 _ip_addr_file="${CTDB_SCRIPT_VARDIR}/my-ip-address"
234 if [ ! -f "$_ip_addr_file" ]; then
235 $CTDB -X nodestatus |
236 awk -F '|' 'NR == 2 { print $3 }' >"$_ip_addr_file"
242 # Cached retrieval of database options for use by event scripts.
244 # If the variables are already set then they should not be overwritten
245 # - this should only happen during event script testing.
246 ctdb_get_db_options()
248 _db_opts_file="${CTDB_SCRIPT_VARDIR}/db_options.cache"
250 if [ ! -f "$_db_opts_file" ]; then
252 ctdb_translate_option "database" \
253 "volatile database directory" \
255 ctdb_translate_option "database" \
256 "persistent database directory" \
257 "CTDB_DBDIR_PERSISTENT"
258 ctdb_translate_option "database" \
259 "state database directory" \
267 ctdb_translate_option()
273 # ctdb-config already prints an error if something goes wrong
274 _t=$("${CTDB_HELPER_BINDIR}/ctdb-config" get "$_section" "$_opt") ||
276 echo "${_variable}=\"${_t}\""
279 ######################################################
280 # wrapper around /proc/ settings to allow them to be hooked
282 # 1st arg is relative path under /proc/, 2nd arg is value to set
285 echo "$2" >"/proc/$1"
290 if [ -w "/proc/$1" ]; then
295 ######################################################
296 # wrapper around getting file contents from /proc/ to allow
297 # this to be hooked for testing
298 # 1st arg is relative path under /proc/
304 ######################################################
305 # Print up to $_max kernel stack traces for processes named $_prog
306 program_stack_traces()
312 for _pid in $(pidof "$_prog"); do
313 [ "$_count" -le "$_max" ] || break
315 # Do this first to avoid racing with process exit
316 _stack=$(get_proc "${_pid}/stack" 2>/dev/null)
317 if [ -n "$_stack" ]; then
318 echo "Stack trace for ${_prog}[${_pid}]:"
320 _count=$((_count + 1))
325 ######################################################
326 # Ensure $service_name is set
327 assert_service_name()
329 # service_name is set by the event script
330 # shellcheck disable=SC2154
331 [ -n "$service_name" ] || die "INTERNAL ERROR: \$service_name not set"
334 ######################################################
335 # check a set of directories is available
336 # return 1 on a missing directory
337 # directories are read from stdin
338 ######################################################
339 ctdb_check_directories_probe()
341 while IFS="" read -r d; do
347 [ -d "${d}/." ] || return 1
353 ######################################################
354 # check a set of directories is available
355 # directories are read from stdin
356 ######################################################
357 ctdb_check_directories()
359 ctdb_check_directories_probe || {
360 echo "ERROR: $service_name directory \"$d\" not available"
365 ######################################################
366 # check a set of tcp ports
367 # usage: ctdb_check_tcp_ports <ports...>
368 ######################################################
370 # Check whether something is listening on all of the given TCP ports
371 # using the "ctdb checktcpport" command.
372 ctdb_check_tcp_ports()
375 echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
379 for _p; do # process each function argument (port)
380 _cmd="$CTDB checktcpport $_p"
385 echo "$service_name not listening on TCP port $_p"
389 # Couldn't bind, something already listening, next port
393 echo "unexpected error (${_ret}) running \"${_cmd}\""
394 if [ -n "$_out" ]; then
402 # All ports listening
406 ######################################################
407 # check a unix socket
408 # usage: ctdb_check_unix_socket SOCKPATH
409 ######################################################
410 ctdb_check_unix_socket()
414 if [ -z "$_sockpath" ]; then
415 echo "ERROR: ctdb_check_unix_socket() requires socket path"
419 _out=$(ss -l -x "src ${_sockpath}" | tail -n +2)
420 if [ -z "$_out" ]; then
421 echo "ERROR: ${service_name} not listening on ${_sockpath}"
426 ################################################
427 # kill off any TCP connections with the given IP
428 ################################################
429 kill_tcp_connections()
435 if [ "$3" = "oneway" ]; then
439 get_tcp_connections_for_ip "$_ip" | {
444 while read -r _dst _src; do
445 _destport="${_dst##*:}"
448 # we only do one-way killtcp for CIFS
449 139 | 445) __oneway=true ;;
452 _connections="${_connections}${_nl}${_src} ${_dst}"
454 _connections="${_connections}${_nl}${_dst} ${_src}"
457 _killcount=$((_killcount + 1))
460 if [ $_killcount -eq 0 ]; then
464 if [ -n "$CTDB_KILLTCP_DEBUGLEVEL" ]; then
465 _debuglevel="$CTDB_KILLTCP_DEBUGLEVEL"
467 _debuglevel="$CTDB_DEBUGLEVEL"
469 echo "$_connections" |
470 CTDB_DEBUGLEVEL="$_debuglevel" \
471 "${CTDB_HELPER_BINDIR}/ctdb_killtcp" "$_iface" || {
472 echo "Failed to kill TCP connections"
476 _connections=$(get_tcp_connections_for_ip "$_ip")
477 if [ -z "$_connections" ]; then
480 _remaining=$(echo "$_connections" | wc -l)
483 _actually_killed=$((_killcount - _remaining))
485 _t="${_actually_killed}/${_killcount}"
486 echo "Killed ${_t} TCP connections to released IP $_ip"
488 if [ -n "$_connections" ]; then
489 echo "Remaining connections:"
490 echo "$_connections" | sed -e 's|^| |'
495 ##################################################################
496 # kill off the local end for any TCP connections with the given IP
497 ##################################################################
498 kill_tcp_connections_local_only()
500 kill_tcp_connections "$@" "oneway"
503 ##################################################################
504 # tickle any TCP connections with the given IP
505 ##################################################################
506 tickle_tcp_connections()
510 # Get connections, both directions
511 _conns=$(get_tcp_connections_for_ip "$_ip" |
512 awk '{ print $1, $2 ; print $2, $1 }')
514 echo "$_conns" | awk '{ print "Tickle TCP connection", $1, $2 }'
515 echo "$_conns" | ctdb tickle
518 get_tcp_connections_for_ip()
522 ss -tn state established "src [$_ip]" | awk 'NR > 1 {print $3, $4}'
525 ########################################################
533 # Ensure interface is up
534 ip link set "$_iface" up ||
535 die "Failed to bringup interface $_iface"
537 # Only need to define broadcast for IPv4
543 # Intentionally unquoted multi-word value here
544 # shellcheck disable=SC2086
545 ip addr add "$_ip/$_maskbits" $_bcast dev "$_iface" || {
546 echo "Failed to add $_ip/$_maskbits on dev $_iface"
550 # Wait 5 seconds for IPv6 addresses to stop being tentative...
551 if [ -z "$_bcast" ]; then
552 for _x in $(seq 1 10); do
553 ip addr show to "${_ip}/128" | grep -q "tentative" || break
557 # If the address was a duplicate then it won't be on the
558 # interface so flag an error.
559 _t=$(ip addr show to "${_ip}/128")
562 echo "Failed to add $_ip/$_maskbits on dev $_iface"
565 *tentative* | *dadfailed*)
566 echo "Failed to add $_ip/$_maskbits on dev $_iface"
567 ip addr del "$_ip/$_maskbits" dev "$_iface"
574 delete_ip_from_iface()
580 # This could be set globally for all interfaces but it is probably
581 # better to avoid surprises, so limit it to the interfaces where CTDB
582 # has public IP addresses. There isn't anywhere else convenient
583 # to do this so just set it each time. This is much cheaper than
584 # remembering and re-adding secondaries.
585 set_proc "sys/net/ipv4/conf/${_iface}/promote_secondaries" 1
587 ip addr del "$_ip/$_maskbits" dev "$_iface" || {
588 echo "Failed to del $_ip on dev $_iface"
593 # If the given IP is hosted then print 2 items: maskbits and iface
602 ip addr show to "${_addr}/${_bits}" 2>/dev/null |
603 awk 'NR == 1 { iface = $2; sub(":$", "", iface) ;
604 sub("@.*", "", iface) }
605 $1 ~ /inet/ { mask = $2; sub(".*/", "", mask);
611 _addr="${1%/*}" # Remove optional maskbits
613 # Intentional word splitting here
614 # shellcheck disable=SC2046
615 set -- $(ip_maskbits_iface "$_addr")
619 echo "Removing public address $_addr/$_maskbits from device $_iface"
620 delete_ip_from_iface "$_iface" "$_addr" "$_maskbits" >/dev/null 2>&1
624 drop_all_public_ips()
626 # _x is intentionally ignored
627 # shellcheck disable=SC2034
628 while read -r _ip _x; do
633 done <"${CTDB_BASE}/public_addresses"
638 set_proc_maybe sys/net/ipv4/route/flush 1
639 set_proc_maybe sys/net/ipv6/route/flush 1
642 ########################################################
643 # Interface monitoring
645 # If the interface is a virtual one (e.g. VLAN) then get the
646 # underlying interface
651 # If $_iface is a VLAN (i.e. contains an '@') then strip everything
652 # before the '@', otherwise print the whole interface
656 # Check whether an interface is operational
661 _iface_info=$(ip -br link show "$_iface" 2>&1) || {
662 echo "ERROR: Monitored interface ${_iface} does not exist"
666 # If the interface is a virtual one (e.g. VLAN) then get the
667 # underlying interface.
668 _realiface=$(interface_get_real "${_iface_info%% *}")
670 if _bi=$(get_proc "net/bonding/${_realiface}" 2>/dev/null); then
671 # This is a bond: various monitoring strategies
672 echo "$_bi" | grep -q 'Currently Active Slave: None' && {
673 echo "ERROR: No active slaves for bond device ${_realiface}"
676 echo "$_bi" | grep -q '^MII Status: up' || {
677 echo "ERROR: public network interface ${_realiface} is down"
680 echo "$_bi" | grep -q '^Bonding Mode: IEEE 802.3ad Dynamic link aggregation' && {
681 # This works around a bug in the driver where the
682 # overall bond status can be up but none of the actual
683 # physical interfaces have a link.
684 echo "$_bi" | grep 'MII Status:' | tail -n +2 | grep -q '^MII Status: up' || {
685 echo "ERROR: No active slaves for 802.ad bond device ${_realiface}"
695 # loopback is always working
699 # we don't know how to test ib links
703 ethtool "$_iface" | grep -q 'Link detected: yes' || {
704 # On some systems, this is not successful when a
705 # cable is plugged but the interface has not been
706 # brought up previously. Bring the interface up
708 ip link set "$_iface" up
709 ethtool "$_iface" | grep -q 'Link detected: yes' || {
710 echo "ERROR: No link on the public network interface ${_iface}"
720 ########################################################
722 _ctdb_counter_common()
724 [ $# -le 1 ] || die "usage: _ctdb_counter_common [name]"
726 if [ $# -eq 1 ]; then
727 _counter_name="${1}.failcount"
729 _counter_name="failcount"
732 if [ -z "$script_state_dir" ]; then
733 die "ctdb_counter_* functions need ctdb_setup_state_dir()"
736 _counter_file="${script_state_dir}/${_counter_name}"
738 # Some code passes an argument
739 # shellcheck disable=SC2120
742 _ctdb_counter_common "$1"
748 _ctdb_counter_common "$1"
750 # unary counting using newlines!
751 echo >>"$_counter_file"
755 _ctdb_counter_common "$1"
757 _val=$(wc -c <"$_counter_file" 2>/dev/null || echo 0)
758 # Strip leading spaces from output of wc (on FreeBSD)
759 # shellcheck disable=SC2086
763 ########################################################
765 # ctdb_setup_state_dir <type> <name>
766 # Sets/creates script_state_dir
767 ctdb_setup_state_dir()
769 [ $# -eq 2 ] || die "usage: ctdb_setup_state_dir <type> <name>"
774 script_state_dir="${CTDB_SCRIPT_VARDIR}/${_type}/${_name}"
776 mkdir -p "$script_state_dir" ||
777 die "Error creating script state dir \"${script_state_dir}\""
780 ##################################################################
781 # Reconfigure a service on demand
783 _ctdb_service_reconfigure_common()
785 if [ -z "$script_state_dir" ]; then
786 die "ctdb_service_*_reconfigure() needs ctdb_setup_state_dir()"
789 _ctdb_service_reconfigure_flag="${script_state_dir}/need_reconfigure"
792 ctdb_service_needs_reconfigure()
794 _ctdb_service_reconfigure_common
795 [ -e "$_ctdb_service_reconfigure_flag" ]
798 ctdb_service_set_reconfigure()
800 _ctdb_service_reconfigure_common
801 : >"$_ctdb_service_reconfigure_flag"
804 ctdb_service_unset_reconfigure()
806 _ctdb_service_reconfigure_common
807 rm -f "$_ctdb_service_reconfigure_flag"
810 ctdb_service_reconfigure()
812 echo "Reconfiguring service \"${service_name}\"..."
813 ctdb_service_unset_reconfigure
814 service_reconfigure || return $?
815 # Intentionally have this use $service_name as default
816 # shellcheck disable=SC2119
820 # Default service_reconfigure() function does nothing.
821 service_reconfigure()
826 # Default service_start() and service_stop() functions.
828 # These may be overridden in an eventscript.
831 service "$service_name" start
836 service "$service_name" stop
839 ##################################################################
841 # This exists only for backward compatibility with 3rd party scripts
843 ctdb_standard_event_handler()
852 if [ "$_family" = "inet6" ]; then
853 _iptables_cmd="ip6tables"
855 _iptables_cmd="iptables"
858 # iptables doesn't like being re-entered, so flock-wrap it.
859 flock -w 30 "${CTDB_SCRIPT_VARDIR}/iptables.flock" "$_iptables_cmd" "$@"
862 # AIX (and perhaps others?) doesn't have mktemp
863 # type is commonly supported and more portable than which(1)
864 # shellcheck disable=SC2039
865 if ! type mktemp >/dev/null 2>&1; then
869 if [ "$1" = "-d" ]; then
874 _hex10=$(dd if=/dev/urandom count=20 2>/dev/null |
877 _t="${_d}/tmp.${_hex10}"
890 ######################################################################
891 # NFS callout handling
897 if [ -z "$CTDB_NFS_CALLOUT" ]; then
898 CTDB_NFS_CALLOUT="${CTDB_BASE}/nfs-linux-kernel-callout"
900 # Always export, for statd callout
901 export CTDB_NFS_CALLOUT
903 # If the callout wants to use this then it must create it
904 export CTDB_NFS_CALLOUT_STATE_DIR="${_state_dir}/callout-state"
906 # Export, if set, for use by clustered NFS callouts
907 if [ -n "$CTDB_NFS_STATE_FS_TYPE" ]; then
908 export CTDB_NFS_STATE_FS_TYPE
910 if [ -n "$CTDB_NFS_STATE_MNT" ]; then
911 export CTDB_NFS_STATE_MNT
914 nfs_callout_cache="${_state_dir}/nfs_callout_cache"
915 nfs_callout_cache_callout="${nfs_callout_cache}/CTDB_NFS_CALLOUT"
916 nfs_callout_cache_ops="${nfs_callout_cache}/ops"
919 nfs_callout_register()
921 mkdir -p "$nfs_callout_cache_ops"
922 rm -f "$nfs_callout_cache_ops"/*
924 echo "$CTDB_NFS_CALLOUT" >"$nfs_callout_cache_callout"
926 _t=$("$CTDB_NFS_CALLOUT" "register")
927 if [ -n "$_t" ]; then
929 while IFS="" read -r _op; do
930 touch "${nfs_callout_cache_ops}/${_op}"
933 touch "${nfs_callout_cache_ops}/ALL"
939 # Re-run registration if $CTDB_NFS_CALLOUT has changed
941 if [ -r "$nfs_callout_cache_callout" ]; then
942 read -r _prev <"$nfs_callout_cache_callout"
944 if [ "$CTDB_NFS_CALLOUT" != "$_prev" ]; then
948 # Run the operation if it is registered...
949 if [ -e "${nfs_callout_cache_ops}/${1}" ] ||
950 [ -e "${nfs_callout_cache_ops}/ALL" ]; then
951 "$CTDB_NFS_CALLOUT" "$@"
955 ########################################################
957 ########################################################
963 tickledir="${CTDB_SCRIPT_VARDIR}/tickles"
964 mkdir -p "$tickledir"
966 # What public IPs do I hold?
968 _ips=$($CTDB -X ip | awk -F'|' -v pnn="$_pnn" '$3 == pnn {print $2}')
970 # IPs and port as ss filters
973 _ip_filter="${_ip_filter}${_ip_filter:+ || }src [${_ip}]"
975 _port_filter="sport == :${_port}"
977 # Record connections to our public IPs in a temporary file.
978 # This temporary file is in CTDB's private state directory and
979 # $$ is used to avoid a very rare race involving CTDB's script
980 # debugging. No security issue, nothing to see here...
981 _my_connections="${tickledir}/${_port}.connections.$$"
982 # Parentheses are needed around the filters for precedence but
983 # the parentheses can't be empty!
985 # Recent versions of ss print square brackets around IPv6
986 # addresses. While it is desirable to update CTDB's address
987 # parsing and printing code, something needs to be done here
988 # for backward compatibility, so just delete the brackets.
989 ss -tn state established \
990 "${_ip_filter:+( ${_ip_filter} )}" \
991 "${_port_filter:+( ${_port_filter} )}" |
992 awk 'NR > 1 {print $4, $3}' |
994 sort >"$_my_connections"
996 # Record our current tickles in a temporary file
997 _my_tickles="${tickledir}/${_port}.tickles.$$"
999 $CTDB -X gettickles "$_i" "$_port" |
1000 awk -F'|' 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
1002 sort >"$_my_tickles"
1004 # Add tickles for connections that we haven't already got tickles for
1005 comm -23 "$_my_connections" "$_my_tickles" |
1008 # Remove tickles for connections that are no longer there
1009 comm -13 "$_my_connections" "$_my_tickles" |
1012 rm -f "$_my_connections" "$_my_tickles"
1014 # Remove stale files from killed scripts
1015 # Files can't have spaces in name, more portable than -print0/-0
1016 # shellcheck disable=SC2038
1017 (cd "$tickledir" && find . -type f -mmin +10 | xargs -r rm)
1020 ########################################################
1021 # load a site local config file
1022 ########################################################
1024 [ -x "${CTDB_BASE}/rc.local" ] && {
1025 . "${CTDB_BASE}/rc.local"
1028 [ -d "${CTDB_BASE}/rc.local.d" ] && {
1029 for i in "${CTDB_BASE}/rc.local.d"/*; do
1030 [ -x "$i" ] && . "$i"
1034 script_name="${0##*/}" # basename