From b7b6e25b3e26210ed196be7fc5848e3320b5c35b Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 3 Aug 2015 15:59:50 +1000 Subject: [PATCH] ctdb-scripts: Memory monitoring uses thresholds expressed as percentages CTDB_MONITOR_FREE_MEMORY and CTDB_MONITOR_FREE_MEMORY_WARN are now percentages that specify thresholds of acceptable memory usage. Memory/swap usage in tests also specified as percentages. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs --- ctdb/config/events.d/05.system | 33 ++++++++++++------------ ctdb/doc/ctdbd.conf.5.xml | 17 ++++++------ ctdb/tests/eventscripts/05.system.monitor.011.sh | 2 +- ctdb/tests/eventscripts/05.system.monitor.012.sh | 4 +-- ctdb/tests/eventscripts/05.system.monitor.013.sh | 2 +- ctdb/tests/eventscripts/05.system.monitor.014.sh | 6 ++--- ctdb/tests/eventscripts/05.system.monitor.015.sh | 6 ++--- ctdb/tests/eventscripts/scripts/local.sh | 23 +++++++++-------- 8 files changed, 46 insertions(+), 47 deletions(-) diff --git a/ctdb/config/events.d/05.system b/ctdb/config/events.d/05.system index 4e5551e786a..da96254384e 100644 --- a/ctdb/config/events.d/05.system +++ b/ctdb/config/events.d/05.system @@ -72,26 +72,25 @@ monitor_memory_usage () _meminfo=$(get_proc "meminfo") set -- $(echo "$_meminfo" | awk ' -$1 == "MemAvailable:" { memavail += $2 / 1024 } -$1 == "MemFree:" { memfree += $2 / 1024 } -$1 == "Cached:" { memfree += $2 / 1024 } -$1 == "Buffers:" { memfree += $2 / 1024 } -$1 == "MemTotal:" { memtotal = $2 / 1024 } -$1 == "SwapFree:" { swapfree = $2 / 1024 } -$1 == "SwapTotal:" { swaptotal = $2 / 1024 } +$1 == "MemAvailable:" { memavail += $2 } +$1 == "MemFree:" { memfree += $2 } +$1 == "Cached:" { memfree += $2 } +$1 == "Buffers:" { memfree += $2 } +$1 == "MemTotal:" { memtotal = $2 } +$1 == "SwapFree:" { swapfree = $2 } +$1 == "SwapTotal:" { swaptotal = $2 } END { if (memavail != 0) { memfree = memavail ; } - print int(memfree), int(memtotal), int(swapfree) , int(swaptotal) + print int((memtotal - memfree) 
/ memtotal * 100), + int((swaptotal - swapfree) / swaptotal * 100) }') - _mem_free="$1" - _mem_total="$2" - _swap_free="$3" - _swap_total="$4" + _mem_usage="$1" + _swap_usage="$2" # Shutdown CTDB when memory is below the configured limit if [ -n "$CTDB_MONITOR_FREE_MEMORY" ] ; then - if [ $_mem_free -le $CTDB_MONITOR_FREE_MEMORY ] ; then - echo "CRITICAL: OOM - ${_mem_free}MB free <= ${CTDB_MONITOR_FREE_MEMORY}MB (CTDB threshold)" + if [ $_mem_usage -ge $CTDB_MONITOR_FREE_MEMORY ] ; then + echo "CRITICAL: OOM - ${_mem_usage}% usage >= ${CTDB_MONITOR_FREE_MEMORY}% (CTDB threshold)" echo "CRITICAL: Shutting down CTDB!!!" echo "$_meminfo" ps auxfww @@ -104,14 +103,14 @@ END { # Warn when low on memory if [ -n "$CTDB_MONITOR_FREE_MEMORY_WARN" ] ; then - if [ $_mem_free -le $CTDB_MONITOR_FREE_MEMORY_WARN ] ; then - echo "WARNING: free memory is low - ${_mem_free}MB free <= ${CTDB_MONITOR_FREE_MEMORY_WARN}MB (CTDB threshold)" + if [ $_mem_usage -ge $CTDB_MONITOR_FREE_MEMORY_WARN ] ; then + echo "WARNING: memory usage is excessive - ${_mem_usage}% >= ${CTDB_MONITOR_FREE_MEMORY_WARN}% (CTDB threshold)" fi fi # We should never enter swap, so SwapTotal == SwapFree. if [ "$CTDB_CHECK_SWAP_IS_NOT_USED" = "yes" ] ; then - if [ "$_swap_total" != "$_swap_free" ] ; then + if [ $_swap_usage -gt 0 ] ; then echo We are swapping: echo "$_meminfo" ps auxfww diff --git a/ctdb/doc/ctdbd.conf.5.xml b/ctdb/doc/ctdbd.conf.5.xml index 77f75c8bf21..63c84aa5ac3 100644 --- a/ctdb/doc/ctdbd.conf.5.xml +++ b/ctdb/doc/ctdbd.conf.5.xml @@ -1336,11 +1336,11 @@ CTDB_PER_IP_ROUTING_TABLE_ID_HIGH=9000 CTDB_MONITOR_FREE_MEMORY=NUM - NUM is a lower limit on available system memory, expressed - in megabytes. If this is set and the amount of available - memory falls below this limit then some debug information - will be logged, the node will be disabled and then CTDB - will be shut down. + NUM is a threshold of acceptable memory usage, expressed + as a percentage. 
If this is set and memory usage + reaches this limit then some debug information will be + logged, the node will be disabled and then CTDB will be + shut down. No default. @@ -1352,10 +1352,9 @@ CTDB_PER_IP_ROUTING_TABLE_ID_HIGH=9000 CTDB_MONITOR_FREE_MEMORY_WARN=NUM - NUM is a lower limit on available system memory, expressed - in megabytes. If this is set and the amount of available - memory falls below this limit then a warning will be - logged. + NUM is a threshold of acceptable memory usage, expressed + as a percentage. If this is set and memory usage + reaches this limit then a warning will be logged. No default. diff --git a/ctdb/tests/eventscripts/05.system.monitor.011.sh b/ctdb/tests/eventscripts/05.system.monitor.011.sh index 4290d13e15d..79f55f06a91 100755 --- a/ctdb/tests/eventscripts/05.system.monitor.011.sh +++ b/ctdb/tests/eventscripts/05.system.monitor.011.sh @@ -4,7 +4,7 @@ define_test "Memory check, bad situation, no checks enabled" -setup_memcheck "bad" +setup_memcheck 100 100 CTDB_MONITOR_FREE_MEMORY="" CTDB_MONITOR_FREE_MEMORY_WARN="" diff --git a/ctdb/tests/eventscripts/05.system.monitor.012.sh b/ctdb/tests/eventscripts/05.system.monitor.012.sh index 6e9401233b2..6c06480824a 100755 --- a/ctdb/tests/eventscripts/05.system.monitor.012.sh +++ b/ctdb/tests/eventscripts/05.system.monitor.012.sh @@ -6,8 +6,8 @@ define_test "Memory check, good situation, all enabled" setup_memcheck -CTDB_MONITOR_FREE_MEMORY="500" -CTDB_MONITOR_FREE_MEMORY_WARN="1000" +CTDB_MONITOR_FREE_MEMORY="90" +CTDB_MONITOR_FREE_MEMORY_WARN="80" CTDB_CHECK_SWAP_IS_NOT_USED="yes" ok_null diff --git a/ctdb/tests/eventscripts/05.system.monitor.013.sh b/ctdb/tests/eventscripts/05.system.monitor.013.sh index 9e63ab50fd3..dc3d40d0fa9 100755 --- a/ctdb/tests/eventscripts/05.system.monitor.013.sh +++ b/ctdb/tests/eventscripts/05.system.monitor.013.sh @@ -4,7 +4,7 @@ define_test "Memory check, bad situation, only swap check" -setup_memcheck "bad" +setup_memcheck 100 10 
CTDB_MONITOR_FREE_MEMORY="" CTDB_MONITOR_FREE_MEMORY_WARN="" diff --git a/ctdb/tests/eventscripts/05.system.monitor.014.sh b/ctdb/tests/eventscripts/05.system.monitor.014.sh index fdf20329e2d..64c07416445 100755 --- a/ctdb/tests/eventscripts/05.system.monitor.014.sh +++ b/ctdb/tests/eventscripts/05.system.monitor.014.sh @@ -4,14 +4,14 @@ define_test "Memory check, bad situation, only memory warning" -setup_memcheck "bad" +setup_memcheck 90 10 CTDB_MONITOR_FREE_MEMORY="" -CTDB_MONITOR_FREE_MEMORY_WARN="500" +CTDB_MONITOR_FREE_MEMORY_WARN="85" CTDB_CHECK_SWAP_IS_NOT_USED="no" ok <<EOF -WARNING: free memory is low - 468MB free <= 500MB (CTDB threshold) +WARNING: memory usage is excessive - 90% >= 85% (CTDB threshold) EOF simple_test diff --git a/ctdb/tests/eventscripts/05.system.monitor.015.sh b/ctdb/tests/eventscripts/05.system.monitor.015.sh index a46851a573d..e950bbd276e 100755 --- a/ctdb/tests/eventscripts/05.system.monitor.015.sh +++ b/ctdb/tests/eventscripts/05.system.monitor.015.sh @@ -4,14 +4,14 @@ define_test "Memory check, bad situation, only memory critical" -setup_memcheck "bad" +setup_memcheck 90 0 -CTDB_MONITOR_FREE_MEMORY="500" +CTDB_MONITOR_FREE_MEMORY="85" CTDB_MONITOR_FREE_MEMORY_WARN="" CTDB_CHECK_SWAP_IS_NOT_USED="no" ok <<EOF -CRITICAL: OOM - 468MB free <= 500MB (CTDB threshold) +CRITICAL: OOM - 90% usage >= 85% (CTDB threshold) CRITICAL: Shutting down CTDB!!! 
$FAKE_PROC_MEMINFO $(ps foobar) diff --git a/ctdb/tests/eventscripts/scripts/local.sh b/ctdb/tests/eventscripts/scripts/local.sh index c50eeb5c7b2..ce1c2510bd0 100644 --- a/ctdb/tests/eventscripts/scripts/local.sh +++ b/ctdb/tests/eventscripts/scripts/local.sh @@ -338,22 +338,23 @@ validate_percentage () setup_memcheck () { + _mem_usage="${1:-10}" # Default is 10% + _swap_usage="${2:-0}" # Default is 0% + setup_ctdb - _swap_total="5857276" + _swap_total=5857276 + _swap_free=$(( (100 - $_swap_usage) * $_swap_total / 100 )) - if [ "$1" = "bad" ] ; then - _swap_free=" 4352" - _mem_cached="108568" - else - _swap_free="$_swap_total" - _mem_cached="1139348" - fi + _mem_total=3940712 + _mem_free=225268 + _mem_buffers=146120 + _mem_cached=$(( $_mem_total * (100 - $_mem_usage) / 100 - $_mem_free - $_mem_buffers )) export FAKE_PROC_MEMINFO="\ -MemTotal: 3940712 kB -MemFree: 225268 kB -Buffers: 146120 kB +MemTotal: ${_mem_total} kB +MemFree: ${_mem_free} kB +Buffers: ${_mem_buffers} kB Cached: ${_mem_cached} kB SwapCached: 56016 kB Active: 2422104 kB -- 2.11.4.GIT