ganglia/gmond.conf

   1 /* This configuration is as close to 2.5.x default behavior as possible.
   2    The values closely match ./gmond/metric.h definitions in 2.5.x */
   3 globals {
   4   daemonize = yes
   5   setuid = yes
   6   user = nobody
   7   debug_level = 0
   8   max_udp_msg_len = 1472
   9   mute = no
  10   deaf = no
  11   allow_extra_data = yes
  12   host_dmax = 0 /*secs */
  13   cleanup_threshold = 300 /*secs */
  14   gexec = no
  15   send_metadata_interval = 0
  16 }
  17
  18 /*
  19  * The cluster attributes specified will be used as part of the <CLUSTER>
  20  * tag that will wrap all hosts collected by this instance.
  21  */
  22 cluster {
  23   name = "unspecified"
  24   owner = "unspecified"
  25   latlong = "unspecified"
  26   url = "unspecified"
  27 }
  28
  29 /* The host section describes attributes of the host, like the location */
  30 host {
  31   location = "unspecified"
  32 }
  33
  34 /* Feel free to specify as many udp_send_channels as you like.  Gmond
  35    used to support only a single channel. */
  36 udp_send_channel {
  37   mcast_join = 239.2.11.71
  38   port = 8649
  39   ttl = 1
  40 }
  41
  42 /* You can specify as many udp_recv_channels as you like as well. */
  43 udp_recv_channel {
  44   mcast_join = 239.2.11.71
  45   port = 8649
  46   bind = 239.2.11.71
  47 }
  48
  49 /* You can specify as many tcp_accept_channels as you like to share
  50    an XML description of the state of the cluster. */
  51 tcp_accept_channel {
  52   port = 8649
  53 }
  54
  55 /* Each metrics module that is referenced by gmond must be specified and
  56    loaded. If the module has been statically linked with gmond, it does
  57    not require a load path. However, all dynamically loadable modules must
  58    include a load path. */
  59 modules {
  60   module {
  61     name = "core_metrics"
  62   }
  63   module {
  64     name = "cpu_module"
  65     path = "modcpu.so"
  66   }
  67   module {
  68     name = "disk_module"
  69     path = "moddisk.so"
  70   }
  71   module {
  72     name = "load_module"
  73     path = "modload.so"
  74   }
  75   module {
  76     name = "mem_module"
  77     path = "modmem.so"
  78   }
  79   module {
  80     name = "net_module"
  81     path = "modnet.so"
  82   }
  83   module {
  84     name = "proc_module"
  85     path = "modproc.so"
  86   }
  87   module {
  88     name = "sys_module"
  89     path = "modsys.so"
  90   }
  91 }
  92
  93 include ('/etc/ganglia/conf.d/*.conf')
  94
  95 /* The old internal 2.5.x metric array has been replaced by the following
  96    collection_group directives.  What follows is the default behavior for
  97    collecting and sending metrics that is as close to 2.5.x behavior as
  98    possible. */
  99
 100 /* This collection group will cause a heartbeat (or beacon) to be sent every
 101    20 seconds.  In the heartbeat is the GMOND_STARTED data which expresses
 102    the age of the running gmond. */
 103 collection_group {
 104   collect_once = yes
 105   time_threshold = 20
 106   metric {
 107     name = "heartbeat"
 108   }
 109 }
 110
 111 /* This collection group will send general info about this host every
 112    1200 secs.
 113    This information doesn't change between reboots and is only collected
 114    once. */
 115 collection_group {
 116   collect_once = yes
 117   time_threshold = 1200
 118   metric {
 119     name = "cpu_num"
 120     title = "CPU Count"
 121   }
 122   metric {
 123     name = "cpu_speed"
 124     title = "CPU Speed"
 125   }
 126   metric {
 127     name = "mem_total"
 128     title = "Memory Total"
 129   }
 130   /* Should this be here? Swap can be added/removed between reboots. */
 131   metric {
 132     name = "swap_total"
 133     title = "Swap Space Total"
 134   }
 135   metric {
 136     name = "boottime"
 137     title = "Last Boot Time"
 138   }
 139   metric {
 140     name = "machine_type"
 141     title = "Machine Type"
 142   }
 143   metric {
 144     name = "os_name"
 145     title = "Operating System"
 146   }
 147   metric {
 148     name = "os_release"
 149     title = "Operating System Release"
 150   }
 151   metric {
 152     name = "location"
 153     title = "Location"
 154   }
 155 }
 156
 157 /* This collection group will send the status of gexecd for this host
 158    every 300 secs. */
 159 /* Unlike 2.5.x, the default behavior is to report gexecd OFF. */
 160 collection_group {
 161   collect_once = yes
 162   time_threshold = 300
 163   metric {
 164     name = "gexec"
 165     title = "Gexec Status"
 166   }
 167 }
 168
 169 /* This collection group will collect the CPU status info every 20 secs.
 170    The time threshold is set to 90 seconds.  In honesty, this
 171    time_threshold could be set significantly higher to reduce
 172    unnecessary network chatter. */
 173 collection_group {
 174   collect_every = 20
 175   time_threshold = 90
 176   /* CPU status */
 177   metric {
 178     name = "cpu_user"
 179     value_threshold = "1.0"
 180     title = "CPU User"
 181   }
 182   metric {
 183     name = "cpu_system"
 184     value_threshold = "1.0"
 185     title = "CPU System"
 186   }
 187   metric {
 188     name = "cpu_idle"
 189     value_threshold = "5.0"
 190     title = "CPU Idle"
 191   }
 192   metric {
 193     name = "cpu_nice"
 194     value_threshold = "1.0"
 195     title = "CPU Nice"
 196   }
 197   metric {
 198     name = "cpu_aidle"
 199     value_threshold = "5.0"
 200     title = "CPU aidle"
 201   }
 202   metric {
 203     name = "cpu_wio"
 204     value_threshold = "1.0"
 205     title = "CPU wio"
 206   }
 207   /* The next two metrics are optional if you want more detail...
 208      ... since they are accounted for in cpu_system.
 209   metric {
 210     name = "cpu_intr"
 211     value_threshold = "1.0"
 212     title = "CPU intr"
 213   }
 214   metric {
 215     name = "cpu_sintr"
 216     value_threshold = "1.0"
 217     title = "CPU sintr"
 218   }
 219   */
 220 }
 221
 222 collection_group {
 223   collect_every = 20
 224   time_threshold = 90
 225   /* Load Averages */
 226   metric {
 227     name = "load_one"
 228     value_threshold = "1.0"
 229     title = "One Minute Load Average"
 230   }
 231   metric {
 232     name = "load_five"
 233     value_threshold = "1.0"
 234     title = "Five Minute Load Average"
 235   }
 236   metric {
 237     name = "load_fifteen"
 238     value_threshold = "1.0"
 239     title = "Fifteen Minute Load Average"
 240   }
 241 }
 242
 243 /* This group collects the number of running and total processes */
 244 collection_group {
 245   collect_every = 80
 246   time_threshold = 950
 247   metric {
 248     name = "proc_run"
 249     value_threshold = "1.0"
 250     title = "Total Running Processes"
 251   }
 252   metric {
 253     name = "proc_total"
 254     value_threshold = "1.0"
 255     title = "Total Processes"
 256   }
 257 }
 258
 259 /* This collection group grabs the volatile memory metrics every 40 secs and
 260    sends them at least every 180 secs.  This time_threshold can be increased
 261    significantly to reduce unneeded network traffic. */
 262 collection_group {
 263   collect_every = 40
 264   time_threshold = 180
 265   metric {
 266     name = "mem_free"
 267     value_threshold = "1024.0"
 268     title = "Free Memory"
 269   }
 270   metric {
 271     name = "mem_shared"
 272     value_threshold = "1024.0"
 273     title = "Shared Memory"
 274   }
 275   metric {
 276     name = "mem_buffers"
 277     value_threshold = "1024.0"
 278     title = "Memory Buffers"
 279   }
 280   metric {
 281     name = "mem_cached"
 282     value_threshold = "1024.0"
 283     title = "Cached Memory"
 284   }
 285   metric {
 286     name = "swap_free"
 287     value_threshold = "1024.0"
 288     title = "Free Swap Space"
 289   }
 290 }
 291
 292 collection_group {
 293   collect_every = 40
 294   time_threshold = 300
 295   metric {
 296     name = "bytes_out"
 297     value_threshold = 4096
 298     title = "Bytes Sent"
 299   }
 300   metric {
 301     name = "bytes_in"
 302     value_threshold = 4096
 303     title = "Bytes Received"
 304   }
 305   metric {
 306     name = "pkts_in"
 307     value_threshold = 256
 308     title = "Packets Received"
 309   }
 310   metric {
 311     name = "pkts_out"
 312     value_threshold = 256
 313     title = "Packets Sent"
 314   }
 315 }
 316
 317 /* Differs from the 2.5.x default, since the old config made no sense. */
 318 collection_group {
 319   collect_every = 1800
 320   time_threshold = 3600
 321   metric {
 322     name = "disk_total"
 323     value_threshold = 1.0
 324     title = "Total Disk Space"
 325   }
 326 }
 327
 328 collection_group {
 329   collect_every = 40
 330   time_threshold = 180
 331   metric {
 332     name = "disk_free"
 333     value_threshold = 1.0
 334     title = "Disk Space Available"
 335   }
 336   metric {
 337     name = "part_max_used"
 338     value_threshold = 1.0
 339     title = "Maximum Disk Space Used"
 340   }
 341 }
 342