2 * Linux /proc/stat metrics cluster
4 * Copyright (c) 2012-2014,2017 Red Hat.
5 * Copyright (c) 2008-2009 Aconex. All Rights Reserved.
6 * Copyright (c) 2000,2004-2008 Silicon Graphics, Inc. All Rights Reserved.
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 #include "proc_stat.h"
25 * Allocate instance identifiers for all CPUs. Note there is a
26 * need to deal with CPUs/nodes going online and offline during
27 * the life of the PMDA - usually we're only able to get values
28 * for online resources (/proc/stat reports online CPUs only).
30 * We must create a direct mapping of CPU ID to instance ID, for
31 * historical reasons. So initially all have a NULL private data
32 * pointer associated with them, which we'll subsequently fill in
33 * if/when the CPU/node is discovered to be online (later).
36 setup_cpu_indom(pmInDom cpus
)
42 _pm_ncpus
= 1; /* sanity, surely there must be at least one CPU */
44 pmdaCacheOp(cpus
, PMDA_CACHE_CULL
);
45 for (i
= 0; i
< _pm_ncpus
; i
++) {
46 snprintf(name
, sizeof(name
)-1, "cpu%u", i
);
47 pmdaCacheStore(cpus
, PMDA_CACHE_ADD
, name
, NULL
);
52 setup_cpu_info(cpuinfo_t
*cip
)
63 cpu_add(pmInDom cpus
, unsigned int cpuid
, unsigned int nodeid
)
68 if ((cpu
= (percpu_t
*)calloc(1, sizeof(percpu_t
))) == NULL
)
72 setup_cpu_info(&cpu
->info
);
73 snprintf(name
, sizeof(name
)-1, "cpu%u", cpuid
);
74 pmdaCacheStore(cpus
, PMDA_CACHE_ADD
, name
, (void*)cpu
);
78 node_add(pmInDom nodes
, unsigned int nodeid
)
83 if ((node
= (pernode_t
*)calloc(1, sizeof(pernode_t
))) == NULL
)
85 node
->nodeid
= nodeid
;
86 snprintf(name
, sizeof(name
)-1, "node%u", nodeid
);
87 pmdaCacheStore(nodes
, PMDA_CACHE_ADD
, name
, (void*)node
);
93 const char *node_path
= "sys/devices/system/node";
95 unsigned int cpu
, node
;
96 struct dirent
**node_files
= NULL
;
97 struct dirent
*cpu_entry
;
100 char path
[MAXPATHLEN
];
107 nodes
= INDOM(NODE_INDOM
);
108 cpus
= INDOM(CPU_INDOM
);
109 setup_cpu_indom(cpus
);
111 snprintf(path
, sizeof(path
), "%s/%s", linux_statspath
, node_path
);
112 count
= scandir(path
, &node_files
, NULL
, versionsort
);
113 if (!node_files
|| linux_test_mode
) {
114 /* QA mode or no sysfs support, assume single NUMA node */
115 node_add(nodes
, 0); /* default to just node zero */
116 for (cpu
= 0; cpu
< _pm_ncpus
; cpu
++)
117 cpu_add(cpus
, cpu
, 0); /* all in node zero */
121 for (i
= 0; i
< count
; i
++) {
122 if (sscanf(node_files
[i
]->d_name
, "node%u", &node
) != 1)
124 node_add(nodes
, node
);
125 snprintf(path
, sizeof(path
), "%s/%s/%s",
126 linux_statspath
, node_path
, node_files
[i
]->d_name
);
127 if ((cpu_dir
= opendir(path
)) == NULL
)
129 while ((cpu_entry
= readdir(cpu_dir
)) != NULL
) {
130 if (sscanf(cpu_entry
->d_name
, "cpu%u", &cpu
) != 1)
132 cpu_add(cpus
, cpu
, node
);
139 for (i
= 0; i
< count
; i
++)
146 find_line_format(const char *fmt
, int fmtlen
, char **bufindex
, int nbufindex
, int start
)
150 if (start
< nbufindex
-1 && strncmp(fmt
, bufindex
[++start
], fmtlen
) == 0)
151 return start
; /* fast-path, next line found where expected */
153 for (j
= 0; j
< nbufindex
; j
++) {
154 if (strncmp(fmt
, bufindex
[j
], 5) != 0)
162 * We use /proc/stat as a single source of truth regarding online/offline
163 * state for CPUs (its per-CPU stats are for online CPUs only).
164 * This drives the contents of the CPU indom for all per-CPU metrics, so
165 * it is important to ensure this refresh routine is called first before
166 * refreshing any other per-CPU metrics (e.g. interrupts, softnet).
169 refresh_proc_stat(proc_stat_t
*proc_stat
)
174 char buf
[MAXPATHLEN
], *name
;
177 static int fd
= -1; /* kept open until exit(), unless testing */
178 static char *statbuf
;
179 static int maxstatbuf
;
180 static char **bufindex
;
181 static int nbufindex
;
182 static int maxbufindex
;
185 cpus
= INDOM(CPU_INDOM
);
186 pmdaCacheOp(cpus
, PMDA_CACHE_INACTIVE
);
187 nodes
= INDOM(NODE_INDOM
);
189 /* reset per-node aggregate CPU utilisation stats */
190 for (pmdaCacheOp(nodes
, PMDA_CACHE_WALK_REWIND
);;) {
191 if ((i
= pmdaCacheOp(nodes
, PMDA_CACHE_WALK_NEXT
)) < 0)
193 if (!pmdaCacheLookup(nodes
, i
, NULL
, (void **)&np
) || !np
)
195 memset(&np
->stat
, 0, sizeof(np
->stat
));
198 /* in test mode we can replace procfs files (keeping open thwarts that) */
199 if (fd
>= 0 && linux_test_mode
) {
205 if (lseek(fd
, 0, SEEK_SET
) < 0)
208 snprintf(buf
, sizeof(buf
), "%s/proc/stat", linux_statspath
);
209 if ((fd
= open(buf
, O_RDONLY
)) < 0)
214 while (n
>= maxstatbuf
) {
215 size
= maxstatbuf
+ 512;
216 if ((statbuf
= (char *)realloc(statbuf
, size
)) == NULL
)
220 size
= (statbuf
+ maxstatbuf
) - (statbuf
+ n
);
221 if ((i
= read(fd
, statbuf
+ n
, size
)) > 0)
228 if (bufindex
== NULL
) {
229 size
= 16 * sizeof(char *);
230 if ((bufindex
= (char **)malloc(size
)) == NULL
)
236 bufindex
[nbufindex
] = statbuf
;
237 for (i
= 0; i
< n
; i
++) {
238 if (statbuf
[i
] == '\n' || statbuf
[i
] == '\0') {
240 if (nbufindex
+ 1 >= maxbufindex
) {
241 size
= (maxbufindex
+ 4) * sizeof(char *);
242 if ((bufindex
= (char **)realloc(bufindex
, size
)) == NULL
)
246 bufindex
[++nbufindex
] = statbuf
+ i
+ 1;
250 #define ALLCPU_FMT "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu"
251 n
= sscanf((const char *)bufindex
[0], ALLCPU_FMT
,
252 &proc_stat
->all
.user
, &proc_stat
->all
.nice
,
253 &proc_stat
->all
.sys
, &proc_stat
->all
.idle
,
254 &proc_stat
->all
.wait
, &proc_stat
->all
.irq
,
255 &proc_stat
->all
.sirq
, &proc_stat
->all
.steal
,
256 &proc_stat
->all
.guest
, &proc_stat
->all
.guest_nice
);
258 #define PERCPU_FMT "cpu%u %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu"
261 * e.g. cpu0 95379 4 20053 6502503
262 * 2.6 kernels have 3 additional fields for wait, irq and soft_irq.
263 * More recent (2008) 2.6 kernels have an extra field for guest and
264 * also (since 2009) guest_nice.
265 * In the single-CPU system case, don't bother scanning, use "all";
266 * this handles non-SMP kernels with no line starting with "cpu0".
268 if ((size
= pmdaCacheOp(cpus
, PMDA_CACHE_SIZE
)) == 1) {
269 pmdaCacheLookup(cpus
, 0, &name
, (void **)&cp
);
270 memcpy(&cp
->stat
, &proc_stat
->all
, sizeof(cp
->stat
));
271 pmdaCacheStore(cpus
, PMDA_CACHE_ADD
, name
, (void *)cp
);
274 for (n
= 0; n
< nbufindex
; n
++) {
275 if (strncmp("cpu", bufindex
[n
], 3) != 0 ||
276 !isdigit((int)bufindex
[n
][3]))
280 i
= atoi(&bufindex
[n
][3]); /* extract CPU identifier */
281 if (pmdaCacheLookup(cpus
, i
, &name
, (void **)&cp
) < 0 || !cp
)
283 memset(&cp
->stat
, 0, sizeof(cp
->stat
));
284 sscanf(bufindex
[n
], PERCPU_FMT
, &i
,
285 &cp
->stat
.user
, &cp
->stat
.nice
, &cp
->stat
.sys
,
286 &cp
->stat
.idle
, &cp
->stat
.wait
, &cp
->stat
.irq
,
287 &cp
->stat
.sirq
, &cp
->stat
.steal
, &cp
->stat
.guest
,
288 &cp
->stat
.guest_nice
);
289 pmdaCacheStore(cpus
, PMDA_CACHE_ADD
, name
, (void *)cp
);
291 /* update per-node aggregate CPU utilisation stats as well */
292 if (pmdaCacheLookup(nodes
, cp
->nodeid
, NULL
, (void **)&np
) < 0)
294 np
->stat
.user
+= cp
->stat
.user
;
295 np
->stat
.nice
+= cp
->stat
.nice
;
296 np
->stat
.sys
+= cp
->stat
.sys
;
297 np
->stat
.idle
+= cp
->stat
.idle
;
298 np
->stat
.wait
+= cp
->stat
.wait
;
299 np
->stat
.irq
+= cp
->stat
.irq
;
300 np
->stat
.sirq
+= cp
->stat
.sirq
;
301 np
->stat
.steal
+= cp
->stat
.steal
;
302 np
->stat
.guest
+= cp
->stat
.guest
;
303 np
->stat
.guest_nice
+= cp
->stat
.guest_nice
;
309 #define PAGE_FMT "page %u %u" /* NB: moved to /proc/vmstat in 2.6 kernels */
310 if ((i
= find_line_format(PAGE_FMT
, 5, bufindex
, nbufindex
, i
)) >= 0)
311 sscanf((const char *)bufindex
[i
], PAGE_FMT
,
312 &proc_stat
->page
[0], &proc_stat
->page
[1]);
314 #define SWAP_FMT "swap %u %u" /* NB: moved to /proc/vmstat in 2.6 kernels */
315 if ((i
= find_line_format(SWAP_FMT
, 5, bufindex
, nbufindex
, i
)) >= 0)
316 sscanf((const char *)bufindex
[i
], SWAP_FMT
,
317 &proc_stat
->swap
[0], &proc_stat
->swap
[1]);
319 #define INTR_FMT "intr %llu" /* (export 1st 'total interrupts' value only) */
320 if ((i
= find_line_format(INTR_FMT
, 5, bufindex
, nbufindex
, i
)) >= 0)
321 sscanf((const char *)bufindex
[i
], INTR_FMT
, &proc_stat
->intr
);
323 #define CTXT_FMT "ctxt %llu"
324 if ((i
= find_line_format(CTXT_FMT
, 5, bufindex
, nbufindex
, i
)) >= 0)
325 sscanf((const char *)bufindex
[i
], CTXT_FMT
, &proc_stat
->ctxt
);
327 #define BTIME_FMT "btime %lu"
328 if ((i
= find_line_format(BTIME_FMT
, 6, bufindex
, nbufindex
, i
)) >= 0)
329 sscanf((const char *)bufindex
[i
], BTIME_FMT
, &proc_stat
->btime
);
331 #define PROCESSES_FMT "processes %lu"
332 if ((i
= find_line_format(PROCESSES_FMT
, 10, bufindex
, nbufindex
, i
)) >= 0)
333 sscanf((const char *)bufindex
[i
], PROCESSES_FMT
, &proc_stat
->processes
);
335 #define RUNNING_FMT "procs_running %lu"
336 if ((i
= find_line_format(RUNNING_FMT
, 14, bufindex
, nbufindex
, i
)) >= 0)
337 sscanf((const char *)bufindex
[i
], RUNNING_FMT
, &proc_stat
->procs_running
);
339 #define BLOCKED_FMT "procs_blocked %lu"
340 if ((i
= find_line_format(BLOCKED_FMT
, 14, bufindex
, nbufindex
, i
)) >= 0)
341 sscanf((const char *)bufindex
[i
], BLOCKED_FMT
, &proc_stat
->procs_blocked
);