pmdalinux: dynamic per-CPU and per-node instance domains
pcp.git: src/pmdas/linux/proc_stat.c
/*
 * Linux /proc/stat metrics cluster
 *
 * Copyright (c) 2012-2014,2017 Red Hat.
 * Copyright (c) 2008-2009 Aconex.  All Rights Reserved.
 * Copyright (c) 2000,2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 */
#include "linux.h"
#include "proc_stat.h"
#include <sys/stat.h>
#include <dirent.h>
#include <ctype.h>

/*
 * Allocate instance identifiers for all CPUs.  Note there is a
 * need to deal with CPUs/nodes going online and offline during
 * the life of the PMDA - usually we're only able to get values
 * for online resources (/proc/stat reports online CPUs only).
 *
 * We must create a direct mapping of CPU ID to instance ID, for
 * historical reasons.  So initially all have a NULL private data
 * pointer associated with them, which we'll subsequently fill in
 * if/when the CPU/node is discovered to be online (later).
 */
static void
setup_cpu_indom(pmInDom cpus)
{
    char name[64];
    int i;

    if (_pm_ncpus < 1)
        _pm_ncpus = 1;    /* sanity, surely there must be at least one CPU */

    pmdaCacheOp(cpus, PMDA_CACHE_CULL);
    for (i = 0; i < _pm_ncpus; i++) {
        snprintf(name, sizeof(name)-1, "cpu%u", i);
        pmdaCacheStore(cpus, PMDA_CACHE_ADD, name, NULL);
    }
}
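
/*
 * Mark all cpuinfo fields as "no value known yet" (-1); they are
 * filled in later, once details for this CPU have been discovered
 * (e.g. when /proc/cpuinfo is parsed).
 */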
void
setup_cpu_info(cpuinfo_t *cip)
{
    cip->sapic = -1;
    cip->vendor = -1;
    cip->model = -1;
    cip->model_name = -1;
    cip->stepping = -1;
    cip->flags = -1;
}
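
/*
 * Replace the (initially NULL) private data pointer for one CPU
 * instance with a zeroed percpu_t, recording its NUMA node.
 */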
static void
cpu_add(pmInDom cpus, unsigned int cpuid, unsigned int nodeid)
{
    percpu_t *cpu;
    char name[64];

    if ((cpu = (percpu_t *)calloc(1, sizeof(percpu_t))) == NULL)
        return;
    cpu->cpuid = cpuid;
    cpu->nodeid = nodeid;
    setup_cpu_info(&cpu->info);
    snprintf(name, sizeof(name)-1, "cpu%u", cpuid);
    pmdaCacheStore(cpus, PMDA_CACHE_ADD, name, (void*)cpu);
}
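
/*
 * Add one NUMA node instance ("node0", "node1", ...) with a zeroed
 * pernode_t for accumulating per-node aggregate CPU counters.
 */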
static void
node_add(pmInDom nodes, unsigned int nodeid)
{
    pernode_t *node;
    char name[64];

    if ((node = (pernode_t *)calloc(1, sizeof(pernode_t))) == NULL)
        return;
    node->nodeid = nodeid;
    snprintf(name, sizeof(name)-1, "node%u", nodeid);
    pmdaCacheStore(nodes, PMDA_CACHE_ADD, name, (void*)node);
}
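
/*
 * One-time discovery of the CPU-to-node topology: walk the node%u
 * directories below sys/devices/system/node and the cpu%u entries
 * within each.  Falls back to a single node zero holding every CPU
 * when sysfs is unavailable, or when running in QA test mode.
 */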
void
cpu_node_setup(void)
{
    const char *node_path = "sys/devices/system/node";
    pmInDom cpus, nodes;
    unsigned int cpu, node;
    struct dirent **node_files = NULL;
    struct dirent *cpu_entry;
    DIR *cpu_dir;
    int i, count;
    char path[MAXPATHLEN];
    static int setup;

    if (setup)
        return;
    setup = 1;

    nodes = INDOM(NODE_INDOM);
    cpus = INDOM(CPU_INDOM);
    setup_cpu_indom(cpus);

    snprintf(path, sizeof(path), "%s/%s", linux_statspath, node_path);
    count = scandir(path, &node_files, NULL, versionsort);
    if (!node_files || linux_test_mode) {
        /* QA mode or no sysfs support, assume single NUMA node */
        node_add(nodes, 0);    /* default to just node zero */
        for (cpu = 0; cpu < _pm_ncpus; cpu++)
            cpu_add(cpus, cpu, 0);    /* all in node zero */
        goto done;
    }

    for (i = 0; i < count; i++) {
        if (sscanf(node_files[i]->d_name, "node%u", &node) != 1)
            continue;
        node_add(nodes, node);
        snprintf(path, sizeof(path), "%s/%s/%s",
                 linux_statspath, node_path, node_files[i]->d_name);
        if ((cpu_dir = opendir(path)) == NULL)
            continue;
        while ((cpu_entry = readdir(cpu_dir)) != NULL) {
            if (sscanf(cpu_entry->d_name, "cpu%u", &cpu) != 1)
                continue;
            cpu_add(cpus, cpu, node);
        }
        closedir(cpu_dir);
    }

done:
    if (node_files) {
        for (i = 0; i < count; i++)
            free(node_files[i]);
        free(node_files);
    }
}
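
/*
 * Locate the line starting with the given format prefix in the line
 * index.  Since /proc/stat keeps a stable line ordering, first try the
 * line immediately after the previous match, and only fall back to a
 * full scan of all lines when that guess misses.
 */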
static int
find_line_format(const char *fmt, int fmtlen, char **bufindex, int nbufindex, int start)
{
    int j;

    if (start < nbufindex-1 && strncmp(fmt, bufindex[++start], fmtlen) == 0)
        return start;    /* fast-path, next line found where expected */

    for (j = 0; j < nbufindex; j++) {
        if (strncmp(fmt, bufindex[j], fmtlen) != 0)
            continue;
        return j;
    }
    return -1;
}

/*
 * We use /proc/stat as a single source of truth regarding online/offline
 * state for CPUs (its per-CPU stats are for online CPUs only).
 * This drives the contents of the CPU indom for all per-CPU metrics, so
 * it is important to ensure this refresh routine is called first before
 * refreshing any other per-CPU metrics (e.g. interrupts, softnet).
 */
int
refresh_proc_stat(proc_stat_t *proc_stat)
{
    pernode_t *np;
    percpu_t *cp;
    pmInDom cpus, nodes;
    char buf[MAXPATHLEN], *name;
    int n = 0, i, size;

    static int fd = -1;    /* kept open until exit(), unless testing */
    static char *statbuf;
    static int maxstatbuf;
    static char **bufindex;
    static int nbufindex;
    static int maxbufindex;

    cpu_node_setup();
    cpus = INDOM(CPU_INDOM);
    pmdaCacheOp(cpus, PMDA_CACHE_INACTIVE);
    nodes = INDOM(NODE_INDOM);

    /* reset per-node aggregate CPU utilisation stats */
    for (pmdaCacheOp(nodes, PMDA_CACHE_WALK_REWIND);;) {
        if ((i = pmdaCacheOp(nodes, PMDA_CACHE_WALK_NEXT)) < 0)
            break;
        if (!pmdaCacheLookup(nodes, i, NULL, (void **)&np) || !np)
            continue;
        memset(&np->stat, 0, sizeof(np->stat));
    }

    /* in test mode we can replace procfs files (keeping open thwarts that) */
    if (fd >= 0 && linux_test_mode) {
        close(fd);
        fd = -1;
    }
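
    /* rewind the cached file descriptor if we have one, else (re)open */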
    if (fd >= 0) {
        if (lseek(fd, 0, SEEK_SET) < 0)
            return -oserror();
    } else {
        snprintf(buf, sizeof(buf), "%s/proc/stat", linux_statspath);
        if ((fd = open(buf, O_RDONLY)) < 0)
            return -oserror();
    }
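
    /* slurp the whole of /proc/stat into statbuf, growing in 512-byte steps */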
    for (;;) {
        while (n >= maxstatbuf) {
            size = maxstatbuf + 512;
            if ((statbuf = (char *)realloc(statbuf, size)) == NULL)
                return -ENOMEM;
            maxstatbuf = size;
        }
        size = (statbuf + maxstatbuf) - (statbuf + n);
        if ((i = read(fd, statbuf + n, size)) > 0)
            n += i;
        else
            break;
    }
    statbuf[n] = '\0';
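
    /*
     * Build the line index: split statbuf in place at each newline and
     * record a pointer to the start of every line for parsing below.
     */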
    if (bufindex == NULL) {
        size = 16 * sizeof(char *);
        if ((bufindex = (char **)malloc(size)) == NULL)
            return -ENOMEM;
        maxbufindex = 16;
    }

    nbufindex = 0;
    bufindex[nbufindex] = statbuf;
    for (i = 0; i < n; i++) {
        if (statbuf[i] == '\n' || statbuf[i] == '\0') {
            statbuf[i] = '\0';
            if (nbufindex + 1 >= maxbufindex) {
                size = (maxbufindex + 4) * sizeof(char *);
                if ((bufindex = (char **)realloc(bufindex, size)) == NULL)
                    return -ENOMEM;
                maxbufindex += 4;
            }
            bufindex[++nbufindex] = statbuf + i + 1;
        }
    }
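
/*
 * The first line aggregates across all CPUs, e.g. (illustrative values):
 *   cpu  794499 4664 274545 57563920 130856 0 2859 0 0 0
 * i.e. USER_HZ counts of user, nice, sys, idle, iowait, irq, softirq,
 * steal, guest and guest_nice time, matching the fields scanned below.
 */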
#define ALLCPU_FMT "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu"
    n = sscanf((const char *)bufindex[0], ALLCPU_FMT,
            &proc_stat->all.user, &proc_stat->all.nice,
            &proc_stat->all.sys, &proc_stat->all.idle,
            &proc_stat->all.wait, &proc_stat->all.irq,
            &proc_stat->all.sirq, &proc_stat->all.steal,
            &proc_stat->all.guest, &proc_stat->all.guest_nice);

#define PERCPU_FMT "cpu%u %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu"
    /*
     * per-CPU stats
     * e.g. cpu0 95379 4 20053 6502503
     * 2.6 kernels have 3 additional fields for wait, irq and soft_irq.
     * More recent (2008) 2.6 kernels have an extra field for guest and
     * also (since 2009) guest_nice.
     * In the single-CPU system case, don't bother scanning, use "all";
     * this handles non-SMP kernels with no line starting with "cpu0".
     */
    if ((size = pmdaCacheOp(cpus, PMDA_CACHE_SIZE)) == 1) {
        pmdaCacheLookup(cpus, 0, &name, (void **)&cp);
        memcpy(&cp->stat, &proc_stat->all, sizeof(cp->stat));
        pmdaCacheStore(cpus, PMDA_CACHE_ADD, name, (void *)cp);
    }
    else {
        for (n = 0; n < nbufindex; n++) {
            if (strncmp("cpu", bufindex[n], 3) != 0 ||
                !isdigit((int)bufindex[n][3]))
                continue;
            cp = NULL;
            np = NULL;
            i = atoi(&bufindex[n][3]);    /* extract CPU identifier */
            if (pmdaCacheLookup(cpus, i, &name, (void **)&cp) < 0 || !cp)
                continue;
            memset(&cp->stat, 0, sizeof(cp->stat));
            sscanf(bufindex[n], PERCPU_FMT, &i,
                &cp->stat.user, &cp->stat.nice, &cp->stat.sys,
                &cp->stat.idle, &cp->stat.wait, &cp->stat.irq,
                &cp->stat.sirq, &cp->stat.steal, &cp->stat.guest,
                &cp->stat.guest_nice);
            pmdaCacheStore(cpus, PMDA_CACHE_ADD, name, (void *)cp);

            /* update per-node aggregate CPU utilisation stats as well */
            if (pmdaCacheLookup(nodes, cp->nodeid, NULL, (void **)&np) < 0)
                continue;
            np->stat.user += cp->stat.user;
            np->stat.nice += cp->stat.nice;
            np->stat.sys += cp->stat.sys;
            np->stat.idle += cp->stat.idle;
            np->stat.wait += cp->stat.wait;
            np->stat.irq += cp->stat.irq;
            np->stat.sirq += cp->stat.sirq;
            np->stat.steal += cp->stat.steal;
            np->stat.guest += cp->stat.guest;
            np->stat.guest_nice += cp->stat.guest_nice;
        }
    }
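
    /*
     * Prime find_line_format's fast-path hint: the remaining singleton
     * lines normally appear straight after the per-CPU "cpuN" lines.
     */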
    i = size;

#define PAGE_FMT "page %u %u"    /* NB: moved to /proc/vmstat in 2.6 kernels */
    if ((i = find_line_format(PAGE_FMT, 5, bufindex, nbufindex, i)) >= 0)
        sscanf((const char *)bufindex[i], PAGE_FMT,
                &proc_stat->page[0], &proc_stat->page[1]);

#define SWAP_FMT "swap %u %u"    /* NB: moved to /proc/vmstat in 2.6 kernels */
    if ((i = find_line_format(SWAP_FMT, 5, bufindex, nbufindex, i)) >= 0)
        sscanf((const char *)bufindex[i], SWAP_FMT,
                &proc_stat->swap[0], &proc_stat->swap[1]);

#define INTR_FMT "intr %llu"    /* (export 1st 'total interrupts' value only) */
    if ((i = find_line_format(INTR_FMT, 5, bufindex, nbufindex, i)) >= 0)
        sscanf((const char *)bufindex[i], INTR_FMT, &proc_stat->intr);

#define CTXT_FMT "ctxt %llu"
    if ((i = find_line_format(CTXT_FMT, 5, bufindex, nbufindex, i)) >= 0)
        sscanf((const char *)bufindex[i], CTXT_FMT, &proc_stat->ctxt);

#define BTIME_FMT "btime %lu"
    if ((i = find_line_format(BTIME_FMT, 6, bufindex, nbufindex, i)) >= 0)
        sscanf((const char *)bufindex[i], BTIME_FMT, &proc_stat->btime);

#define PROCESSES_FMT "processes %lu"
    if ((i = find_line_format(PROCESSES_FMT, 10, bufindex, nbufindex, i)) >= 0)
        sscanf((const char *)bufindex[i], PROCESSES_FMT, &proc_stat->processes);

#define RUNNING_FMT "procs_running %lu"
    if ((i = find_line_format(RUNNING_FMT, 14, bufindex, nbufindex, i)) >= 0)
        sscanf((const char *)bufindex[i], RUNNING_FMT, &proc_stat->procs_running);

#define BLOCKED_FMT "procs_blocked %lu"
    if ((i = find_line_format(BLOCKED_FMT, 14, bufindex, nbufindex, i)) >= 0)
        sscanf((const char *)bufindex[i], BLOCKED_FMT, &proc_stat->procs_blocked);

    /* success */
    return 0;
}