ctdb-pcp-pmda: Reimplement using new client API
[Samba.git] / ctdb / utils / pmda / pmda_ctdb.c
blob6131be9cd3e1e2bcc74bd301ee496e201ed569f9
1 /*
2 * CTDB Performance Metrics Domain Agent (PMDA) for Performance Co-Pilot (PCP)
4 * Copyright (c) 1995,2004 Silicon Graphics, Inc. All Rights Reserved.
5 * Copyright (c) 2011 David Disseldorp
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2 of the License, or (at your
10 * option) any later version.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 * for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include "replace.h"
23 #include "system/network.h"
25 #include <talloc.h>
26 #include <tevent.h>
27 #include <tdb.h>
29 #include "lib/util/time.h"
30 #include "lib/util/blocking.h"
32 #include "client/client.h"
34 #include <pcp/pmapi.h>
35 #include <pcp/impl.h>
36 #include <pcp/pmda.h>
38 #include "domain.h"
41 * CTDB PMDA
43 * This PMDA connects to the locally running ctdbd daemon and pulls
44 * statistics for export via PCP. The ctdbd Unix domain socket path can be
45 * specified with the CTDB_SOCKET environment variable, otherwise the default
46 * path is used.
50 * All metrics supported in this PMDA - one table entry for each.
51 * The 4th field specifies the serial number of the instance domain
52 * for the metric, and must be either PM_INDOM_NULL (denoting a
53 * metric that only ever has a single value), or the serial number
54 * of one of the instance domains declared in the instance domain table
55 * (i.e. in indomtab, above).
57 static pmdaMetric metrictab[] = {
58 /* num_clients */
59 { NULL, { PMDA_PMID(0,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
60 PMDA_PMUNITS(0,0,0,0,0,0) }, },
61 /* frozen */
62 { NULL, { PMDA_PMID(0,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
63 PMDA_PMUNITS(0,0,0,0,0,0) }, },
64 /* recovering */
65 { NULL, { PMDA_PMID(0,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
66 PMDA_PMUNITS(0,0,0,0,0,0) }, },
67 /* client_packets_sent */
68 { NULL, { PMDA_PMID(0,3), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
69 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
70 /* client_packets_recv */
71 { NULL, { PMDA_PMID(0,4), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
72 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
73 /* node_packets_sent */
74 { NULL, { PMDA_PMID(0,5), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
75 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
76 /* node_packets_recv */
77 { NULL, { PMDA_PMID(0,6), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
78 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
79 /* keepalive_packets_sent */
80 { NULL, { PMDA_PMID(0,7), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
81 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
82 /* keepalive_packets_recv */
83 { NULL, { PMDA_PMID(0,8), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
84 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
85 /* req_call */
86 { NULL, { PMDA_PMID(1,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
87 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
88 /* reply_call */
89 { NULL, { PMDA_PMID(1,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
90 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
91 /* req_dmaster */
92 { NULL, { PMDA_PMID(1,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
93 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
94 /* reply_dmaster */
95 { NULL, { PMDA_PMID(1,3), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
96 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
97 /* reply_error */
98 { NULL, { PMDA_PMID(1,4), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
99 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
100 /* req_message */
101 { NULL, { PMDA_PMID(1,5), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
102 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
103 /* req_control */
104 { NULL, { PMDA_PMID(1,6), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
105 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
106 /* reply_control */
107 { NULL, { PMDA_PMID(1,7), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
108 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
109 /* req_call */
110 { NULL, { PMDA_PMID(2,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
111 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
112 /* req_message */
113 { NULL, { PMDA_PMID(2,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
114 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
115 /* req_control */
116 { NULL, { PMDA_PMID(2,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
117 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
118 /* call */
119 { NULL, { PMDA_PMID(3,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
120 PMDA_PMUNITS(0,0,1,0,0,0) }, },
121 /* control */
122 { NULL, { PMDA_PMID(3,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
123 PMDA_PMUNITS(0,0,1,0,0,0) }, },
124 /* traverse */
125 { NULL, { PMDA_PMID(3,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
126 PMDA_PMUNITS(0,0,1,0,0,0) }, },
127 /* total_calls */
128 { NULL, { PMDA_PMID(0,9), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
129 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
130 /* pending_calls */
131 { NULL, { PMDA_PMID(0,10), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
132 PMDA_PMUNITS(0,0,0,0,0,0) }, },
133 /* locks.num_calls */
134 { NULL, { PMDA_PMID(0,11), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
135 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
136 /* locks.num_pending */
137 { NULL, { PMDA_PMID(0,12), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
138 PMDA_PMUNITS(0,0,0,0,0,0) }, },
139 /* childwrite_calls */
140 { NULL, { PMDA_PMID(0,13), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
141 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
142 /* pending_childwrite_calls */
143 { NULL, { PMDA_PMID(0,14), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
144 PMDA_PMUNITS(0,0,0,0,0,0) }, },
145 /* memory_used */
146 { NULL, { PMDA_PMID(0,15), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
147 PMDA_PMUNITS(1,0,0,PM_SPACE_BYTE,0,0) }, },
148 /* max_hop_count */
149 { NULL, { PMDA_PMID(0,16), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
150 PMDA_PMUNITS(0,0,0,0,0,0) }, },
151 /* reclock.ctdbd.max */
152 { NULL, { PMDA_PMID(0,17), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
153 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
154 /* reclock.recd.max */
155 { NULL, { PMDA_PMID(0,18), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
156 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
157 /* call_latency.max */
158 { NULL, { PMDA_PMID(0,19), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
159 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
160 /* locks.latency.max */
161 { NULL, { PMDA_PMID(0,20), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
162 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
163 /* childwrite_latency.max */
164 { NULL, { PMDA_PMID(0,21), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
165 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
166 /* num_recoveries */
167 { NULL, { PMDA_PMID(0,22), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
168 PMDA_PMUNITS(0,0,0,0,0,0) }, },
171 static struct tevent_context *ev;
172 static struct ctdb_client_context *client;
173 static struct ctdb_statistics *stats;
175 static void
176 pmda_ctdb_disconnected(void *args)
178 fprintf(stderr, "ctdbd unreachable\n");
179 TALLOC_FREE(client);
183 static int
184 pmda_ctdb_daemon_connect(void)
186 const char *socket_name;
187 int ret;
189 ev = tevent_context_init(NULL);
190 if (ev == NULL) {
191 fprintf(stderr, "Failed to init event ctx\n");
192 return -1;
195 socket_name = getenv("CTDB_SOCKET");
196 if (socket_name == NULL) {
197 socket_name = CTDB_SOCKET;
200 ret = ctdb_client_init(ev, ev, socket_name, &client);
201 if (ret != 0) {
202 fprintf(stderr, "Failed to connect to ctdb daemon via %s\n",
203 socket_name);
204 goto err_ev;
207 ctdb_client_set_disconnect_callback(client, pmda_ctdb_disconnected,
208 NULL);
210 return 0;
212 err_ev:
213 talloc_free(ev);
214 client = NULL;
215 return -1;
218 static void
219 pmda_ctdb_daemon_disconnect(void)
221 TALLOC_FREE(client);
222 talloc_free(ev);
225 static int
226 fill_base(unsigned int item, pmAtomValue *atom)
228 switch (item) {
229 case 0:
230 atom->ul = stats->num_clients;
231 break;
232 case 1:
233 atom->ul = stats->frozen;
234 break;
235 case 2:
236 atom->ul = stats->recovering;
237 break;
238 case 3:
239 atom->ul = stats->client_packets_sent;
240 break;
241 case 4:
242 atom->ul = stats->client_packets_recv;
243 break;
244 case 5:
245 atom->ul = stats->node_packets_sent;
246 break;
247 case 6:
248 atom->ul = stats->node_packets_recv;
249 break;
250 case 7:
251 atom->ul = stats->keepalive_packets_sent;
252 break;
253 case 8:
254 atom->ul = stats->keepalive_packets_recv;
255 break;
256 case 9:
257 atom->ul = stats->total_calls;
258 break;
259 case 10:
260 atom->ul = stats->pending_calls;
261 break;
262 case 11:
263 atom->ul = stats->locks.num_calls;
264 break;
265 case 12:
266 atom->ul = stats->locks.num_pending;
267 break;
268 case 13:
269 atom->ul = stats->childwrite_calls;
270 break;
271 case 14:
272 atom->ul = stats->pending_childwrite_calls;
273 break;
274 case 15:
275 atom->ul = stats->memory_used;
276 break;
277 case 16:
278 atom->ul = stats->max_hop_count;
279 break;
280 case 17:
281 atom->d = stats->reclock.ctdbd.max;
282 break;
283 case 18:
284 atom->d = stats->reclock.recd.max;
285 break;
286 case 19:
287 atom->d = stats->call_latency.max;
288 break;
289 case 20:
290 atom->d = stats->locks.latency.max;
291 break;
292 case 21:
293 atom->d = stats->childwrite_latency.max;
294 break;
295 case 22:
296 atom->d = stats->num_recoveries;
297 break;
298 default:
299 return PM_ERR_PMID;
302 return 0;
305 static int
306 fill_node(unsigned int item, pmAtomValue *atom)
308 switch (item) {
309 case 0:
310 atom->ul = stats->node.req_call;
311 break;
312 case 1:
313 atom->ul = stats->node.reply_call;
314 break;
315 case 2:
316 atom->ul = stats->node.req_dmaster;
317 break;
318 case 3:
319 atom->ul = stats->node.reply_dmaster;
320 break;
321 case 4:
322 atom->ul = stats->node.reply_error;
323 break;
324 case 5:
325 atom->ul = stats->node.req_message;
326 break;
327 case 6:
328 atom->ul = stats->node.req_control;
329 break;
330 case 7:
331 atom->ul = stats->node.reply_control;
332 break;
333 default:
334 return PM_ERR_PMID;
337 return 0;
341 static int
342 fill_client(unsigned int item, pmAtomValue *atom)
344 switch (item) {
345 case 0:
346 atom->ul = stats->client.req_call;
347 break;
348 case 1:
349 atom->ul = stats->client.req_message;
350 break;
351 case 2:
352 atom->ul = stats->client.req_control;
353 break;
354 default:
355 return PM_ERR_PMID;
358 return 0;
361 static int
362 fill_timeout(unsigned int item, pmAtomValue *atom)
364 switch (item) {
365 case 0:
366 atom->ul = stats->timeouts.call;
367 break;
368 case 1:
369 atom->ul = stats->timeouts.control;
370 break;
371 case 2:
372 atom->ul = stats->timeouts.traverse;
373 break;
374 default:
375 return PM_ERR_PMID;
378 return 0;
382 * callback provided to pmdaFetch
384 static int
385 pmda_ctdb_fetch_cb(pmdaMetric *mdesc, unsigned int inst, pmAtomValue *atom)
387 int ret;
388 __pmID_int *id = (__pmID_int *)&(mdesc->m_desc.pmid);
390 if (inst != PM_IN_NULL) {
391 return PM_ERR_INST;
394 if (stats == NULL) {
395 fprintf(stderr, "stats not available\n");
396 ret = PM_ERR_VALUE;
397 goto err_out;
401 switch (id->cluster) {
402 case 0:
403 ret = fill_base(id->item, atom);
404 if (ret) {
405 goto err_out;
407 break;
408 case 1:
409 ret = fill_node(id->item, atom);
410 if (ret) {
411 goto err_out;
413 break;
414 case 2:
415 ret = fill_client(id->item, atom);
416 if (ret) {
417 goto err_out;
419 break;
420 case 3:
421 ret = fill_timeout(id->item, atom);
422 if (ret) {
423 goto err_out;
425 break;
426 default:
427 return PM_ERR_PMID;
430 ret = 0;
431 err_out:
432 return ret;
436 * This routine is called once for each pmFetch(3) operation, so is a
437 * good place to do once-per-fetch functions, such as value caching or
438 * instance domain evaluation.
440 static int
441 pmda_ctdb_fetch(int numpmid, pmID pmidlist[], pmResult **resp, pmdaExt *pmda)
443 int ret;
444 struct timeval ctdb_timeout;
446 if (client == NULL) {
447 fprintf(stderr, "attempting reconnect to ctdbd\n");
448 ret = pmda_ctdb_daemon_connect();
449 if (ret < 0) {
450 fprintf(stderr, "reconnect failed\n");
451 return PM_ERR_VALUE;
455 ret = ctdb_ctrl_statistics(client, ev, client, CTDB_CURRENT_NODE,
456 ctdb_timeout, &stats);
457 if (ret != 0) {
458 fprintf(stderr, "ctdb control for statistics failed, reconnecting\n");
459 pmda_ctdb_daemon_disconnect();
460 ret = PM_ERR_VALUE;
461 goto err_out;
464 ret = pmdaFetch(numpmid, pmidlist, resp, pmda);
466 talloc_free(stats);
467 err_out:
468 return ret;
471 void pmda_ctdb_init(pmdaInterface *dp);
474 * Initialise the agent
476 void
477 pmda_ctdb_init(pmdaInterface *dp)
479 if (dp->status != 0) {
480 return;
483 dp->version.two.fetch = pmda_ctdb_fetch;
484 pmdaSetFetchCallBack(dp, pmda_ctdb_fetch_cb);
486 pmdaInit(dp, NULL, 0, metrictab,
487 (sizeof(metrictab) / sizeof(metrictab[0])));
/*
 * Return the path of the PMDA help file, "<PCP_PMDAS_DIR>/ctdb/help".
 *
 * The path is built on first use and kept in a static buffer, so the
 * returned pointer stays valid for the lifetime of the process.
 */
static char *
helpfile(void)
{
	static char path[MAXPATHLEN];

	if (path[0] == '\0') {
		snprintf(path, sizeof(path), "%s/ctdb/help",
			 pmGetConfig("PCP_PMDAS_DIR"));
	}

	return path;
}
502 static void
503 usage(void)
505 fprintf(stderr, "Usage: %s [options]\n\n", pmProgname);
506 fputs("Options:\n"
507 " -d domain use domain (numeric) for metrics domain of PMDA\n"
508 " -l logfile write log into logfile rather than using default log name\n"
509 "\nExactly one of the following options may appear:\n"
510 " -i port expect PMCD to connect on given inet port (number or name)\n"
511 " -p expect PMCD to supply stdin/stdout (pipe)\n"
512 " -u socket expect PMCD to connect on given unix domain socket\n",
513 stderr);
514 exit(1);
518 * Set up the agent if running as a daemon.
521 main(int argc, char **argv)
523 int err = 0;
524 char log_file[] = "pmda_ctdb.log";
525 pmdaInterface dispatch;
527 __pmSetProgname(argv[0]);
529 pmdaDaemon(&dispatch, PMDA_INTERFACE_2, pmProgname, CTDB,
530 log_file, helpfile());
532 if (pmdaGetOpt(argc, argv, "d:i:l:pu:?", &dispatch, &err) != EOF) {
533 err++;
536 if (err) {
537 usage();
540 pmdaOpenLog(&dispatch);
541 pmda_ctdb_init(&dispatch);
542 pmdaConnect(&dispatch);
543 pmdaMain(&dispatch);
545 exit(0);