2 * CTDB Performance Metrics Domain Agent (PMDA) for Performance Co-Pilot (PCP)
4 * Copyright (c) 1995,2004 Silicon Graphics, Inc. All Rights Reserved.
5 * Copyright (c) 2011 David Disseldorp
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2 of the License, or (at your
10 * option) any later version.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include "system/network.h"
29 #include "lib/util/time.h"
30 #include "lib/util/blocking.h"
32 #include "client/client.h"
34 #include <pcp/pmapi.h>
/*
 * This PMDA connects to the locally running ctdbd daemon and pulls
 * statistics for export via PCP. The ctdbd Unix domain socket path can be
 * specified with the CTDB_SOCKET environment variable, otherwise the
 * compiled-in default path (the CTDB_SOCKET macro) is used.
 */
/*
 * All metrics supported in this PMDA - one table entry for each.
 * The 4th field specifies the serial number of the instance domain
 * for the metric, and must be either PM_INDOM_NULL (denoting a
 * metric that only ever has a single value), or the serial number
 * of one of the instance domains declared in an instance domain table.
 * This PMDA declares no instance domains (pmdaInit() is passed NULL, 0
 * for them), so every entry uses PM_INDOM_NULL.
 */
57 static pmdaMetric metrictab
[] = {
59 { NULL
, { PMDA_PMID(0,0), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_INSTANT
,
60 PMDA_PMUNITS(0,0,0,0,0,0) }, },
62 { NULL
, { PMDA_PMID(0,1), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_INSTANT
,
63 PMDA_PMUNITS(0,0,0,0,0,0) }, },
65 { NULL
, { PMDA_PMID(0,2), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_INSTANT
,
66 PMDA_PMUNITS(0,0,0,0,0,0) }, },
67 /* client_packets_sent */
68 { NULL
, { PMDA_PMID(0,3), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
69 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
70 /* client_packets_recv */
71 { NULL
, { PMDA_PMID(0,4), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
72 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
73 /* node_packets_sent */
74 { NULL
, { PMDA_PMID(0,5), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
75 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
76 /* node_packets_recv */
77 { NULL
, { PMDA_PMID(0,6), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
78 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
79 /* keepalive_packets_sent */
80 { NULL
, { PMDA_PMID(0,7), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
81 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
82 /* keepalive_packets_recv */
83 { NULL
, { PMDA_PMID(0,8), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
84 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
86 { NULL
, { PMDA_PMID(1,0), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
87 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
89 { NULL
, { PMDA_PMID(1,1), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
90 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
92 { NULL
, { PMDA_PMID(1,2), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
93 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
95 { NULL
, { PMDA_PMID(1,3), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
96 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
98 { NULL
, { PMDA_PMID(1,4), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
99 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
101 { NULL
, { PMDA_PMID(1,5), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
102 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
104 { NULL
, { PMDA_PMID(1,6), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
105 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
107 { NULL
, { PMDA_PMID(1,7), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
108 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
110 { NULL
, { PMDA_PMID(2,0), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
111 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
113 { NULL
, { PMDA_PMID(2,1), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
114 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
116 { NULL
, { PMDA_PMID(2,2), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
117 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
119 { NULL
, { PMDA_PMID(3,0), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
120 PMDA_PMUNITS(0,0,1,0,0,0) }, },
122 { NULL
, { PMDA_PMID(3,1), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
123 PMDA_PMUNITS(0,0,1,0,0,0) }, },
125 { NULL
, { PMDA_PMID(3,2), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
126 PMDA_PMUNITS(0,0,1,0,0,0) }, },
128 { NULL
, { PMDA_PMID(0,9), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
129 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
131 { NULL
, { PMDA_PMID(0,10), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_INSTANT
,
132 PMDA_PMUNITS(0,0,0,0,0,0) }, },
133 /* locks.num_calls */
134 { NULL
, { PMDA_PMID(0,11), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
135 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
136 /* locks.num_pending */
137 { NULL
, { PMDA_PMID(0,12), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_INSTANT
,
138 PMDA_PMUNITS(0,0,0,0,0,0) }, },
139 /* childwrite_calls */
140 { NULL
, { PMDA_PMID(0,13), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_COUNTER
,
141 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE
) }, },
142 /* pending_childwrite_calls */
143 { NULL
, { PMDA_PMID(0,14), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_INSTANT
,
144 PMDA_PMUNITS(0,0,0,0,0,0) }, },
146 { NULL
, { PMDA_PMID(0,15), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_INSTANT
,
147 PMDA_PMUNITS(1,0,0,PM_SPACE_BYTE
,0,0) }, },
149 { NULL
, { PMDA_PMID(0,16), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_INSTANT
,
150 PMDA_PMUNITS(0,0,0,0,0,0) }, },
151 /* reclock.ctdbd.max */
152 { NULL
, { PMDA_PMID(0,17), PM_TYPE_DOUBLE
, PM_INDOM_NULL
, PM_SEM_INSTANT
,
153 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC
,0) }, },
154 /* reclock.recd.max */
155 { NULL
, { PMDA_PMID(0,18), PM_TYPE_DOUBLE
, PM_INDOM_NULL
, PM_SEM_INSTANT
,
156 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC
,0) }, },
157 /* call_latency.max */
158 { NULL
, { PMDA_PMID(0,19), PM_TYPE_DOUBLE
, PM_INDOM_NULL
, PM_SEM_INSTANT
,
159 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC
,0) }, },
160 /* locks.latency.max */
161 { NULL
, { PMDA_PMID(0,20), PM_TYPE_DOUBLE
, PM_INDOM_NULL
, PM_SEM_INSTANT
,
162 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC
,0) }, },
163 /* childwrite_latency.max */
164 { NULL
, { PMDA_PMID(0,21), PM_TYPE_DOUBLE
, PM_INDOM_NULL
, PM_SEM_INSTANT
,
165 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC
,0) }, },
167 { NULL
, { PMDA_PMID(0,22), PM_TYPE_U32
, PM_INDOM_NULL
, PM_SEM_INSTANT
,
168 PMDA_PMUNITS(0,0,0,0,0,0) }, },
171 static struct tevent_context
*ev
;
172 static struct ctdb_client_context
*client
;
173 static struct ctdb_statistics
*stats
;
176 pmda_ctdb_disconnected(void *args
)
178 fprintf(stderr
, "ctdbd unreachable\n");
184 pmda_ctdb_daemon_connect(void)
186 const char *socket_name
;
189 ev
= tevent_context_init(NULL
);
191 fprintf(stderr
, "Failed to init event ctx\n");
195 socket_name
= getenv("CTDB_SOCKET");
196 if (socket_name
== NULL
) {
197 socket_name
= CTDB_SOCKET
;
200 ret
= ctdb_client_init(ev
, ev
, socket_name
, &client
);
202 fprintf(stderr
, "Failed to connect to ctdb daemon via %s\n",
207 ctdb_client_set_disconnect_callback(client
, pmda_ctdb_disconnected
,
219 pmda_ctdb_daemon_disconnect(void)
226 fill_base(unsigned int item
, pmAtomValue
*atom
)
230 atom
->ul
= stats
->num_clients
;
233 atom
->ul
= stats
->frozen
;
236 atom
->ul
= stats
->recovering
;
239 atom
->ul
= stats
->client_packets_sent
;
242 atom
->ul
= stats
->client_packets_recv
;
245 atom
->ul
= stats
->node_packets_sent
;
248 atom
->ul
= stats
->node_packets_recv
;
251 atom
->ul
= stats
->keepalive_packets_sent
;
254 atom
->ul
= stats
->keepalive_packets_recv
;
257 atom
->ul
= stats
->total_calls
;
260 atom
->ul
= stats
->pending_calls
;
263 atom
->ul
= stats
->locks
.num_calls
;
266 atom
->ul
= stats
->locks
.num_pending
;
269 atom
->ul
= stats
->childwrite_calls
;
272 atom
->ul
= stats
->pending_childwrite_calls
;
275 atom
->ul
= stats
->memory_used
;
278 atom
->ul
= stats
->max_hop_count
;
281 atom
->d
= stats
->reclock
.ctdbd
.max
;
284 atom
->d
= stats
->reclock
.recd
.max
;
287 atom
->d
= stats
->call_latency
.max
;
290 atom
->d
= stats
->locks
.latency
.max
;
293 atom
->d
= stats
->childwrite_latency
.max
;
296 atom
->d
= stats
->num_recoveries
;
306 fill_node(unsigned int item
, pmAtomValue
*atom
)
310 atom
->ul
= stats
->node
.req_call
;
313 atom
->ul
= stats
->node
.reply_call
;
316 atom
->ul
= stats
->node
.req_dmaster
;
319 atom
->ul
= stats
->node
.reply_dmaster
;
322 atom
->ul
= stats
->node
.reply_error
;
325 atom
->ul
= stats
->node
.req_message
;
328 atom
->ul
= stats
->node
.req_control
;
331 atom
->ul
= stats
->node
.reply_control
;
342 fill_client(unsigned int item
, pmAtomValue
*atom
)
346 atom
->ul
= stats
->client
.req_call
;
349 atom
->ul
= stats
->client
.req_message
;
352 atom
->ul
= stats
->client
.req_control
;
362 fill_timeout(unsigned int item
, pmAtomValue
*atom
)
366 atom
->ul
= stats
->timeouts
.call
;
369 atom
->ul
= stats
->timeouts
.control
;
372 atom
->ul
= stats
->timeouts
.traverse
;
382 * callback provided to pmdaFetch
385 pmda_ctdb_fetch_cb(pmdaMetric
*mdesc
, unsigned int inst
, pmAtomValue
*atom
)
388 __pmID_int
*id
= (__pmID_int
*)&(mdesc
->m_desc
.pmid
);
390 if (inst
!= PM_IN_NULL
) {
395 fprintf(stderr
, "stats not available\n");
401 switch (id
->cluster
) {
403 ret
= fill_base(id
->item
, atom
);
409 ret
= fill_node(id
->item
, atom
);
415 ret
= fill_client(id
->item
, atom
);
421 ret
= fill_timeout(id
->item
, atom
);
436 * This routine is called once for each pmFetch(3) operation, so is a
437 * good place to do once-per-fetch functions, such as value caching or
438 * instance domain evaluation.
441 pmda_ctdb_fetch(int numpmid
, pmID pmidlist
[], pmResult
**resp
, pmdaExt
*pmda
)
444 struct timeval ctdb_timeout
;
446 if (client
== NULL
) {
447 fprintf(stderr
, "attempting reconnect to ctdbd\n");
448 ret
= pmda_ctdb_daemon_connect();
450 fprintf(stderr
, "reconnect failed\n");
455 ret
= ctdb_ctrl_statistics(client
, ev
, client
, CTDB_CURRENT_NODE
,
456 ctdb_timeout
, &stats
);
458 fprintf(stderr
, "ctdb control for statistics failed, reconnecting\n");
459 pmda_ctdb_daemon_disconnect();
464 ret
= pmdaFetch(numpmid
, pmidlist
, resp
, pmda
);
471 void pmda_ctdb_init(pmdaInterface
*dp
);
474 * Initialise the agent
477 pmda_ctdb_init(pmdaInterface
*dp
)
479 if (dp
->status
!= 0) {
483 dp
->version
.two
.fetch
= pmda_ctdb_fetch
;
484 pmdaSetFetchCallBack(dp
, pmda_ctdb_fetch_cb
);
486 pmdaInit(dp
, NULL
, 0, metrictab
,
487 (sizeof(metrictab
) / sizeof(metrictab
[0])));
493 static char buf
[MAXPATHLEN
];
496 snprintf(buf
, sizeof(buf
), "%s/ctdb/help",
497 pmGetConfig("PCP_PMDAS_DIR"));
505 fprintf(stderr
, "Usage: %s [options]\n\n", pmProgname
);
507 " -d domain use domain (numeric) for metrics domain of PMDA\n"
508 " -l logfile write log into logfile rather than using default log name\n"
509 "\nExactly one of the following options may appear:\n"
510 " -i port expect PMCD to connect on given inet port (number or name)\n"
511 " -p expect PMCD to supply stdin/stdout (pipe)\n"
512 " -u socket expect PMCD to connect on given unix domain socket\n",
518 * Set up the agent if running as a daemon.
521 main(int argc
, char **argv
)
524 char log_file
[] = "pmda_ctdb.log";
525 pmdaInterface dispatch
;
527 __pmSetProgname(argv
[0]);
529 pmdaDaemon(&dispatch
, PMDA_INTERFACE_2
, pmProgname
, CTDB
,
530 log_file
, helpfile());
532 if (pmdaGetOpt(argc
, argv
, "d:i:l:pu:?", &dispatch
, &err
) != EOF
) {
540 pmdaOpenLog(&dispatch
);
541 pmda_ctdb_init(&dispatch
);
542 pmdaConnect(&dispatch
);