inet_diag: small reorganization
[raindrops.git] / ext / raindrops / linux_inet_diag.c
blob386b87ce9bab0051529c502b7418e810faf60544
1 #include <ruby.h>
2 #ifdef HAVE_RUBY_ST_H
3 # include <ruby/st.h>
4 #else
5 # include <st.h>
6 #endif
7 #ifdef __linux__
9 /* Ruby 1.8.6+ macros (for compatibility with Ruby 1.9) */
10 #ifndef RSTRING_LEN
11 # define RSTRING_LEN(s) (RSTRING(s)->len)
12 #endif
14 /* partial emulation of the 1.9 rb_thread_blocking_region under 1.8 */
15 #ifndef HAVE_RB_THREAD_BLOCKING_REGION
16 # include <rubysig.h>
17 typedef void rb_unblock_function_t(void *);
18 typedef VALUE rb_blocking_function_t(void *);
19 static VALUE
20 rb_thread_blocking_region(
21 rb_blocking_function_t *func, void *data1,
22 rb_unblock_function_t *ubf, void *data2)
24 VALUE rv;
26 TRAP_BEG;
27 rv = func(data1);
28 TRAP_END;
30 return rv;
32 #endif /* ! HAVE_RB_THREAD_BLOCKING_REGION */
34 #include <assert.h>
35 #include <errno.h>
36 #include <sys/socket.h>
37 #include <sys/types.h>
38 #include <netdb.h>
39 #include <unistd.h>
40 #include <string.h>
41 #include <asm/types.h>
42 #include <netinet/in.h>
43 #include <arpa/inet.h>
44 #include <netinet/tcp.h>
45 #include <linux/netlink.h>
46 #include <linux/rtnetlink.h>
47 #include <linux/inet_diag.h>
49 static size_t page_size;
50 static unsigned g_seq;
51 static VALUE cListenStats;
/* internal counters accumulated per address while walking netlink replies */
struct listen_stats {
	/* incremented for every TCP_ESTABLISHED socket reported */
	uint32_t active;
	/* set by table_set_queued(); st_to_hash() only exports entries with it set */
	uint32_t listener_p:1;
	/* copied from idiag_rqueue of the non-ESTABLISHED (listen) socket */
	uint32_t queued:31;
};
/*
 * Byte length of the bytecode buffer handed to the kernel: one
 * inet_diag_bc_op, one inet_diag_hostcond, and a sockaddr_storage
 * worth of space following them.
 */
#define OPLEN (sizeof(struct inet_diag_bc_op) + \
               sizeof(struct inet_diag_hostcond) + \
               sizeof(struct sockaddr_storage))
63 struct nogvl_args {
64 st_table *table;
65 struct iovec iov[3]; /* last iov holds inet_diag bytecode */
66 struct listen_stats stats;
69 /* creates a Ruby ListenStats Struct based on our internal listen_stats */
70 static VALUE rb_listen_stats(struct listen_stats *stats)
72 VALUE active = UINT2NUM(stats->active);
73 VALUE queued = UINT2NUM(stats->queued);
75 return rb_struct_new(cListenStats, active, queued);
78 static int st_free_data(st_data_t key, st_data_t value, st_data_t ignored)
80 xfree((void *)key);
81 xfree((void *)value);
83 return ST_DELETE;
86 static int st_to_hash(st_data_t key, st_data_t value, VALUE hash)
88 struct listen_stats *stats = (struct listen_stats *)value;
90 if (stats->listener_p) {
91 VALUE k = rb_str_new2((const char *)key);
92 VALUE v = rb_listen_stats(stats);
94 OBJ_FREEZE(k);
95 rb_hash_aset(hash, k, v);
97 return st_free_data(key, value, 0);
100 static struct listen_stats *stats_for(st_table *table, struct inet_diag_msg *r)
102 char *key, *port;
103 struct listen_stats *stats;
104 size_t keylen;
105 size_t portlen = sizeof("65535");
106 struct sockaddr_storage ss = { 0 };
107 socklen_t len = sizeof(struct sockaddr_storage);
108 int rc;
109 int flags = NI_NUMERICHOST | NI_NUMERICSERV;
111 switch ((ss.ss_family = r->idiag_family)) {
112 case AF_INET: {
113 struct sockaddr_in *in = (struct sockaddr_in *)&ss;
114 in->sin_port = r->id.idiag_sport;
115 in->sin_addr.s_addr = r->id.idiag_src[0];
116 keylen = INET_ADDRSTRLEN;
117 break;
119 case AF_INET6: {
120 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)&ss;
121 in6->sin6_port = r->id.idiag_sport;
122 memcpy(&in6->sin6_addr.in6_u.u6_addr32,
123 &r->id.idiag_src, sizeof(__be32[4]));
124 keylen = INET6_ADDRSTRLEN;
125 break;
127 default:
128 assert(0 && "unsupported address family, could that be IPv7?!");
130 key = alloca(keylen + 1 + portlen);
131 key[keylen] = 0; /* will be ':' later */
132 port = key + keylen + 1;
133 rc = getnameinfo((struct sockaddr *)&ss, len,
134 key, keylen, port, portlen, flags);
135 if (rc != 0) {
136 fprintf(stderr, "BUG: getnameinfo: %s\n"
137 "Please report how you produced this at %s\n",
138 gai_strerror(rc), "raindrops@librelist.com");
139 fflush(stderr);
140 *key = 0;
142 keylen = strlen(key);
143 portlen = strlen(port);
144 key[keylen] = ':';
145 memmove(key + keylen + 1, port, portlen + 1);
146 if (!st_lookup(table, (st_data_t)key, (st_data_t *)&stats)) {
147 char *old_key = key;
149 key = xmalloc(keylen + 1 + portlen + 1);
150 memcpy(key, old_key, keylen + 1 + portlen + 1);
151 stats = xcalloc(1, sizeof(struct listen_stats));
152 st_insert(table, (st_data_t)key, (st_data_t)stats);
154 return stats;
157 static void table_incr_active(st_table *table, struct inet_diag_msg *r)
159 struct listen_stats *stats = stats_for(table, r);
160 ++stats->active;
163 static void table_set_queued(st_table *table, struct inet_diag_msg *r)
165 struct listen_stats *stats = stats_for(table, r);
166 stats->listener_p = 1;
167 stats->queued = r->idiag_rqueue;
170 /* inner loop of inet_diag, called for every socket returned by netlink */
171 static inline void r_acc(struct nogvl_args *args, struct inet_diag_msg *r)
174 * inode == 0 means the connection is still in the listen queue
175 * and has not yet been accept()-ed by the server. The
176 * inet_diag bytecode cannot filter this for us.
178 if (r->idiag_inode == 0)
179 return;
180 if (r->idiag_state == TCP_ESTABLISHED) {
181 if (args->table)
182 table_incr_active(args->table, r);
183 else
184 args->stats.active++;
185 } else { /* if (r->idiag_state == TCP_LISTEN) */
186 if (args->table)
187 table_set_queued(args->table, r);
188 else
189 args->stats.queued = r->idiag_rqueue;
192 * we wont get anything else because of the idiag_states filter
/*
 * Error tags returned (cast to VALUE) by diag().  nl_errcheck() compares
 * the returned pointer against err_nlmsg and otherwise passes the string
 * to rb_sys_fail().
 */
static const char err_socket[] = "socket";
static const char err_sendmsg[] = "sendmsg";
static const char err_recvmsg[] = "recvmsg";
static const char err_nlmsg[] = "nlmsg";
201 struct diag_req {
202 struct nlmsghdr nlh;
203 struct inet_diag_req r;
206 static void prep_msghdr(
207 struct msghdr *msg,
208 struct nogvl_args *args,
209 struct sockaddr_nl *nladdr,
210 size_t iovlen)
212 memset(msg, 0, sizeof(struct msghdr));
213 msg->msg_name = (void *)nladdr;
214 msg->msg_namelen = sizeof(struct sockaddr_nl);
215 msg->msg_iov = args->iov;
216 msg->msg_iovlen = iovlen;
219 static void prep_diag_args(
220 struct nogvl_args *args,
221 struct sockaddr_nl *nladdr,
222 struct rtattr *rta,
223 struct diag_req *req,
224 struct msghdr *msg)
226 struct inet_diag_bc_op *op = args->iov[2].iov_base;
227 struct inet_diag_hostcond *cond = (struct inet_diag_hostcond *)(op + 1);
229 memset(req, 0, sizeof(struct diag_req));
230 memset(nladdr, 0, sizeof(struct sockaddr_nl));
232 nladdr->nl_family = AF_NETLINK;
234 req->nlh.nlmsg_len = sizeof(struct diag_req) +
235 RTA_LENGTH(args->iov[2].iov_len);
236 req->nlh.nlmsg_type = TCPDIAG_GETSOCK;
237 req->nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
238 req->nlh.nlmsg_pid = getpid();
239 req->r.idiag_states = (1<<TCP_ESTABLISHED) | (1<<TCP_LISTEN);
240 req->r.idiag_family = cond->family;
241 rta->rta_type = INET_DIAG_REQ_BYTECODE;
242 rta->rta_len = RTA_LENGTH(args->iov[2].iov_len);
244 args->iov[0].iov_base = req;
245 args->iov[0].iov_len = sizeof(struct diag_req);
246 args->iov[1].iov_base = rta;
247 args->iov[1].iov_len = sizeof(struct rtattr);
249 prep_msghdr(msg, args, nladdr, 3);
252 static void prep_recvmsg_buf(struct nogvl_args *args)
254 /* reuse buffer that was allocated for bytecode */
255 args->iov[0].iov_len = page_size;
256 args->iov[0].iov_base = args->iov[2].iov_base;
259 /* does the inet_diag stuff with netlink(), this is called w/o GVL */
260 static VALUE diag(void *ptr)
262 struct nogvl_args *args = ptr;
263 struct sockaddr_nl nladdr;
264 struct rtattr rta;
265 struct diag_req req;
266 struct msghdr msg;
267 const char *err = NULL;
268 unsigned seq = ++g_seq;
269 int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_INET_DIAG);
271 if (fd < 0)
272 return (VALUE)err_socket;
274 prep_diag_args(args, &nladdr, &rta, &req, &msg);
275 req.nlh.nlmsg_seq = seq;
277 if (sendmsg(fd, &msg, 0) < 0) {
278 err = err_sendmsg;
279 goto out;
282 prep_recvmsg_buf(args);
284 while (1) {
285 ssize_t readed;
286 struct nlmsghdr *h = (struct nlmsghdr *)args->iov[0].iov_base;
288 prep_msghdr(&msg, args, &nladdr, 1);
289 readed = recvmsg(fd, &msg, 0);
290 if (readed < 0) {
291 if (errno == EINTR)
292 continue;
293 err = err_recvmsg;
294 goto out;
296 if (readed == 0)
297 goto out;
299 for ( ; NLMSG_OK(h, readed); h = NLMSG_NEXT(h, readed)) {
300 if (h->nlmsg_seq != seq)
301 continue;
302 if (h->nlmsg_type == NLMSG_DONE)
303 goto out;
304 if (h->nlmsg_type == NLMSG_ERROR) {
305 err = err_nlmsg;
306 goto out;
308 r_acc(args, NLMSG_DATA(h));
311 out:
313 int save_errno = errno;
314 close(fd);
315 if (err && args->table) {
316 st_foreach(args->table, st_free_data, 0);
317 st_free_table(args->table);
319 errno = save_errno;
321 return (VALUE)err;
324 /* populates sockaddr_storage struct by parsing +addr+ */
325 static void parse_addr(struct sockaddr_storage *inet, VALUE addr)
327 char *host_ptr;
328 char *colon = NULL;
329 char *rbracket = NULL;
330 long host_len;
331 struct addrinfo hints;
332 struct addrinfo *res;
333 int rc;
335 if (TYPE(addr) != T_STRING)
336 rb_raise(rb_eArgError, "addrs must be an Array of Strings");
338 host_ptr = StringValueCStr(addr);
339 host_len = RSTRING_LEN(addr);
340 if (*host_ptr == '[') { /* ipv6 address format (rfc2732) */
341 rbracket = memchr(host_ptr + 1, ']', host_len - 1);
343 if (rbracket == NULL)
344 rb_raise(rb_eArgError, "']' not found in IPv6 addr=%s",
345 host_ptr);
346 if (rbracket[1] != ':')
347 rb_raise(rb_eArgError, "':' not found in IPv6 addr=%s",
348 host_ptr);
349 colon = rbracket + 1;
350 host_ptr++;
351 *rbracket = 0;
352 } else { /* ipv4 */
353 colon = memchr(host_ptr, ':', host_len);
356 if (!colon)
357 rb_raise(rb_eArgError, "port not found in: `%s'", host_ptr);
359 hints.ai_family = AF_UNSPEC;
360 hints.ai_socktype = SOCK_STREAM;
361 hints.ai_protocol = IPPROTO_TCP;
362 hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;
364 *colon = 0;
365 rc = getaddrinfo(host_ptr, colon + 1, &hints, &res);
366 *colon = ':';
367 if (rbracket) *rbracket = ']';
368 if (rc != 0)
369 rb_raise(rb_eArgError, "getaddrinfo(%s): %s",
370 host_ptr, gai_strerror(rc));
372 memcpy(inet, res->ai_addr, res->ai_addrlen);
373 freeaddrinfo(res);
376 /* generates inet_diag bytecode to match all addrs for a given family */
377 static void gen_bytecode_all(struct iovec *iov, sa_family_t family)
379 struct inet_diag_bc_op *op;
380 struct inet_diag_hostcond *cond;
382 /* iov_len was already set and base allocated in a parent function */
383 assert(iov->iov_len == OPLEN && iov->iov_base && "iov invalid");
384 op = iov->iov_base;
385 op->code = INET_DIAG_BC_S_COND;
386 op->yes = OPLEN;
387 op->no = sizeof(struct inet_diag_bc_op) + OPLEN;
388 cond = (struct inet_diag_hostcond *)(op + 1);
389 cond->family = family;
390 cond->port = -1;
391 cond->prefix_len = 0;
394 /* generates inet_diag bytecode to match a single addr */
395 static void gen_bytecode(struct iovec *iov, struct sockaddr_storage *inet)
397 struct inet_diag_bc_op *op;
398 struct inet_diag_hostcond *cond;
400 /* iov_len was already set and base allocated in a parent function */
401 assert(iov->iov_len == OPLEN && iov->iov_base && "iov invalid");
402 op = iov->iov_base;
403 op->code = INET_DIAG_BC_S_COND;
404 op->yes = OPLEN;
405 op->no = sizeof(struct inet_diag_bc_op) + OPLEN;
407 cond = (struct inet_diag_hostcond *)(op + 1);
408 cond->family = inet->ss_family;
409 switch (inet->ss_family) {
410 case AF_INET: {
411 struct sockaddr_in *in = (struct sockaddr_in *)inet;
413 cond->port = ntohs(in->sin_port);
414 cond->prefix_len = in->sin_addr.s_addr == 0 ? 0 :
415 sizeof(in->sin_addr.s_addr) * CHAR_BIT;
416 *cond->addr = in->sin_addr.s_addr;
418 break;
419 case AF_INET6: {
420 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)inet;
422 cond->port = ntohs(in6->sin6_port);
423 cond->prefix_len = memcmp(&in6addr_any, &in6->sin6_addr,
424 sizeof(struct in6_addr)) == 0 ?
425 0 : sizeof(in6->sin6_addr) * CHAR_BIT;
426 memcpy(&cond->addr, &in6->sin6_addr, sizeof(struct in6_addr));
428 break;
429 default:
430 assert(0 && "unsupported address family, could that be IPv7?!");
434 static void nl_errcheck(VALUE r)
436 const char *err = (const char *)r;
438 if (err) {
439 if (err == err_nlmsg)
440 rb_raise(rb_eRuntimeError, "NLMSG_ERROR");
441 else
442 rb_sys_fail(err);
446 static VALUE tcp_stats(struct nogvl_args *args, VALUE addr)
448 struct sockaddr_storage query_addr;
450 parse_addr(&query_addr, addr);
451 gen_bytecode(&args->iov[2], &query_addr);
453 memset(&args->stats, 0, sizeof(struct listen_stats));
454 nl_errcheck(rb_thread_blocking_region(diag, args, 0, 0));
456 return rb_listen_stats(&args->stats);
460 * call-seq:
461 * addrs = %w(0.0.0.0:80 127.0.0.1:8080)
462 * Raindrops::Linux.tcp_listener_stats(addrs) => hash
464 * Takes an array of strings representing listen addresses to filter for.
465 * Returns a hash with given addresses as keys and ListenStats
466 * objects as the values.
468 static VALUE tcp_listener_stats(VALUE obj, VALUE addrs)
470 VALUE *ary;
471 long i;
472 VALUE rv;
473 struct nogvl_args args;
476 * allocating page_size instead of OP_LEN since we'll reuse the
477 * buffer for recvmsg() later, we already checked for
478 * OPLEN <= page_size at initialization
480 args.iov[2].iov_len = OPLEN;
481 args.iov[2].iov_base = alloca(page_size);
482 args.table = NULL;
484 if (TYPE(addrs) != T_ARRAY)
485 rb_raise(rb_eArgError, "addrs must be an Array of Strings");
487 rv = rb_hash_new();
488 ary = RARRAY_PTR(addrs);
489 for (i = RARRAY_LEN(addrs); --i >= 0; ary++)
490 rb_hash_aset(rv, *ary, tcp_stats(&args, *ary));
492 return rv;
495 static VALUE all_tcp_listener_stats(VALUE obj)
497 VALUE rv;
498 struct nogvl_args args;
501 * allocating page_size instead of OP_LEN since we'll reuse the
502 * buffer for recvmsg() later, we already checked for
503 * OPLEN <= page_size at initialization
505 args.iov[2].iov_len = OPLEN;
506 args.iov[2].iov_base = alloca(page_size);
507 args.table = st_init_strtable();
508 gen_bytecode_all(&args.iov[2], AF_INET);
510 nl_errcheck(rb_thread_blocking_region(diag, &args, NULL, 0));
511 rv = rb_hash_new();
512 st_foreach(args.table, st_to_hash, rv);
513 st_free_table(args.table);
514 return rv;
517 void Init_raindrops_linux_inet_diag(void)
519 VALUE cRaindrops = rb_const_get(rb_cObject, rb_intern("Raindrops"));
520 VALUE mLinux = rb_define_module_under(cRaindrops, "Linux");
522 cListenStats = rb_const_get(cRaindrops, rb_intern("ListenStats"));
524 rb_define_module_function(mLinux, "tcp_listener_stats",
525 tcp_listener_stats, 1);
526 rb_define_module_function(mLinux, "all_tcp_listener_stats",
527 all_tcp_listener_stats, 0);
529 page_size = getpagesize();
531 assert(OPLEN <= page_size && "bytecode OPLEN is not <= PAGE_SIZE");
533 #endif /* __linux__ */