Remove Scope IDs from IPv6 addresses.
[raindrops.git] / ext / raindrops / linux_inet_diag.c
blob7b5bae10fdba786788d9e1e5d6d0c7db65ec70ed
1 #include <ruby.h>
2 #ifdef HAVE_RUBY_ST_H
3 # include <ruby/st.h>
4 #else
5 # include <st.h>
6 #endif
7 #include "my_fileno.h"
8 #ifdef __linux__
/*
 * Compatibility shim: supply RSTRING_LEN when the Ruby headers do not
 * (Ruby 1.8.6+ / 1.9 define it themselves).
 */
#if !defined(RSTRING_LEN)
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
15 /* partial emulation of the 1.9 rb_thread_blocking_region under 1.8 */
16 #ifndef HAVE_RB_THREAD_BLOCKING_REGION
17 # include <rubysig.h>
18 # define RUBY_UBF_IO ((rb_unblock_function_t *)-1)
19 typedef void rb_unblock_function_t(void *);
20 typedef VALUE rb_blocking_function_t(void *);
21 static VALUE
22 rb_thread_blocking_region(
23 rb_blocking_function_t *func, void *data1,
24 rb_unblock_function_t *ubf, void *data2)
26 VALUE rv;
28 TRAP_BEG;
29 rv = func(data1);
30 TRAP_END;
32 return rv;
34 #endif /* ! HAVE_RB_THREAD_BLOCKING_REGION */
/*
 * Declare rb_thread_io_blocking_region when the build detected it;
 * otherwise map it onto rb_thread_blocking_region with RUBY_UBF_IO
 * (the fd argument is dropped in that case).
 */
#if defined(HAVE_RB_THREAD_IO_BLOCKING_REGION)
VALUE rb_thread_io_blocking_region(rb_blocking_function_t *, void *, int);
#else
#  define rb_thread_io_blocking_region(fn,data,fd) \
	rb_thread_blocking_region((fn),(data),RUBY_UBF_IO,0)
#endif /* HAVE_RB_THREAD_IO_BLOCKING_REGION */
43 #include <assert.h>
44 #include <errno.h>
45 #include <sys/socket.h>
46 #include <sys/types.h>
47 #include <netdb.h>
48 #include <unistd.h>
49 #include <fcntl.h>
50 #include <string.h>
51 #include <asm/types.h>
52 #include <netinet/in.h>
53 #include <arpa/inet.h>
54 #include <netinet/tcp.h>
55 #include <linux/netlink.h>
56 #include <linux/rtnetlink.h>
57 #include <linux/inet_diag.h>
/*
 * A single socket-address buffer that can be viewed as the generic,
 * storage, IPv4 or IPv6 sockaddr type, whichever the caller needs.
 */
union any_addr {
	struct sockaddr_storage ss;	/* family-agnostic view */
	struct sockaddr sa;		/* generic view handed to getnameinfo() */
	struct sockaddr_in in;		/* AF_INET view */
	struct sockaddr_in6 in6;	/* AF_INET6 view */
};
66 static size_t page_size;
67 static unsigned g_seq;
68 static VALUE cListenStats, cIDSock;
69 static ID id_new;
/* per-address counters accumulated while walking inet_diag results */
struct listen_stats {
	uint32_t active;	/* incremented once per TCP_ESTABLISHED entry */
	uint32_t queued;	/* taken from idiag_rqueue of a non-established entry */
	uint32_t listener_p;	/* set to 1 once a queued value was recorded */
};
/*
 * Size in bytes of the inet_diag bytecode this file generates:
 * one op header, one host condition and room for a full sockaddr_storage.
 */
#define OPLEN \
	(sizeof(struct inet_diag_bc_op) + \
	 sizeof(struct inet_diag_hostcond) + \
	 sizeof(struct sockaddr_storage))
81 struct nogvl_args {
82 st_table *table;
83 struct iovec iov[3]; /* last iov holds inet_diag bytecode */
84 struct listen_stats stats;
85 int fd;
88 #ifdef SOCK_CLOEXEC
89 # define my_SOCK_RAW (SOCK_RAW|SOCK_CLOEXEC)
90 # define FORCE_CLOEXEC(v) (v)
91 #else
92 # define my_SOCK_RAW SOCK_RAW
93 static VALUE FORCE_CLOEXEC(VALUE io)
95 int fd = my_fileno(io);
96 int flags = fcntl(fd, F_SETFD, FD_CLOEXEC);
97 if (flags == -1)
98 rb_sys_fail("fcntl(F_SETFD, FD_CLOEXEC)");
99 return io;
101 #endif
104 * call-seq:
105 * Raindrops::InetDiagSocket.new -> Socket
107 * Creates a new Socket object for the netlink inet_diag facility
109 static VALUE ids_s_new(VALUE klass)
111 VALUE argv[3];
113 argv[0] = INT2NUM(AF_NETLINK);
114 argv[1] = INT2NUM(my_SOCK_RAW);
115 argv[2] = INT2NUM(NETLINK_INET_DIAG);
117 return FORCE_CLOEXEC(rb_call_super(3, argv));
120 /* creates a Ruby ListenStats Struct based on our internal listen_stats */
121 static VALUE rb_listen_stats(struct listen_stats *stats)
123 VALUE active = UINT2NUM(stats->active);
124 VALUE queued = UINT2NUM(stats->queued);
126 return rb_struct_new(cListenStats, active, queued);
129 static int st_free_data(st_data_t key, st_data_t value, st_data_t ignored)
131 xfree((void *)key);
132 xfree((void *)value);
134 return ST_DELETE;
138 * call-seq:
139 * remove_scope_id(ip_address)
141 * Returns copy of IP address with Scope ID removed,
142 * if address has it (only IPv6 actually may have it).
144 static VALUE remove_scope_id(const char *addr)
146 VALUE rv = rb_str_new2(addr);
147 long len = RSTRING_LEN(rv);
148 char *ptr = RSTRING_PTR(rv);
149 char *pct = memchr(ptr, '%', len);
152 * remove scoped portion
153 * Ruby equivalent: rv.sub!(/%([^\]]*)\]/, "]")
155 if (pct) {
156 size_t newlen = pct - ptr;
157 char *rbracket = memchr(pct, ']', len - newlen);
159 if (rbracket) {
160 size_t move = len - (rbracket - ptr);
162 memmove(pct, rbracket, move);
163 newlen += move;
165 rb_str_set_len(rv, newlen);
166 } else {
167 rb_raise(rb_eArgError,
168 "']' not found in IPv6 addr=%s", ptr);
171 return rv;
174 static int st_to_hash(st_data_t key, st_data_t value, VALUE hash)
176 struct listen_stats *stats = (struct listen_stats *)value;
178 if (stats->listener_p) {
179 VALUE k = remove_scope_id((const char *)key);
180 VALUE v = rb_listen_stats(stats);
182 OBJ_FREEZE(k);
183 rb_hash_aset(hash, k, v);
185 return st_free_data(key, value, 0);
188 static int st_AND_hash(st_data_t key, st_data_t value, VALUE hash)
190 struct listen_stats *stats = (struct listen_stats *)value;
192 if (stats->listener_p) {
193 VALUE k = remove_scope_id((const char *)key);
195 if (rb_hash_lookup(hash, k) == Qtrue) {
196 VALUE v = rb_listen_stats(stats);
197 OBJ_FREEZE(k);
198 rb_hash_aset(hash, k, v);
201 return st_free_data(key, value, 0);
/*
 * Returns the textual wildcard address for +family+: "0.0.0.0" for
 * AF_INET, "[::]" for AF_INET6.  Any other family trips an assertion.
 */
static const char *addr_any(sa_family_t family)
{
	static const char any_v4[] = "0.0.0.0";
	static const char any_v6[] = "[::]";

	if (family != AF_INET) {
		assert(family == AF_INET6 && "unknown family");
		return any_v6;
	}

	return any_v4;
}
/* prints the "please report" notice to stderr and flushes it */
static void bug_warn(void)
{
	fputs("Please report how you produced this at "
	      "raindrops@librelist.org\n", stderr);
	fflush(stderr);
}
222 static struct listen_stats *stats_for(st_table *table, struct inet_diag_msg *r)
224 char *key, *port, *old_key;
225 size_t alloca_len;
226 struct listen_stats *stats;
227 size_t keylen;
228 size_t portlen = sizeof("65535");
229 union any_addr sa;
230 socklen_t len = sizeof(struct sockaddr_storage);
231 int rc;
232 int flags = NI_NUMERICHOST | NI_NUMERICSERV;
234 switch ((sa.ss.ss_family = r->idiag_family)) {
235 case AF_INET: {
236 sa.in.sin_port = r->id.idiag_sport;
237 sa.in.sin_addr.s_addr = r->id.idiag_src[0];
238 keylen = INET_ADDRSTRLEN;
239 alloca_len = keylen + 1 + portlen;
240 key = alloca(alloca_len);
241 key[keylen] = 0; /* will be ':' later */
242 port = key + keylen + 1;
243 rc = getnameinfo(&sa.sa, len,
244 key, keylen, port, portlen, flags);
245 break;
247 case AF_INET6: {
248 sa.in6.sin6_port = r->id.idiag_sport;
249 memcpy(&sa.in6.sin6_addr, &r->id.idiag_src, sizeof(__be32[4]));
250 keylen = INET6_ADDRSTRLEN;
251 /* [ ] */
252 alloca_len = 1 + keylen + 1 + 1 + portlen;
253 key = alloca(alloca_len);
254 *key = '[';
255 key[1 + keylen + 1] = 0; /* will be ':' later */
256 port = 1 + key + keylen + 1 + 1;
257 rc = getnameinfo(&sa.sa, len,
258 key + 1, keylen, port, portlen, flags);
259 break;
261 default:
262 assert(0 && "unsupported address family, could that be IPv7?!");
264 if (rc != 0) {
265 fprintf(stderr, "BUG: getnameinfo: %s\n", gai_strerror(rc));
266 bug_warn();
267 *key = 0;
270 keylen = strlen(key);
271 portlen = strlen(port);
273 switch (sa.ss.ss_family) {
274 case AF_INET:
275 key[keylen] = ':';
276 memmove(key + keylen + 1, port, portlen + 1);
277 break;
278 case AF_INET6:
279 key[keylen] = ']';
280 key[keylen + 1] = ':';
281 memmove(key + keylen + 2, port, portlen + 1);
282 keylen++;
283 break;
284 default:
285 assert(0 && "unsupported address family, could that be IPv7?!");
288 if (st_lookup(table, (st_data_t)key, (st_data_t *)&stats))
289 return stats;
291 old_key = key;
293 if (r->idiag_state == TCP_ESTABLISHED) {
294 int n = snprintf(key, alloca_len, "%s:%u",
295 addr_any(sa.ss.ss_family),
296 ntohs(r->id.idiag_sport));
297 if (n <= 0) {
298 fprintf(stderr, "BUG: snprintf: %d\n", n);
299 bug_warn();
301 if (st_lookup(table, (st_data_t)key, (st_data_t *)&stats))
302 return stats;
303 if (n <= 0) {
304 key = xmalloc(1);
305 *key = '\0';
306 } else {
307 old_key = key;
308 key = xmalloc(n + 1);
309 memcpy(key, old_key, n + 1);
311 } else {
312 key = xmalloc(keylen + 1 + portlen + 1);
313 memcpy(key, old_key, keylen + 1 + portlen + 1);
315 stats = xcalloc(1, sizeof(struct listen_stats));
316 st_insert(table, (st_data_t)key, (st_data_t)stats);
317 return stats;
320 static void table_incr_active(st_table *table, struct inet_diag_msg *r)
322 struct listen_stats *stats = stats_for(table, r);
323 ++stats->active;
326 static void table_set_queued(st_table *table, struct inet_diag_msg *r)
328 struct listen_stats *stats = stats_for(table, r);
329 stats->listener_p = 1;
330 stats->queued = r->idiag_rqueue;
333 /* inner loop of inet_diag, called for every socket returned by netlink */
334 static inline void r_acc(struct nogvl_args *args, struct inet_diag_msg *r)
337 * inode == 0 means the connection is still in the listen queue
338 * and has not yet been accept()-ed by the server. The
339 * inet_diag bytecode cannot filter this for us.
341 if (r->idiag_inode == 0)
342 return;
343 if (r->idiag_state == TCP_ESTABLISHED) {
344 if (args->table)
345 table_incr_active(args->table, r);
346 else
347 args->stats.active++;
348 } else { /* if (r->idiag_state == TCP_LISTEN) */
349 if (args->table)
350 table_set_queued(args->table, r);
351 else
352 args->stats.queued = r->idiag_rqueue;
355 * we wont get anything else because of the idiag_states filter
/*
 * Failure tags returned by diag().  nl_errcheck() compares them by
 * address, so each must stay a distinct array object.
 */
static const char err_sendmsg[] = "sendmsg";
static const char err_recvmsg[] = "recvmsg";
static const char err_nlmsg[]   = "nlmsg";
363 struct diag_req {
364 struct nlmsghdr nlh;
365 struct inet_diag_req r;
368 static void prep_msghdr(
369 struct msghdr *msg,
370 struct nogvl_args *args,
371 struct sockaddr_nl *nladdr,
372 size_t iovlen)
374 memset(msg, 0, sizeof(struct msghdr));
375 msg->msg_name = (void *)nladdr;
376 msg->msg_namelen = sizeof(struct sockaddr_nl);
377 msg->msg_iov = args->iov;
378 msg->msg_iovlen = iovlen;
381 static void prep_diag_args(
382 struct nogvl_args *args,
383 struct sockaddr_nl *nladdr,
384 struct rtattr *rta,
385 struct diag_req *req,
386 struct msghdr *msg)
388 memset(req, 0, sizeof(struct diag_req));
389 memset(nladdr, 0, sizeof(struct sockaddr_nl));
391 nladdr->nl_family = AF_NETLINK;
393 req->nlh.nlmsg_len = sizeof(struct diag_req) +
394 RTA_LENGTH(args->iov[2].iov_len);
395 req->nlh.nlmsg_type = TCPDIAG_GETSOCK;
396 req->nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
397 req->nlh.nlmsg_pid = getpid();
398 req->r.idiag_states = (1<<TCP_ESTABLISHED) | (1<<TCP_LISTEN);
399 rta->rta_type = INET_DIAG_REQ_BYTECODE;
400 rta->rta_len = RTA_LENGTH(args->iov[2].iov_len);
402 args->iov[0].iov_base = req;
403 args->iov[0].iov_len = sizeof(struct diag_req);
404 args->iov[1].iov_base = rta;
405 args->iov[1].iov_len = sizeof(struct rtattr);
407 prep_msghdr(msg, args, nladdr, 3);
410 static void prep_recvmsg_buf(struct nogvl_args *args)
412 /* reuse buffer that was allocated for bytecode */
413 args->iov[0].iov_len = page_size;
414 args->iov[0].iov_base = args->iov[2].iov_base;
417 /* does the inet_diag stuff with netlink(), this is called w/o GVL */
418 static VALUE diag(void *ptr)
420 struct nogvl_args *args = ptr;
421 struct sockaddr_nl nladdr;
422 struct rtattr rta;
423 struct diag_req req;
424 struct msghdr msg;
425 const char *err = NULL;
426 unsigned seq = ++g_seq;
428 prep_diag_args(args, &nladdr, &rta, &req, &msg);
429 req.nlh.nlmsg_seq = seq;
431 if (sendmsg(args->fd, &msg, 0) < 0) {
432 err = err_sendmsg;
433 goto out;
436 prep_recvmsg_buf(args);
438 while (1) {
439 ssize_t readed;
440 size_t r;
441 struct nlmsghdr *h = (struct nlmsghdr *)args->iov[0].iov_base;
443 prep_msghdr(&msg, args, &nladdr, 1);
444 readed = recvmsg(args->fd, &msg, 0);
445 if (readed < 0) {
446 if (errno == EINTR)
447 continue;
448 err = err_recvmsg;
449 goto out;
451 if (readed == 0)
452 goto out;
453 r = (size_t)readed;
454 for ( ; NLMSG_OK(h, r); h = NLMSG_NEXT(h, r)) {
455 if (h->nlmsg_seq != seq)
456 continue;
457 if (h->nlmsg_type == NLMSG_DONE)
458 goto out;
459 if (h->nlmsg_type == NLMSG_ERROR) {
460 err = err_nlmsg;
461 goto out;
463 r_acc(args, NLMSG_DATA(h));
466 out:
468 int save_errno = errno;
469 if (err && args->table) {
470 st_foreach(args->table, st_free_data, 0);
471 st_free_table(args->table);
473 errno = save_errno;
475 return (VALUE)err;
478 /* populates sockaddr_storage struct by parsing +addr+ */
479 static void parse_addr(union any_addr *inet, VALUE addr)
481 char *host_ptr;
482 char *check;
483 char *colon = NULL;
484 char *rbracket = NULL;
485 void *dst;
486 long host_len;
487 int af, rc;
488 uint16_t *portdst;
489 unsigned long port;
491 Check_Type(addr, T_STRING);
492 host_ptr = StringValueCStr(addr);
493 host_len = RSTRING_LEN(addr);
494 if (*host_ptr == '[') { /* ipv6 address format (rfc2732) */
495 rbracket = memchr(host_ptr + 1, ']', host_len - 1);
497 if (rbracket == NULL)
498 rb_raise(rb_eArgError, "']' not found in IPv6 addr=%s",
499 host_ptr);
500 if (rbracket[1] != ':')
501 rb_raise(rb_eArgError, "':' not found in IPv6 addr=%s",
502 host_ptr);
503 colon = rbracket + 1;
504 host_ptr++;
505 *rbracket = 0;
506 inet->ss.ss_family = af = AF_INET6;
507 dst = &inet->in6.sin6_addr;
508 portdst = &inet->in6.sin6_port;
509 } else { /* ipv4 */
510 colon = memchr(host_ptr, ':', host_len);
511 inet->ss.ss_family = af = AF_INET;
512 dst = &inet->in.sin_addr;
513 portdst = &inet->in.sin_port;
516 if (!colon)
517 rb_raise(rb_eArgError, "port not found in: `%s'", host_ptr);
518 port = strtoul(colon + 1, &check, 10);
519 *colon = 0;
520 rc = inet_pton(af, host_ptr, dst);
521 *colon = ':';
522 if (rbracket) *rbracket = ']';
523 if (*check || ((uint16_t)port != port))
524 rb_raise(rb_eArgError, "invalid port: %s", colon + 1);
525 if (rc != 1)
526 rb_raise(rb_eArgError, "inet_pton failed for: `%s' with %d",
527 host_ptr, rc);
528 *portdst = ntohs((uint16_t)port);
531 /* generates inet_diag bytecode to match all addrs */
532 static void gen_bytecode_all(struct iovec *iov)
534 struct inet_diag_bc_op *op;
535 struct inet_diag_hostcond *cond;
537 /* iov_len was already set and base allocated in a parent function */
538 assert(iov->iov_len == OPLEN && iov->iov_base && "iov invalid");
539 op = iov->iov_base;
540 op->code = INET_DIAG_BC_S_COND;
541 op->yes = OPLEN;
542 op->no = sizeof(struct inet_diag_bc_op) + OPLEN;
543 cond = (struct inet_diag_hostcond *)(op + 1);
544 cond->family = AF_UNSPEC;
545 cond->port = -1;
546 cond->prefix_len = 0;
549 /* generates inet_diag bytecode to match a single addr */
550 static void gen_bytecode(struct iovec *iov, union any_addr *inet)
552 struct inet_diag_bc_op *op;
553 struct inet_diag_hostcond *cond;
555 /* iov_len was already set and base allocated in a parent function */
556 assert(iov->iov_len == OPLEN && iov->iov_base && "iov invalid");
557 op = iov->iov_base;
558 op->code = INET_DIAG_BC_S_COND;
559 op->yes = OPLEN;
560 op->no = sizeof(struct inet_diag_bc_op) + OPLEN;
562 cond = (struct inet_diag_hostcond *)(op + 1);
563 cond->family = inet->ss.ss_family;
564 switch (inet->ss.ss_family) {
565 case AF_INET: {
566 cond->port = ntohs(inet->in.sin_port);
567 cond->prefix_len = inet->in.sin_addr.s_addr == 0 ? 0 :
568 sizeof(inet->in.sin_addr.s_addr) * CHAR_BIT;
569 *cond->addr = inet->in.sin_addr.s_addr;
571 break;
572 case AF_INET6: {
573 cond->port = ntohs(inet->in6.sin6_port);
574 cond->prefix_len = memcmp(&in6addr_any, &inet->in6.sin6_addr,
575 sizeof(struct in6_addr)) == 0 ?
576 0 : sizeof(inet->in6.sin6_addr) * CHAR_BIT;
577 memcpy(&cond->addr, &inet->in6.sin6_addr,
578 sizeof(struct in6_addr));
580 break;
581 default:
582 assert(0 && "unsupported address family, could that be IPv7?!");
586 static void nl_errcheck(VALUE r)
588 const char *err = (const char *)r;
590 if (err) {
591 if (err == err_nlmsg)
592 rb_raise(rb_eRuntimeError, "NLMSG_ERROR");
593 else
594 rb_sys_fail(err);
598 static VALUE tcp_stats(struct nogvl_args *args, VALUE addr)
600 union any_addr query_addr;
602 parse_addr(&query_addr, addr);
603 gen_bytecode(&args->iov[2], &query_addr);
605 memset(&args->stats, 0, sizeof(struct listen_stats));
606 nl_errcheck(rb_thread_io_blocking_region(diag, args, args->fd));
608 return rb_listen_stats(&args->stats);
612 * call-seq:
613 * Raindrops::Linux.tcp_listener_stats([addrs[, sock]]) => hash
615 * If specified, +addr+ may be a string or array of strings representing
616 * listen addresses to filter for. Returns a hash with given addresses as
617 * keys and ListenStats objects as the values or a hash of all addresses.
619 * addrs = %w(0.0.0.0:80 127.0.0.1:8080)
621 * If +addr+ is nil or not specified, all (IPv4) addresses are returned.
622 * If +sock+ is specified, it should be a Raindrops::InetDiagSock object.
624 static VALUE tcp_listener_stats(int argc, VALUE *argv, VALUE self)
626 VALUE rv = rb_hash_new();
627 struct nogvl_args args;
628 VALUE addrs, sock;
630 rb_scan_args(argc, argv, "02", &addrs, &sock);
633 * allocating page_size instead of OP_LEN since we'll reuse the
634 * buffer for recvmsg() later, we already checked for
635 * OPLEN <= page_size at initialization
637 args.iov[2].iov_len = OPLEN;
638 args.iov[2].iov_base = alloca(page_size);
639 args.table = NULL;
640 if (NIL_P(sock))
641 sock = rb_funcall(cIDSock, id_new, 0);
642 args.fd = my_fileno(sock);
644 switch (TYPE(addrs)) {
645 case T_STRING:
646 rb_hash_aset(rv, addrs, tcp_stats(&args, addrs));
647 return rv;
648 case T_ARRAY: {
649 long i;
650 long len = RARRAY_LEN(addrs);
651 VALUE cur;
653 if (len == 1) {
654 cur = rb_ary_entry(addrs, 0);
656 rb_hash_aset(rv, cur, tcp_stats(&args, cur));
657 return rv;
659 for (i = 0; i < len; i++) {
660 union any_addr check;
661 VALUE cur = rb_ary_entry(addrs, i);
663 parse_addr(&check, cur);
664 rb_hash_aset(rv, cur, Qtrue);
666 /* fall through */
668 case T_NIL:
669 args.table = st_init_strtable();
670 gen_bytecode_all(&args.iov[2]);
671 break;
672 default:
673 rb_raise(rb_eArgError,
674 "addr must be an array of strings, a string, or nil");
677 nl_errcheck(rb_thread_io_blocking_region(diag, &args, args.fd));
679 st_foreach(args.table, NIL_P(addrs) ? st_to_hash : st_AND_hash, rv);
680 st_free_table(args.table);
682 /* let GC deal with corner cases */
683 if (argc < 2) rb_io_close(sock);
684 return rv;
687 void Init_raindrops_linux_inet_diag(void)
689 VALUE cRaindrops = rb_const_get(rb_cObject, rb_intern("Raindrops"));
690 VALUE mLinux = rb_define_module_under(cRaindrops, "Linux");
692 rb_require("socket");
693 cIDSock = rb_const_get(rb_cObject, rb_intern("Socket"));
694 id_new = rb_intern("new");
697 * Document-class: Raindrops::InetDiagSocket
699 * This is a subclass of +Socket+ specifically for talking
700 * to the inet_diag facility of Netlink.
702 cIDSock = rb_define_class_under(cRaindrops, "InetDiagSocket", cIDSock);
703 rb_define_singleton_method(cIDSock, "new", ids_s_new, 0);
705 cListenStats = rb_const_get(cRaindrops, rb_intern("ListenStats"));
707 rb_define_module_function(mLinux, "tcp_listener_stats",
708 tcp_listener_stats, -1);
710 page_size = getpagesize();
712 assert(OPLEN <= page_size && "bytecode OPLEN is not <= PAGE_SIZE");
714 #endif /* __linux__ */