/*
 * inet_diag: no need to specify family
 * [raindrops.git] / ext / raindrops / linux_inet_diag.c
 * blob abc2ef9b25577b378e5eb4872ca2c2dd69ccc700
 */
1 #include <ruby.h>
2 #ifdef HAVE_RUBY_ST_H
3 # include <ruby/st.h>
4 #else
5 # include <st.h>
6 #endif
7 #ifdef __linux__
/*
 * Ruby 1.8.6+ macros (for compatibility with Ruby 1.9): only defined
 * here when the Ruby headers did not already provide it.
 */
#if !defined(RSTRING_LEN)
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
14 /* partial emulation of the 1.9 rb_thread_blocking_region under 1.8 */
15 #ifndef HAVE_RB_THREAD_BLOCKING_REGION
16 # include <rubysig.h>
17 typedef void rb_unblock_function_t(void *);
18 typedef VALUE rb_blocking_function_t(void *);
19 static VALUE
20 rb_thread_blocking_region(
21 rb_blocking_function_t *func, void *data1,
22 rb_unblock_function_t *ubf, void *data2)
24 VALUE rv;
26 TRAP_BEG;
27 rv = func(data1);
28 TRAP_END;
30 return rv;
32 #endif /* ! HAVE_RB_THREAD_BLOCKING_REGION */
34 #include <assert.h>
35 #include <errno.h>
36 #include <sys/socket.h>
37 #include <sys/types.h>
38 #include <netdb.h>
39 #include <unistd.h>
40 #include <string.h>
41 #include <asm/types.h>
42 #include <netinet/in.h>
43 #include <arpa/inet.h>
44 #include <netinet/tcp.h>
45 #include <linux/netlink.h>
46 #include <linux/rtnetlink.h>
47 #include <linux/inet_diag.h>
49 static size_t page_size;
50 static unsigned g_seq;
51 static VALUE cListenStats;
/* internal per-address counters, converted to a Ruby ListenStats later */
struct listen_stats {
	uint32_t active;       /* count of TCP_ESTABLISHED sockets seen */
	uint32_t listener_p:1; /* set once a TCP_LISTEN socket was seen */
	uint32_t queued:31;    /* idiag_rqueue of the listening socket */
};
/*
 * Byte length of the single inet_diag bytecode operation we generate:
 * the op header, the host condition, and room for a socket address.
 */
#define OPLEN (sizeof(struct inet_diag_bc_op) + \
	       sizeof(struct inet_diag_hostcond) + \
	       sizeof(struct sockaddr_storage))
63 struct nogvl_args {
64 st_table *table;
65 struct iovec iov[3]; /* last iov holds inet_diag bytecode */
66 struct listen_stats stats;
69 /* creates a Ruby ListenStats Struct based on our internal listen_stats */
70 static VALUE rb_listen_stats(struct listen_stats *stats)
72 VALUE active = UINT2NUM(stats->active);
73 VALUE queued = UINT2NUM(stats->queued);
75 return rb_struct_new(cListenStats, active, queued);
78 static int st_free_data(st_data_t key, st_data_t value, st_data_t ignored)
80 xfree((void *)key);
81 xfree((void *)value);
83 return ST_DELETE;
86 static int st_to_hash(st_data_t key, st_data_t value, VALUE hash)
88 struct listen_stats *stats = (struct listen_stats *)value;
90 if (stats->listener_p) {
91 VALUE k = rb_str_new2((const char *)key);
92 VALUE v = rb_listen_stats(stats);
94 OBJ_FREEZE(k);
95 rb_hash_aset(hash, k, v);
97 return st_free_data(key, value, 0);
100 static int st_AND_hash(st_data_t key, st_data_t value, VALUE hash)
102 struct listen_stats *stats = (struct listen_stats *)value;
104 if (stats->listener_p) {
105 VALUE k = rb_str_new2((const char *)key);
107 if (rb_hash_lookup(hash, k) == Qtrue) {
108 VALUE v = rb_listen_stats(stats);
109 OBJ_FREEZE(k);
110 rb_hash_aset(hash, k, v);
113 return st_free_data(key, value, 0);
116 static struct listen_stats *stats_for(st_table *table, struct inet_diag_msg *r)
118 char *key, *port;
119 struct listen_stats *stats;
120 size_t keylen;
121 size_t portlen = sizeof("65535");
122 struct sockaddr_storage ss = { 0 };
123 socklen_t len = sizeof(struct sockaddr_storage);
124 int rc;
125 int flags = NI_NUMERICHOST | NI_NUMERICSERV;
127 switch ((ss.ss_family = r->idiag_family)) {
128 case AF_INET: {
129 struct sockaddr_in *in = (struct sockaddr_in *)&ss;
130 in->sin_port = r->id.idiag_sport;
131 in->sin_addr.s_addr = r->id.idiag_src[0];
132 keylen = INET_ADDRSTRLEN;
133 key = alloca(keylen + 1 + portlen);
134 key[keylen] = 0; /* will be ':' later */
135 port = key + keylen + 1;
136 rc = getnameinfo((struct sockaddr *)&ss, len,
137 key, keylen, port, portlen, flags);
139 break;
141 case AF_INET6: {
142 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)&ss;
143 in6->sin6_port = r->id.idiag_sport;
144 memcpy(&in6->sin6_addr.in6_u.u6_addr32,
145 &r->id.idiag_src, sizeof(__be32[4]));
146 keylen = INET6_ADDRSTRLEN;
147 /* [ ] */
148 key = alloca(1 + keylen + 1 + 1 + portlen);
149 *key = '[';
150 key[1 + keylen + 1] = 0; /* will be ':' later */
151 port = 1 + key + keylen + 1 + 1;
152 rc = getnameinfo((struct sockaddr *)&ss, len,
153 key + 1, keylen, port, portlen, flags);
154 break;
156 default:
157 assert(0 && "unsupported address family, could that be IPv7?!");
159 if (rc != 0) {
160 fprintf(stderr, "BUG: getnameinfo: %s\n"
161 "Please report how you produced this at %s\n",
162 gai_strerror(rc), "raindrops@librelist.com");
163 fflush(stderr);
164 *key = 0;
167 keylen = strlen(key);
168 portlen = strlen(port);
170 switch (ss.ss_family) {
171 case AF_INET:
172 key[keylen] = ':';
173 memmove(key + keylen + 1, port, portlen + 1);
174 break;
175 case AF_INET6:
176 key[keylen] = ']';
177 key[keylen + 1] = ':';
178 memmove(key + keylen + 2, port, portlen + 1);
179 break;
180 default:
181 assert(0 && "unsupported address family, could that be IPv7?!");
184 if (!st_lookup(table, (st_data_t)key, (st_data_t *)&stats)) {
185 char *old_key = key;
187 key = xmalloc(keylen + 1 + portlen + 1);
188 memcpy(key, old_key, keylen + 1 + portlen + 1);
189 stats = xcalloc(1, sizeof(struct listen_stats));
190 st_insert(table, (st_data_t)key, (st_data_t)stats);
192 return stats;
195 static void table_incr_active(st_table *table, struct inet_diag_msg *r)
197 struct listen_stats *stats = stats_for(table, r);
198 ++stats->active;
201 static void table_set_queued(st_table *table, struct inet_diag_msg *r)
203 struct listen_stats *stats = stats_for(table, r);
204 stats->listener_p = 1;
205 stats->queued = r->idiag_rqueue;
208 /* inner loop of inet_diag, called for every socket returned by netlink */
209 static inline void r_acc(struct nogvl_args *args, struct inet_diag_msg *r)
212 * inode == 0 means the connection is still in the listen queue
213 * and has not yet been accept()-ed by the server. The
214 * inet_diag bytecode cannot filter this for us.
216 if (r->idiag_inode == 0)
217 return;
218 if (r->idiag_state == TCP_ESTABLISHED) {
219 if (args->table)
220 table_incr_active(args->table, r);
221 else
222 args->stats.active++;
223 } else { /* if (r->idiag_state == TCP_LISTEN) */
224 if (args->table)
225 table_set_queued(args->table, r);
226 else
227 args->stats.queued = r->idiag_rqueue;
230 * we wont get anything else because of the idiag_states filter
/*
 * Error tags returned by diag().  They are told apart by address, so
 * each one must remain a distinct object.
 */
static const char err_socket[]  = "socket";
static const char err_sendmsg[] = "sendmsg";
static const char err_recvmsg[] = "recvmsg";
static const char err_nlmsg[]   = "nlmsg";
239 struct diag_req {
240 struct nlmsghdr nlh;
241 struct inet_diag_req r;
244 static void prep_msghdr(
245 struct msghdr *msg,
246 struct nogvl_args *args,
247 struct sockaddr_nl *nladdr,
248 size_t iovlen)
250 memset(msg, 0, sizeof(struct msghdr));
251 msg->msg_name = (void *)nladdr;
252 msg->msg_namelen = sizeof(struct sockaddr_nl);
253 msg->msg_iov = args->iov;
254 msg->msg_iovlen = iovlen;
257 static void prep_diag_args(
258 struct nogvl_args *args,
259 struct sockaddr_nl *nladdr,
260 struct rtattr *rta,
261 struct diag_req *req,
262 struct msghdr *msg)
264 memset(req, 0, sizeof(struct diag_req));
265 memset(nladdr, 0, sizeof(struct sockaddr_nl));
267 nladdr->nl_family = AF_NETLINK;
269 req->nlh.nlmsg_len = sizeof(struct diag_req) +
270 RTA_LENGTH(args->iov[2].iov_len);
271 req->nlh.nlmsg_type = TCPDIAG_GETSOCK;
272 req->nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
273 req->nlh.nlmsg_pid = getpid();
274 req->r.idiag_states = (1<<TCP_ESTABLISHED) | (1<<TCP_LISTEN);
275 rta->rta_type = INET_DIAG_REQ_BYTECODE;
276 rta->rta_len = RTA_LENGTH(args->iov[2].iov_len);
278 args->iov[0].iov_base = req;
279 args->iov[0].iov_len = sizeof(struct diag_req);
280 args->iov[1].iov_base = rta;
281 args->iov[1].iov_len = sizeof(struct rtattr);
283 prep_msghdr(msg, args, nladdr, 3);
286 static void prep_recvmsg_buf(struct nogvl_args *args)
288 /* reuse buffer that was allocated for bytecode */
289 args->iov[0].iov_len = page_size;
290 args->iov[0].iov_base = args->iov[2].iov_base;
293 /* does the inet_diag stuff with netlink(), this is called w/o GVL */
294 static VALUE diag(void *ptr)
296 struct nogvl_args *args = ptr;
297 struct sockaddr_nl nladdr;
298 struct rtattr rta;
299 struct diag_req req;
300 struct msghdr msg;
301 const char *err = NULL;
302 unsigned seq = ++g_seq;
303 int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_INET_DIAG);
305 if (fd < 0)
306 return (VALUE)err_socket;
308 prep_diag_args(args, &nladdr, &rta, &req, &msg);
309 req.nlh.nlmsg_seq = seq;
311 if (sendmsg(fd, &msg, 0) < 0) {
312 err = err_sendmsg;
313 goto out;
316 prep_recvmsg_buf(args);
318 while (1) {
319 ssize_t readed;
320 struct nlmsghdr *h = (struct nlmsghdr *)args->iov[0].iov_base;
322 prep_msghdr(&msg, args, &nladdr, 1);
323 readed = recvmsg(fd, &msg, 0);
324 if (readed < 0) {
325 if (errno == EINTR)
326 continue;
327 err = err_recvmsg;
328 goto out;
330 if (readed == 0)
331 goto out;
333 for ( ; NLMSG_OK(h, readed); h = NLMSG_NEXT(h, readed)) {
334 if (h->nlmsg_seq != seq)
335 continue;
336 if (h->nlmsg_type == NLMSG_DONE)
337 goto out;
338 if (h->nlmsg_type == NLMSG_ERROR) {
339 err = err_nlmsg;
340 goto out;
342 r_acc(args, NLMSG_DATA(h));
345 out:
347 int save_errno = errno;
348 close(fd);
349 if (err && args->table) {
350 st_foreach(args->table, st_free_data, 0);
351 st_free_table(args->table);
353 errno = save_errno;
355 return (VALUE)err;
358 /* populates sockaddr_storage struct by parsing +addr+ */
359 static void parse_addr(struct sockaddr_storage *inet, VALUE addr)
361 char *host_ptr;
362 char *colon = NULL;
363 char *rbracket = NULL;
364 long host_len;
365 struct addrinfo hints;
366 struct addrinfo *res;
367 int rc;
369 Check_Type(addr, T_STRING);
370 host_ptr = StringValueCStr(addr);
371 host_len = RSTRING_LEN(addr);
372 if (*host_ptr == '[') { /* ipv6 address format (rfc2732) */
373 rbracket = memchr(host_ptr + 1, ']', host_len - 1);
375 if (rbracket == NULL)
376 rb_raise(rb_eArgError, "']' not found in IPv6 addr=%s",
377 host_ptr);
378 if (rbracket[1] != ':')
379 rb_raise(rb_eArgError, "':' not found in IPv6 addr=%s",
380 host_ptr);
381 colon = rbracket + 1;
382 host_ptr++;
383 *rbracket = 0;
384 } else { /* ipv4 */
385 colon = memchr(host_ptr, ':', host_len);
388 if (!colon)
389 rb_raise(rb_eArgError, "port not found in: `%s'", host_ptr);
391 hints.ai_family = AF_UNSPEC;
392 hints.ai_socktype = SOCK_STREAM;
393 hints.ai_protocol = IPPROTO_TCP;
394 hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;
396 *colon = 0;
397 rc = getaddrinfo(host_ptr, colon + 1, &hints, &res);
398 *colon = ':';
399 if (rbracket) *rbracket = ']';
400 if (rc != 0)
401 rb_raise(rb_eArgError, "getaddrinfo(%s): %s",
402 host_ptr, gai_strerror(rc));
404 memcpy(inet, res->ai_addr, res->ai_addrlen);
405 freeaddrinfo(res);
408 /* generates inet_diag bytecode to match all addrs */
409 static void gen_bytecode_all(struct iovec *iov)
411 struct inet_diag_bc_op *op;
412 struct inet_diag_hostcond *cond;
414 /* iov_len was already set and base allocated in a parent function */
415 assert(iov->iov_len == OPLEN && iov->iov_base && "iov invalid");
416 op = iov->iov_base;
417 op->code = INET_DIAG_BC_S_COND;
418 op->yes = OPLEN;
419 op->no = sizeof(struct inet_diag_bc_op) + OPLEN;
420 cond = (struct inet_diag_hostcond *)(op + 1);
421 cond->family = AF_UNSPEC;
422 cond->port = -1;
423 cond->prefix_len = 0;
426 /* generates inet_diag bytecode to match a single addr */
427 static void gen_bytecode(struct iovec *iov, struct sockaddr_storage *inet)
429 struct inet_diag_bc_op *op;
430 struct inet_diag_hostcond *cond;
432 /* iov_len was already set and base allocated in a parent function */
433 assert(iov->iov_len == OPLEN && iov->iov_base && "iov invalid");
434 op = iov->iov_base;
435 op->code = INET_DIAG_BC_S_COND;
436 op->yes = OPLEN;
437 op->no = sizeof(struct inet_diag_bc_op) + OPLEN;
439 cond = (struct inet_diag_hostcond *)(op + 1);
440 cond->family = inet->ss_family;
441 switch (inet->ss_family) {
442 case AF_INET: {
443 struct sockaddr_in *in = (struct sockaddr_in *)inet;
445 cond->port = ntohs(in->sin_port);
446 cond->prefix_len = in->sin_addr.s_addr == 0 ? 0 :
447 sizeof(in->sin_addr.s_addr) * CHAR_BIT;
448 *cond->addr = in->sin_addr.s_addr;
450 break;
451 case AF_INET6: {
452 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)inet;
454 cond->port = ntohs(in6->sin6_port);
455 cond->prefix_len = memcmp(&in6addr_any, &in6->sin6_addr,
456 sizeof(struct in6_addr)) == 0 ?
457 0 : sizeof(in6->sin6_addr) * CHAR_BIT;
458 memcpy(&cond->addr, &in6->sin6_addr, sizeof(struct in6_addr));
460 break;
461 default:
462 assert(0 && "unsupported address family, could that be IPv7?!");
466 static void nl_errcheck(VALUE r)
468 const char *err = (const char *)r;
470 if (err) {
471 if (err == err_nlmsg)
472 rb_raise(rb_eRuntimeError, "NLMSG_ERROR");
473 else
474 rb_sys_fail(err);
478 static VALUE tcp_stats(struct nogvl_args *args, VALUE addr)
480 struct sockaddr_storage query_addr;
482 parse_addr(&query_addr, addr);
483 gen_bytecode(&args->iov[2], &query_addr);
485 memset(&args->stats, 0, sizeof(struct listen_stats));
486 nl_errcheck(rb_thread_blocking_region(diag, args, 0, 0));
488 return rb_listen_stats(&args->stats);
492 * call-seq:
493 * Raindrops::Linux.tcp_listener_stats([addrs]) => hash
495 * If specified, +addr+ may be a string or array of strings representing
496 * listen addresses to filter for. Returns a hash with given addresses as
497 * keys and ListenStats objects as the values or a hash of all addresses.
499 * addrs = %w(0.0.0.0:80 127.0.0.1:8080)
501 * If +addr+ is nil or not specified, all (IPv4) addresses are returned.
503 static VALUE tcp_listener_stats(int argc, VALUE *argv, VALUE self)
505 VALUE *ary;
506 long i;
507 VALUE rv = rb_hash_new();
508 struct nogvl_args args;
509 VALUE addrs;
511 rb_scan_args(argc, argv, "01", &addrs);
514 * allocating page_size instead of OP_LEN since we'll reuse the
515 * buffer for recvmsg() later, we already checked for
516 * OPLEN <= page_size at initialization
518 args.iov[2].iov_len = OPLEN;
519 args.iov[2].iov_base = alloca(page_size);
520 args.table = NULL;
522 switch (TYPE(addrs)) {
523 case T_STRING:
524 rb_hash_aset(rv, addrs, tcp_stats(&args, addrs));
525 return rv;
526 case T_ARRAY:
527 ary = RARRAY_PTR(addrs);
528 i = RARRAY_LEN(addrs);
529 if (i == 1) {
530 rb_hash_aset(rv, *ary, tcp_stats(&args, *ary));
531 return rv;
533 for (; --i >= 0; ary++) {
534 struct sockaddr_storage check;
536 parse_addr(&check, *ary);
537 rb_hash_aset(rv, *ary, Qtrue);
539 /* fall through */
540 case T_NIL:
541 args.table = st_init_strtable();
542 gen_bytecode_all(&args.iov[2]);
543 break;
544 default:
545 rb_raise(rb_eArgError,
546 "addr must be an array of strings, a string, or nil");
549 nl_errcheck(rb_thread_blocking_region(diag, &args, NULL, 0));
551 st_foreach(args.table, NIL_P(addrs) ? st_to_hash : st_AND_hash, rv);
552 st_free_table(args.table);
553 return rv;
556 void Init_raindrops_linux_inet_diag(void)
558 VALUE cRaindrops = rb_const_get(rb_cObject, rb_intern("Raindrops"));
559 VALUE mLinux = rb_define_module_under(cRaindrops, "Linux");
561 cListenStats = rb_const_get(cRaindrops, rb_intern("ListenStats"));
563 rb_define_module_function(mLinux, "tcp_listener_stats",
564 tcp_listener_stats, -1);
566 page_size = getpagesize();
568 assert(OPLEN <= page_size && "bytecode OPLEN is not <= PAGE_SIZE");
570 #endif /* __linux__ */