/*
 * Lightweight Autonomic Network Architecture
 *
 * LANA BSD Socket interface for communication with user level.
 * PF_LANA protocol family socket handler.
 *
 * Copyright 2011 Daniel Borkmann <dborkma@tik.ee.ethz.ch>,
 * Swiss federal institute of technology (ETH Zurich)
 * Subject to the GPL.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/seqlock.h>
#include <linux/bug.h>
#include <linux/percpu.h>
#include <linux/prefetch.h>
#include <linux/atomic.h>
#include <linux/slab.h>
#include <net/sock.h>

#include "xt_fblock.h"
#include "xt_builder.h"
#include "xt_idp.h"
#include "xt_skb.h"
#include "xt_engine.h"

#define AF_LANA		27	/* For now.. */
#define PF_LANA		AF_LANA

/* LANA protocol types on top of the PF_LANA family */
#define LANA_PROTO_AUTO	0	/* Auto-select if none is given */
#define LANA_PROTO_RAW	1	/* LANA raw proto, currently the only one */
/* Total num of protos available */
#define LANA_NPROTO	2

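/*
 * Usage sketch (user space, hypothetical, not part of this file): with the
 * module loaded, a raw LANA socket is created like any other BSD socket.
 * Requesting SOCK_RAW with LANA_PROTO_AUTO needs CAP_SYS_ADMIN (see
 * lana_family_create() below), and lana_raw_bind() reads the target
 * device's ifindex from sa_data[0], so treat this as an illustration
 * rather than a stable ABI:
 *
 *	int fd = socket(AF_LANA, SOCK_RAW, LANA_PROTO_RAW);
 *	struct sockaddr sa = { .sa_family = AF_LANA };
 *	sa.sa_data[0] = if_nametoindex("eth0");
 *	bind(fd, &sa, sizeof(sa));
 */
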
/* Protocols in LANA family */
struct lana_protocol {
	int protocol;
	const struct proto_ops *ops;
	struct proto *proto;
	struct module *owner;
};

struct fb_pflana_priv {
	idp_t port[2];
	seqlock_t lock;
	struct lana_sock *sock_self;
};

struct lana_sock {
	struct sock sk;
	struct fblock *fb;
	int ifindex;
	int bound;
};

static DEFINE_MUTEX(proto_tab_lock);

static struct lana_protocol *proto_tab[LANA_NPROTO] __read_mostly;

static int fb_pflana_netrx(const struct fblock * const fb,
			   struct sk_buff *skb,
			   enum path_type * const dir)
{
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	struct sock *sk;
	struct fb_pflana_priv __percpu *fb_priv_cpu;

	fb_priv_cpu = this_cpu_ptr(rcu_dereference_raw(fb->private_data));
	sk = &fb_priv_cpu->sock_self->sk;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		if (nskb == NULL)
			goto out;
		kfree_skb(skb);
		skb = nskb;
	}
	sk_receive_skb(sk, skb, 0);
out:
	/* We are last in chain. */
	write_next_idp_to_skb(skb, fb->idp, IDP_UNKNOWN);
	return PPE_HALT;
}

static int fb_pflana_event(struct notifier_block *self, unsigned long cmd,
			   void *args)
{
	int ret = NOTIFY_OK;
	unsigned int cpu;
	struct fblock *fb;
	struct fb_pflana_priv __percpu *fb_priv;

	rcu_read_lock();
	fb = rcu_dereference_raw(container_of(self, struct fblock_notifier,
					      nb)->self);
	fb_priv = (struct fb_pflana_priv __percpu *)
			rcu_dereference_raw(fb->private_data);
	rcu_read_unlock();

	switch (cmd) {
	case FBLOCK_BIND_IDP: {
		int bound = 0;
		struct fblock_bind_msg *msg = args;
		get_online_cpus();
		for_each_online_cpu(cpu) {
			struct fb_pflana_priv *fb_priv_cpu;
			fb_priv_cpu = per_cpu_ptr(fb_priv, cpu);
			if (fb_priv_cpu->port[msg->dir] == IDP_UNKNOWN) {
				write_seqlock(&fb_priv_cpu->lock);
				fb_priv_cpu->port[msg->dir] = msg->idp;
				write_sequnlock(&fb_priv_cpu->lock);
				bound = 1;
			} else {
				ret = NOTIFY_BAD;
				break;
			}
		}
		put_online_cpus();
		if (bound)
			printk(KERN_INFO "[%s::bsdsock] port %s bound to IDP%u\n",
			       fb->name, path_names[msg->dir], msg->idp);
	} break;
	case FBLOCK_UNBIND_IDP: {
		int unbound = 0;
		struct fblock_bind_msg *msg = args;
		get_online_cpus();
		for_each_online_cpu(cpu) {
			struct fb_pflana_priv *fb_priv_cpu;
			fb_priv_cpu = per_cpu_ptr(fb_priv, cpu);
			if (fb_priv_cpu->port[msg->dir] == msg->idp) {
				write_seqlock(&fb_priv_cpu->lock);
				fb_priv_cpu->port[msg->dir] = IDP_UNKNOWN;
				write_sequnlock(&fb_priv_cpu->lock);
				unbound = 1;
			} else {
				ret = NOTIFY_BAD;
				break;
			}
		}
		put_online_cpus();
		if (unbound)
			printk(KERN_INFO "[%s::bsdsock] port %s unbound\n",
			       fb->name, path_names[msg->dir]);
	} break;
	default:
		break;
	}

	return ret;
}

static struct fblock *get_bound_fblock(struct fblock *self,
				       enum path_type dir)
{
	idp_t fbidp;
	unsigned int seq;
	struct fb_pflana_priv __percpu *fb_priv_cpu;
	fb_priv_cpu = this_cpu_ptr(rcu_dereference_raw(self->private_data));
	do {
		/* Seqlock read side: retry until a consistent IDP is seen. */
		seq = read_seqbegin(&fb_priv_cpu->lock);
		fbidp = fb_priv_cpu->port[dir];
	} while (read_seqretry(&fb_priv_cpu->lock, seq));
	return search_fblock(fbidp);
}

static inline struct lana_sock *to_lana_sk(const struct sock *sk)
{
	return container_of(sk, struct lana_sock, sk);
}

static struct fblock *fb_pflana_build_fblock(char *name);

static int lana_sk_init(struct sock *sk)
{
	int cpu;
	char name[32];
	struct lana_sock *lana = to_lana_sk(sk);

	memset(name, 0, sizeof(name));
	snprintf(name, sizeof(name), "%p", &lana->sk);
	lana->fb = fb_pflana_build_fblock(name);
	if (!lana->fb)
		return -ENOMEM;
	get_online_cpus();
	for_each_online_cpu(cpu) {
		struct fb_pflana_priv *fb_priv_cpu;
		fb_priv_cpu = per_cpu_ptr(lana->fb->private_data, cpu);
		fb_priv_cpu->sock_self = lana;
	}
	put_online_cpus();
	smp_wmb();
	return 0;
}

static void fb_pflana_destroy_fblock(struct fblock *fb);

static void lana_sk_free(struct sock *sk)
{
	struct fblock *fb_bound;
	struct lana_sock *lana;

	lana = to_lana_sk(sk);
	fb_bound = get_bound_fblock(lana->fb, TYPE_INGRESS);
	if (fb_bound) {
		fblock_unbind(fb_bound, lana->fb);
		put_fblock(fb_bound);
	}
	fb_bound = get_bound_fblock(lana->fb, TYPE_EGRESS);
	if (fb_bound) {
		fblock_unbind(lana->fb, fb_bound);
		put_fblock(fb_bound);
	}
	fb_pflana_destroy_fblock(lana->fb);
}

static int lana_raw_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	if (sk) {
		sock->sk = NULL;
		sk->sk_prot->close(sk, 0);
		lana_sk_free(sk);
	}
	return 0;
}

static int lana_raw_bind(struct socket *sock, struct sockaddr *addr, int len)
{
	int idx;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	struct lana_sock *lana = to_lana_sk(sk);

	if (len < sizeof(struct sockaddr))
		return -EINVAL;
	if (addr->sa_family != AF_LANA)
		return -EINVAL;

	/* sa_data[0] carries the ifindex of the device to bind to. */
	idx = addr->sa_data[0];
	dev = dev_get_by_index(sock_net(sk), idx);
	if (dev == NULL)
		return -ENODEV;
	lana->ifindex = idx;
	lana->bound = 1;
	dev_put(dev);

	return 0;
}

static unsigned int lana_raw_poll(struct file *file, struct socket *sock,
				  poll_table *wait)
{
	unsigned int mask = 0;
	struct sock *sk = sock->sk;
	poll_wait(file, sk_sleep(sk), wait);
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;
	return mask;
}

static int lana_raw_sendmsg(struct kiocb *iocb, struct socket *sock,
			    struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	return sk->sk_prot->sendmsg(iocb, sk, msg, len);
}

/* Todo later: send bound dev from fb_eth, not from userspace */
static int lana_proto_sendmsg(struct kiocb *iocb, struct sock *sk,
			      struct msghdr *msg, size_t len)
{
	int err;
	unsigned int seq;
	struct net *net = sock_net(sk);
	struct net_device *dev;
	struct sockaddr *target;
	struct sk_buff *skb;
	struct lana_sock *lana = to_lana_sk(sk);
	struct fblock *fb = lana->fb;
	struct fb_pflana_priv *fb_priv_cpu;

	if (msg->msg_name == NULL)
		return -EDESTADDRREQ;
	if (msg->msg_namelen < sizeof(struct sockaddr))
		return -EINVAL;

	target = (struct sockaddr *) msg->msg_name;
	if (unlikely(target->sa_family != AF_LANA))
		return -EAFNOSUPPORT;

	lock_sock(sk);
	if (sk->sk_bound_dev_if || lana->bound) {
		dev = dev_get_by_index(net, lana->bound ? lana->ifindex :
				       sk->sk_bound_dev_if);
	} else {
		dev = dev_getfirstbyhwtype(sock_net(sk), ETH_P_ALL); /* FIXME */
	}
	release_sock(sk);

	if (!dev || !(dev->flags & IFF_UP) || unlikely(len > dev->mtu)) {
		err = -EIO;
		goto drop_put;
	}

	skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + len,
				  msg->msg_flags & MSG_DONTWAIT, &err);
	if (!skb)
		goto drop_put;

	skb_reserve(skb, LL_RESERVED_SPACE(dev));

	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	err = memcpy_fromiovec((void *) skb_put(skb, len), msg->msg_iov, len);
	if (err < 0)
		goto drop;

	skb->dev = dev;
	skb->sk = sk;
	skb->protocol = htons(ETH_P_ALL); /* FIXME */

	skb_orphan(skb);

	dev_put(dev);

	err = dev_queue_xmit(skb);
	if (err > 0)
		err = net_xmit_errno(err);

#if 0
	rcu_read_lock();
	fb_priv_cpu = this_cpu_ptr(rcu_dereference(fb->private_data));
	do {
		seq = read_seqbegin(&fb_priv_cpu->lock);
		write_next_idp_to_skb(skb, fb->idp,
				      fb_priv_cpu->port[TYPE_EGRESS]);
	} while (read_seqretry(&fb_priv_cpu->lock, seq));
	rcu_read_unlock();

	process_packet(skb, TYPE_EGRESS);
#endif

	return (err >= 0) ? len : err;
drop:
	kfree_skb(skb);
drop_put:
	/* dev may be NULL here when no usable device was found. */
	if (dev)
		dev_put(dev);
	return err;
}

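/*
 * Transmit sketch (user space, hypothetical): lana_proto_sendmsg() above
 * demands a destination with sa_family == AF_LANA, caps the payload at the
 * device MTU, and picks the bound device (or a fallback device, see the
 * FIXME), so a minimal direct xmit test could look like this:
 *
 *	struct sockaddr dst = { .sa_family = AF_LANA };
 *	char buf[64] = { 0 };
 *	ssize_t n = sendto(fd, buf, sizeof(buf), 0, &dst, sizeof(dst));
 */
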
static int lana_proto_recvmsg(struct kiocb *iocb, struct sock *sk,
			      struct msghdr *msg, size_t len, int noblock,
			      int flags, int *addr_len)
{
	int err = 0;
	struct sk_buff *skb;
	size_t copied = 0;

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb) {
		if (sk->sk_shutdown & RCV_SHUTDOWN)
			return 0;
		return err;
	}
	msg->msg_namelen = 0;
	if (addr_len)
		*addr_len = msg->msg_namelen;
	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err == 0)
		sock_recv_ts_and_drops(msg, sk, skb);
	skb_free_datagram(sk, skb);

	return err ? : copied;
}

static int lana_proto_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int err = -EPROTONOSUPPORT;

	switch (sk->sk_protocol) {
	case LANA_PROTO_RAW:
		err = sock_queue_rcv_skb(sk, skb);
		if (err != 0)
			kfree_skb(skb);
		break;
	default:
		kfree_skb(skb);
		err = -EPROTONOSUPPORT;
		break;
	}

	return err ? NET_RX_DROP : NET_RX_SUCCESS;
}

#if 0 /* unused */
static int lana_common_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
				      struct msghdr *msg, size_t len, int flags)
{
	int err = 0;
	long timeout;
	size_t target, chunk, copied = 0;
	struct sock *sk = sock->sk;
	struct sk_buff *skb;

	msg->msg_namelen = 0;
	lock_sock(sk);
	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
	do {
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (!skb) {
			if (copied >= target)
				break;
			err = sock_error(sk);
			if (err || sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			err = -EAGAIN;
			if (!timeout)
				break;
			timeout = sk_wait_data(sk, &timeout);
			if (signal_pending(current)) {
				err = sock_intr_errno(timeout);
				break;
			}
			continue;
		}
		chunk = min_t(size_t, skb->len, len);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (!copied)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		len -= chunk;
		sock_recv_ts_and_drops(msg, sk, skb);
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);
			if (skb->len) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
			kfree_skb(skb);
		} else {
			/* put message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (len > 0);

	release_sock(sk);
	return copied ? : err;
}
#endif

static void lana_proto_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_receive_queue);
}

static int lana_proto_init(struct sock *sk)
{
	sk->sk_destruct = lana_proto_destruct;
	return 0;
}

static void lana_proto_close(struct sock *sk, long timeout)
{
	sk_common_release(sk);
}

/* PF_LANA sockets are not kept in any hash table and ports are unused,
 * hence the no-op stubs below. */
static void lana_proto_hash(struct sock *sk)
{
}

static void lana_proto_unhash(struct sock *sk)
{
}

static int lana_proto_get_port(struct sock *sk, unsigned short sport)
{
	return 0;
}

static struct lana_protocol *pflana_proto_get(int proto)
{
	struct lana_protocol *ret = NULL;

	if (proto < 0 || proto >= LANA_NPROTO)
		return NULL;
	rcu_read_lock();
	ret = rcu_dereference_raw(proto_tab[proto]);
	rcu_read_unlock();

	return ret;
}

static int lana_family_create(struct net *net, struct socket *sock,
			      int protocol, int kern)
{
	struct sock *sk;
	struct lana_protocol *lp;
	struct lana_sock *ls;

	if (!net_eq(net, &init_net))
		return -EAFNOSUPPORT;

	if (protocol == LANA_PROTO_AUTO) {
		switch (sock->type) {
		case SOCK_RAW:
			if (!capable(CAP_SYS_ADMIN))
				return -EPERM;
			protocol = LANA_PROTO_RAW;
			break;
		default:
			return -EPROTONOSUPPORT;
		}
	}

	lp = pflana_proto_get(protocol);
	if (!lp)
		return -EPROTONOSUPPORT;

	sk = sk_alloc(net, PF_LANA, GFP_KERNEL, lp->proto);
	if (!sk)
		return -ENOMEM;
	if (lana_sk_init(sk) < 0) {
		sock_put(sk);
		return -ENOMEM;
	}

	sock_init_data(sock, sk);
	sock->state = SS_UNCONNECTED;
	sock->ops = lp->ops;

	sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
	sk->sk_protocol = protocol;
	sk->sk_family = PF_LANA;
	sk->sk_type = sock->type;
	sk->sk_prot->init(sk);

	ls = to_lana_sk(sk);
	ls->bound = 0;

	return 0;
}

static const struct net_proto_family lana_family_ops = {
	.family = PF_LANA,
	.create = lana_family_create,
	.owner = THIS_MODULE,
};

static const struct proto_ops lana_raw_ops = {
	.family = PF_LANA,
	.owner = THIS_MODULE,
	.release = lana_raw_release,
	.recvmsg = sock_common_recvmsg,
	.sendmsg = lana_raw_sendmsg,
	.poll = lana_raw_poll,
	.bind = lana_raw_bind,
	.setsockopt = sock_no_setsockopt,
	.getsockopt = sock_no_getsockopt,
	.connect = sock_no_connect,
	.socketpair = sock_no_socketpair,
	.accept = sock_no_accept,
	.getname = sock_no_getname,
	.ioctl = sock_no_ioctl,
	.listen = sock_no_listen,
	.shutdown = sock_no_shutdown,
	.mmap = sock_no_mmap,
	.sendpage = sock_no_sendpage,
};

static struct proto lana_proto __read_mostly = {
	.name = "LANA",
	.owner = THIS_MODULE,
	.obj_size = sizeof(struct lana_sock),
	.backlog_rcv = lana_proto_backlog_rcv,
	.close = lana_proto_close,
	.init = lana_proto_init,
	.recvmsg = lana_proto_recvmsg,
	.sendmsg = lana_proto_sendmsg,
	.hash = lana_proto_hash,
	.unhash = lana_proto_unhash,
	.get_port = lana_proto_get_port,
};

static struct lana_protocol lana_proto_raw __read_mostly = {
	.protocol = LANA_PROTO_RAW,
	.ops = &lana_raw_ops,
	.proto = &lana_proto,
	.owner = THIS_MODULE,
};

int pflana_proto_register(int proto, struct lana_protocol *lp)
{
	int err;

	if (!lp || proto < 0 || proto >= LANA_NPROTO)
		return -EINVAL;
	if (rcu_dereference_raw(proto_tab[proto]))
		return -EBUSY;

	err = proto_register(lp->proto, 1);
	if (err)
		return err;

	mutex_lock(&proto_tab_lock);
	lp->protocol = proto;
	rcu_assign_pointer(proto_tab[proto], lp);
	mutex_unlock(&proto_tab_lock);
	synchronize_rcu();

	if (lp->owner != THIS_MODULE)
		__module_get(lp->owner);
	return 0;
}
EXPORT_SYMBOL(pflana_proto_register);

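/*
 * Registration sketch (hypothetical): another module could plug its own
 * protocol into the PF_LANA family through the exported helpers. Only
 * LANA_NPROTO slots exist, so a genuinely new protocol number would also
 * require growing that constant; proto_num, my_ops and my_proto below are
 * placeholders, not symbols defined in this tree. Note that .protocol is
 * filled in by pflana_proto_register() itself:
 *
 *	static struct lana_protocol my_lana_proto = {
 *		.ops	= &my_ops,
 *		.proto	= &my_proto,
 *		.owner	= THIS_MODULE,
 *	};
 *	int err = pflana_proto_register(proto_num, &my_lana_proto);
 */
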
void pflana_proto_unregister(struct lana_protocol *lp)
{
	if (!lp)
		return;
	if (lp->protocol < 0 || lp->protocol >= LANA_NPROTO)
		return;
	if (!rcu_dereference_raw(proto_tab[lp->protocol]))
		return;

	BUG_ON(proto_tab[lp->protocol] != lp);

	mutex_lock(&proto_tab_lock);
	rcu_assign_pointer(proto_tab[lp->protocol], NULL);
	mutex_unlock(&proto_tab_lock);
	synchronize_rcu();

	proto_unregister(lp->proto);
	if (lp->owner != THIS_MODULE)
		module_put(lp->owner);
}
EXPORT_SYMBOL(pflana_proto_unregister);

static int init_fb_pflana(void)
{
	int ret, i;
	for (i = 0; i < LANA_NPROTO; ++i)
		rcu_assign_pointer(proto_tab[i], NULL);

	ret = pflana_proto_register(LANA_PROTO_RAW, &lana_proto_raw);
	if (ret)
		return ret;

	ret = sock_register(&lana_family_ops);
	if (ret) {
		pflana_proto_unregister(&lana_proto_raw);
		return ret;
	}
	return 0;
}

static void cleanup_fb_pflana(void)
{
	int i;
	sock_unregister(PF_LANA);
	for (i = 0; i < LANA_NPROTO; ++i)
		pflana_proto_unregister(rcu_dereference_raw(proto_tab[i]));
}

static struct fblock *fb_pflana_build_fblock(char *name)
{
	int ret = 0;
	unsigned int cpu;
	struct fblock *fb;
	struct fb_pflana_priv __percpu *fb_priv;

	fb = alloc_fblock(GFP_ATOMIC);
	if (!fb)
		return NULL;
	fb_priv = alloc_percpu(struct fb_pflana_priv);
	if (!fb_priv)
		goto err;
	get_online_cpus();
	for_each_online_cpu(cpu) {
		struct fb_pflana_priv *fb_priv_cpu;
		fb_priv_cpu = per_cpu_ptr(fb_priv, cpu);
		seqlock_init(&fb_priv_cpu->lock);
		fb_priv_cpu->port[0] = IDP_UNKNOWN;
		fb_priv_cpu->port[1] = IDP_UNKNOWN;
	}
	put_online_cpus();

	ret = init_fblock(fb, name, fb_priv);
	if (ret)
		goto err2;
	fb->netfb_rx = fb_pflana_netrx;
	fb->event_rx = fb_pflana_event;
	fb->factory = NULL;
	ret = register_fblock_namespace(fb);
	if (ret)
		goto err3;
	__module_get(THIS_MODULE);
	return fb;
err3:
	cleanup_fblock_ctor(fb);
err2:
	free_percpu(fb_priv);
err:
	kfree_fblock(fb);
	return NULL;
}

static void fb_pflana_destroy_fblock(struct fblock *fb)
{
	unregister_fblock_namespace_no_rcu(fb);
	cleanup_fblock(fb);
	free_percpu(rcu_dereference_raw(fb->private_data));
	kfree_fblock(fb);
	module_put(THIS_MODULE);
}

static int __init init_fb_pflana_module(void)
{
	return init_fb_pflana();
}

static void __exit cleanup_fb_pflana_module(void)
{
	cleanup_fb_pflana();
}

module_init(init_fb_pflana_module);
module_exit(cleanup_fb_pflana_module);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <dborkma@tik.ee.ethz.ch>");
MODULE_DESCRIPTION("LANA PF_LANA module");