pf_lana: release lock in sendmsg after ppe has finished
src/fb_pflana.c (ana-net.git)

/*
 * Lightweight Autonomic Network Architecture
 *
 * LANA BSD Socket interface for communication with user level.
 * PF_LANA protocol family socket handler.
 *
 * Copyright 2011 Daniel Borkmann <dborkma@tik.ee.ethz.ch>,
 * Swiss Federal Institute of Technology (ETH Zurich)
 * Subject to the GPL.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/seqlock.h>
#include <linux/bug.h>
#include <linux/percpu.h>
#include <linux/prefetch.h>
#include <linux/atomic.h>
#include <linux/slab.h>
#include <net/sock.h>

#include "xt_fblock.h"
#include "xt_builder.h"
#include "xt_idp.h"
#include "xt_skb.h"
#include "xt_engine.h"

#define AF_LANA		27	/* For now.. */
#define PF_LANA		AF_LANA

/* LANA protocol types on top of the PF_LANA family */
#define LANA_PROTO_AUTO	0	/* Auto-select if none is given */
#define LANA_PROTO_RAW	1	/* LANA raw proto, currently the only one */
/* Total num of protos available */
#define LANA_NPROTO	2

/* Protocols in LANA family */
struct lana_protocol {
	int protocol;
	const struct proto_ops *ops;
	struct proto *proto;
	struct module *owner;
};

struct fb_pflana_priv {
	idp_t port[2];
	seqlock_t lock;
	struct lana_sock *sock_self;
};

struct lana_sock {
	struct sock sk;
	struct fblock *fb;
	int ifindex;
	int bound;
};

static DEFINE_MUTEX(proto_tab_lock);
static struct lana_protocol *proto_tab[LANA_NPROTO] __read_mostly;

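/*
 * Ingress path of this functional block: invoked by the packet processing
 * engine (PPE) when a packet reaches the block. The skb is queued onto the
 * owning socket's receive queue; shared skbs are cloned first. Writing
 * IDP_UNKNOWN as next IDP signals that this block terminates the chain.
 */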
static int fb_pflana_netrx(const struct fblock * const fb,
			   struct sk_buff *skb,
			   enum path_type * const dir)
{
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	struct sock *sk;
	struct fb_pflana_priv __percpu *fb_priv_cpu;

	fb_priv_cpu = this_cpu_ptr(rcu_dereference_raw(fb->private_data));
	sk = &fb_priv_cpu->sock_self->sk;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		if (nskb == NULL)
			goto out;
		kfree_skb(skb);
		skb = nskb;
	}
	sock_queue_rcv_skb(sk, skb);
out:
	/* We are last in chain. */
	write_next_idp_to_skb(skb, fb->idp, IDP_UNKNOWN);
	return PPE_HALT;
}

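/*
 * Notifier callback for fblock events: binds or unbinds the ingress/egress
 * port IDP on every online CPU. The per-CPU port table is updated under the
 * writer side of the seqlock so readers on the fast path see a consistent
 * value.
 */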
static int fb_pflana_event(struct notifier_block *self, unsigned long cmd,
			   void *args)
{
	int ret = NOTIFY_OK;
	unsigned int cpu;
	struct fblock *fb;
	struct fb_pflana_priv __percpu *fb_priv;

	rcu_read_lock();
	fb = rcu_dereference_raw(container_of(self, struct fblock_notifier,
					      nb)->self);
	fb_priv = (struct fb_pflana_priv __percpu *)
		  rcu_dereference_raw(fb->private_data);
	rcu_read_unlock();

	switch (cmd) {
	case FBLOCK_BIND_IDP: {
		int bound = 0;
		struct fblock_bind_msg *msg = args;
		get_online_cpus();
		for_each_online_cpu(cpu) {
			struct fb_pflana_priv *fb_priv_cpu;
			fb_priv_cpu = per_cpu_ptr(fb_priv, cpu);
			if (fb_priv_cpu->port[msg->dir] == IDP_UNKNOWN) {
				write_seqlock(&fb_priv_cpu->lock);
				fb_priv_cpu->port[msg->dir] = msg->idp;
				write_sequnlock(&fb_priv_cpu->lock);
				bound = 1;
			} else {
				ret = NOTIFY_BAD;
				break;
			}
		}
		put_online_cpus();
		if (bound)
			printk(KERN_INFO "[%s::bsdsock] port %s bound to IDP%u\n",
			       fb->name, path_names[msg->dir], msg->idp);
	} break;
	case FBLOCK_UNBIND_IDP: {
		int unbound = 0;
		struct fblock_bind_msg *msg = args;
		get_online_cpus();
		for_each_online_cpu(cpu) {
			struct fb_pflana_priv *fb_priv_cpu;
			fb_priv_cpu = per_cpu_ptr(fb_priv, cpu);
			if (fb_priv_cpu->port[msg->dir] == msg->idp) {
				write_seqlock(&fb_priv_cpu->lock);
				fb_priv_cpu->port[msg->dir] = IDP_UNKNOWN;
				write_sequnlock(&fb_priv_cpu->lock);
				unbound = 1;
			} else {
				ret = NOTIFY_BAD;
				break;
			}
		}
		put_online_cpus();
		if (unbound)
			printk(KERN_INFO "[%s::bsdsock] port %s unbound\n",
			       fb->name, path_names[msg->dir]);
	} break;
	default:
		break;
	}

	return ret;
}

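/*
 * Returns the fblock currently bound in the given direction, or NULL. The
 * port IDP is read under a seqlock retry loop; the reference taken by
 * search_fblock() must be dropped by the caller with put_fblock().
 */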
static struct fblock *get_bound_fblock(struct fblock *self,
				       enum path_type dir)
{
	idp_t fbidp;
	unsigned int seq;
	struct fb_pflana_priv __percpu *fb_priv_cpu;
	fb_priv_cpu = this_cpu_ptr(rcu_dereference_raw(self->private_data));
	do {
		seq = read_seqbegin(&fb_priv_cpu->lock);
		fbidp = fb_priv_cpu->port[dir];
	} while (read_seqretry(&fb_priv_cpu->lock, seq));
	return search_fblock(fbidp);
}

static inline struct lana_sock *to_lana_sk(const struct sock *sk)
{
	return container_of(sk, struct lana_sock, sk);
}

static struct fblock *fb_pflana_build_fblock(char *name);

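/*
 * Creates the backing functional block for a fresh socket, named after the
 * socket's kernel address, and points each CPU's private data back at the
 * owning socket.
 */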
static int lana_sk_init(struct sock *sk)
{
	int cpu;
	char name[32];
	struct lana_sock *lana = to_lana_sk(sk);

	memset(name, 0, sizeof(name));
	snprintf(name, sizeof(name), "%p", &lana->sk);
	lana->fb = fb_pflana_build_fblock(name);
	if (!lana->fb)
		return -ENOMEM;
	get_online_cpus();
	for_each_online_cpu(cpu) {
		struct fb_pflana_priv *fb_priv_cpu;
		fb_priv_cpu = per_cpu_ptr(lana->fb->private_data, cpu);
		fb_priv_cpu->sock_self = lana;
	}
	put_online_cpus();
	smp_wmb();
	return 0;
}

static void fb_pflana_destroy_fblock(struct fblock *fb);

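/*
 * Tears down a socket's functional block: unbinds whatever is attached on
 * ingress and egress, dropping the references taken by get_bound_fblock(),
 * then destroys the block itself.
 */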
static void lana_sk_free(struct sock *sk)
{
	struct fblock *fb_bound;
	struct lana_sock *lana;

	lana = to_lana_sk(sk);
	fb_bound = get_bound_fblock(lana->fb, TYPE_INGRESS);
	if (fb_bound) {
		fblock_unbind(fb_bound, lana->fb);
		put_fblock(fb_bound);
	}
	fb_bound = get_bound_fblock(lana->fb, TYPE_EGRESS);
	if (fb_bound) {
		fblock_unbind(lana->fb, fb_bound);
		put_fblock(fb_bound);
	}

	fb_pflana_destroy_fblock(lana->fb);
}

static int lana_raw_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	if (sk) {
		sock->sk = NULL;
		sk->sk_prot->close(sk, 0);
		lana_sk_free(sk);
	}
	return 0;
}

static int lana_raw_bind(struct socket *sock, struct sockaddr *addr, int len)
{
	int idx;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	struct lana_sock *lana = to_lana_sk(sk);

	if (len < sizeof(struct sockaddr))
		return -EINVAL;
	if (addr->sa_family != AF_LANA)
		return -EINVAL;

	idx = addr->sa_data[0];
	dev = dev_get_by_index(sock_net(sk), idx);
	if (dev == NULL)
		return -ENODEV;
	lana->ifindex = idx;
	lana->bound = 1;
	dev_put(dev);

	return 0;
}

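/* Classic datagram poll: report readability once the receive queue fills. */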
static unsigned int lana_raw_poll(struct file *file, struct socket *sock,
				  poll_table *wait)
{
	unsigned int mask = 0;
	struct sock *sk = sock->sk;
	poll_wait(file, sk_sleep(sk), wait);
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;
	return mask;
}

static int lana_raw_sendmsg(struct kiocb *iocb, struct socket *sock,
			    struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	return sk->sk_prot->sendmsg(iocb, sk, msg, len);
}

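/*
 * Egress path: builds an skb from the user iovec, tags it with this
 * block's egress IDP (read under the seqlock retry loop) and hands it to
 * the PPE via process_packet(). The socket lock only guards the device
 * lookup here; it is released before the packet enters the engine.
 */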
/* Todo later: send bound dev from fb_eth, not from userspace */
static int lana_proto_sendmsg(struct kiocb *iocb, struct sock *sk,
			      struct msghdr *msg, size_t len)
{
	int err;
	unsigned int seq;
	struct net *net = sock_net(sk);
	struct net_device *dev;
	struct sockaddr *target;
	struct sk_buff *skb;
	struct lana_sock *lana = to_lana_sk(sk);
	struct fblock *fb = lana->fb;
	struct fb_pflana_priv *fb_priv_cpu;

	if (msg->msg_name == NULL)
		return -EDESTADDRREQ;
	if (msg->msg_namelen < sizeof(struct sockaddr))
		return -EINVAL;

	target = (struct sockaddr *) msg->msg_name;
	if (unlikely(target->sa_family != AF_LANA))
		return -EAFNOSUPPORT;

	lock_sock(sk);
	if (sk->sk_bound_dev_if || lana->bound) {
		dev = dev_get_by_index(net, lana->bound ? lana->ifindex :
				       sk->sk_bound_dev_if);
	} else {
		dev = dev_getfirstbyhwtype(sock_net(sk), ETH_P_ALL); //FIXME
	}
	release_sock(sk);

	if (!dev || !(dev->flags & IFF_UP) || unlikely(len > dev->mtu)) {
		err = -EIO;
		goto drop_put;
	}

	skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + len,
				  msg->msg_flags & MSG_DONTWAIT, &err);
	if (!skb)
		goto drop_put;

	skb_reserve(skb, LL_RESERVED_SPACE(dev));

	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	err = memcpy_fromiovec((void *) skb_put(skb, len), msg->msg_iov, len);
	if (err < 0)
		goto drop;

	skb->dev = dev;
	skb->sk = sk;
	skb->protocol = htons(ETH_P_ALL); //FIXME
	skb_orphan(skb);
	dev_put(dev);

	rcu_read_lock();
	fb_priv_cpu = this_cpu_ptr(rcu_dereference(fb->private_data));
	do {
		seq = read_seqbegin(&fb_priv_cpu->lock);
		write_next_idp_to_skb(skb, fb->idp,
				      fb_priv_cpu->port[TYPE_EGRESS]);
	} while (read_seqretry(&fb_priv_cpu->lock, seq));

	process_packet(skb, TYPE_EGRESS);
	rcu_read_unlock();

	return (err >= 0) ? len : err;
drop:
	kfree_skb(skb);
drop_put:
	if (dev)	/* dev may be NULL when the lookup above failed */
		dev_put(dev);
	return err;
}

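/* Plain datagram receive; truncates to the user buffer and sets MSG_TRUNC. */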
static int lana_proto_recvmsg(struct kiocb *iocb, struct sock *sk,
			      struct msghdr *msg, size_t len, int noblock,
			      int flags, int *addr_len)
{
	int err = 0;
	struct sk_buff *skb;
	size_t copied = 0;

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb) {
		if (sk->sk_shutdown & RCV_SHUTDOWN)
			return 0;
		return err;
	}
	msg->msg_namelen = 0;
	if (addr_len)
		*addr_len = msg->msg_namelen;
	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err == 0)
		sock_recv_ts_and_drops(msg, sk, skb);
	skb_free_datagram(sk, skb);

	return err ? : copied;
}

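/*
 * Backlog receive is effectively disabled: the skb is dropped and
 * NET_RX_DROP returned unconditionally. The #if 0 block below sketches the
 * intended per-protocol dispatch.
 */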
static int lana_proto_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int err = -EPROTONOSUPPORT;

	kfree_skb(skb);
#if 0
	switch (sk->sk_protocol) {
	case LANA_PROTO_RAW:
		err = sock_queue_rcv_skb(sk, skb);
		if (err != 0)
			kfree_skb(skb);
		break;
	default:
		kfree_skb(skb);
		err = -EPROTONOSUPPORT;
		break;
	}
#endif
	return err ? NET_RX_DROP : NET_RX_SUCCESS;
}

#if 0 /* unused */
static int lana_common_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
				      struct msghdr *msg, size_t len, int flags)
{
	int err = 0;
	long timeout;
	size_t target, chunk, copied = 0;
	struct sock *sk = sock->sk;
	struct sk_buff *skb;

	msg->msg_namelen = 0;
	lock_sock(sk);
	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
	do {
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (!skb) {
			if (copied >= target)
				break;
			err = sock_error(sk);
			if (err || sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			err = -EAGAIN;
			if (!timeout)
				break;
			timeout = sk_wait_data(sk, &timeout);
			if (signal_pending(current)) {
				err = sock_intr_errno(timeout);
				break;
			}
			continue;
		}
		chunk = min_t(size_t, skb->len, len);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (!copied)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		len -= chunk;
		sock_recv_ts_and_drops(msg, sk, skb);
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);
			if (skb->len) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
			kfree_skb(skb);
		} else {
			/* put message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (len > 0);

	release_sock(sk);
	return copied ? : err;
}
#endif

static void lana_proto_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_receive_queue);
}

static int lana_proto_init(struct sock *sk)
{
	sk->sk_destruct = lana_proto_destruct;
	return 0;
}

static void lana_proto_close(struct sock *sk, long timeout)
{
	sk_common_release(sk);
}

static void lana_proto_hash(struct sock *sk)
{
}

static void lana_proto_unhash(struct sock *sk)
{
}

static int lana_proto_get_port(struct sock *sk, unsigned short sport)
{
	return 0;
}

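/* Looks up a registered LANA protocol by number; returns NULL if unknown. */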
static struct lana_protocol *pflana_proto_get(int proto)
{
	struct lana_protocol *ret = NULL;

	if (proto < 0 || proto >= LANA_NPROTO)
		return NULL;
	rcu_read_lock();
	ret = rcu_dereference_raw(proto_tab[proto]);
	rcu_read_unlock();

	return ret;
}

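/*
 * socket(2) entry point for PF_LANA: resolves the protocol (auto-selecting
 * raw for SOCK_RAW, which requires CAP_SYS_ADMIN), allocates the sock and
 * its functional block, and wires up the protocol ops.
 */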
static int lana_family_create(struct net *net, struct socket *sock,
			      int protocol, int kern)
{
	struct sock *sk;
	struct lana_protocol *lp;
	struct lana_sock *ls;

	if (!net_eq(net, &init_net))
		return -EAFNOSUPPORT;

	if (protocol == LANA_PROTO_AUTO) {
		switch (sock->type) {
		case SOCK_RAW:
			if (!capable(CAP_SYS_ADMIN))
				return -EPERM;
			protocol = LANA_PROTO_RAW;
			break;
		default:
			return -EPROTONOSUPPORT;
		}
	}

	lp = pflana_proto_get(protocol);
	if (!lp)
		return -EPROTONOSUPPORT;

	sk = sk_alloc(net, PF_LANA, GFP_KERNEL, lp->proto);
	if (!sk)
		return -ENOMEM;
	if (lana_sk_init(sk) < 0) {
		sock_put(sk);
		return -ENOMEM;
	}

	sock_init_data(sock, sk);
	sock->state = SS_UNCONNECTED;
	sock->ops = lp->ops;

	sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
	sk->sk_protocol = protocol;
	sk->sk_family = PF_LANA;
	sk->sk_type = sock->type;
	sk->sk_prot->init(sk);

	ls = to_lana_sk(sk);
	ls->bound = 0;

	return 0;
}

static const struct net_proto_family lana_family_ops = {
	.family = PF_LANA,
	.create = lana_family_create,
	.owner = THIS_MODULE,
};

static const struct proto_ops lana_raw_ops = {
	.family = PF_LANA,
	.owner = THIS_MODULE,
	.release = lana_raw_release,
	.recvmsg = sock_common_recvmsg,
	.sendmsg = lana_raw_sendmsg,
	.poll = lana_raw_poll,
	.bind = lana_raw_bind,
	.setsockopt = sock_no_setsockopt,
	.getsockopt = sock_no_getsockopt,
	.connect = sock_no_connect,
	.socketpair = sock_no_socketpair,
	.accept = sock_no_accept,
	.getname = sock_no_getname,
	.ioctl = sock_no_ioctl,
	.listen = sock_no_listen,
	.shutdown = sock_no_shutdown,
	.mmap = sock_no_mmap,
	.sendpage = sock_no_sendpage,
};

static struct proto lana_proto __read_mostly = {
	.name = "LANA",
	.owner = THIS_MODULE,
	.obj_size = sizeof(struct lana_sock),
	.backlog_rcv = lana_proto_backlog_rcv,
	.close = lana_proto_close,
	.init = lana_proto_init,
	.recvmsg = lana_proto_recvmsg,
	.sendmsg = lana_proto_sendmsg,
	.hash = lana_proto_hash,
	.unhash = lana_proto_unhash,
	.get_port = lana_proto_get_port,
};

static struct lana_protocol lana_proto_raw __read_mostly = {
	.protocol = LANA_PROTO_RAW,
	.ops = &lana_raw_ops,
	.proto = &lana_proto,
	.owner = THIS_MODULE,
};

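/*
 * Registers a protocol in the PF_LANA family. The slot must be free; the
 * table update is published with RCU so lookups in pflana_proto_get() need
 * no lock.
 */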
int pflana_proto_register(int proto, struct lana_protocol *lp)
{
	int err;

	if (!lp || proto < 0 || proto >= LANA_NPROTO)
		return -EINVAL;
	if (rcu_dereference_raw(proto_tab[proto]))
		return -EBUSY;

	err = proto_register(lp->proto, 1);
	if (err)
		return err;

	mutex_lock(&proto_tab_lock);
	lp->protocol = proto;
	rcu_assign_pointer(proto_tab[proto], lp);
	mutex_unlock(&proto_tab_lock);
	synchronize_rcu();

	if (lp->owner != THIS_MODULE)
		__module_get(lp->owner);
	return 0;
}
EXPORT_SYMBOL(pflana_proto_register);

void pflana_proto_unregister(struct lana_protocol *lp)
{
	if (!lp)
		return;
	if (lp->protocol < 0 || lp->protocol >= LANA_NPROTO)
		return;
	if (!rcu_dereference_raw(proto_tab[lp->protocol]))
		return;

	BUG_ON(proto_tab[lp->protocol] != lp);

	mutex_lock(&proto_tab_lock);
	rcu_assign_pointer(proto_tab[lp->protocol], NULL);
	mutex_unlock(&proto_tab_lock);
	synchronize_rcu();

	proto_unregister(lp->proto);
	if (lp->owner != THIS_MODULE)
		module_put(lp->owner);
}
EXPORT_SYMBOL(pflana_proto_unregister);

static int init_fb_pflana(void)
{
	int ret, i;
	for (i = 0; i < LANA_NPROTO; ++i)
		rcu_assign_pointer(proto_tab[i], NULL);

	ret = pflana_proto_register(LANA_PROTO_RAW, &lana_proto_raw);
	if (ret)
		return ret;

	ret = sock_register(&lana_family_ops);
	if (ret) {
		pflana_proto_unregister(&lana_proto_raw);
		return ret;
	}
	return 0;
}

static void cleanup_fb_pflana(void)
{
	int i;
	sock_unregister(PF_LANA);
	for (i = 0; i < LANA_NPROTO; ++i)
		pflana_proto_unregister(rcu_dereference_raw(proto_tab[i]));
}

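/*
 * Constructs and registers a PF_LANA functional block: allocates the block
 * and its per-CPU private data, initializes the port seqlocks, and inserts
 * the block into the fblock namespace.
 */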
static struct fblock *fb_pflana_build_fblock(char *name)
{
	int ret = 0;
	unsigned int cpu;
	struct fblock *fb;
	struct fb_pflana_priv __percpu *fb_priv;

	fb = alloc_fblock(GFP_ATOMIC);
	if (!fb)
		return NULL;
	fb_priv = alloc_percpu(struct fb_pflana_priv);
	if (!fb_priv)
		goto err;
	get_online_cpus();
	for_each_online_cpu(cpu) {
		struct fb_pflana_priv *fb_priv_cpu;
		fb_priv_cpu = per_cpu_ptr(fb_priv, cpu);
		seqlock_init(&fb_priv_cpu->lock);
		fb_priv_cpu->port[0] = IDP_UNKNOWN;
		fb_priv_cpu->port[1] = IDP_UNKNOWN;
	}
	put_online_cpus();

	ret = init_fblock(fb, name, fb_priv);
	if (ret)
		goto err2;
	fb->netfb_rx = fb_pflana_netrx;
	fb->event_rx = fb_pflana_event;
	fb->factory = NULL;
	ret = register_fblock_namespace(fb);
	if (ret)
		goto err3;
	__module_get(THIS_MODULE);
	return fb;
err3:
	cleanup_fblock_ctor(fb);
err2:
	free_percpu(fb_priv);
err:
	kfree_fblock(fb);
	return NULL;
}

static void fb_pflana_destroy_fblock(struct fblock *fb)
{
	unregister_fblock_namespace_no_rcu(fb);
	cleanup_fblock(fb);
	free_percpu(rcu_dereference_raw(fb->private_data));
	kfree_fblock(fb);
	module_put(THIS_MODULE);
}

static int __init init_fb_pflana_module(void)
{
	return init_fb_pflana();
}

static void __exit cleanup_fb_pflana_module(void)
{
	synchronize_rcu();
	cleanup_fb_pflana();
}

module_init(init_fb_pflana_module);
module_exit(cleanup_fb_pflana_module);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <dborkma@tik.ee.ethz.ch>");
MODULE_DESCRIPTION("LANA PF_LANA module");