2 * Lightweight Autonomic Network Architecture
4 * LANA BSD Socket interface for communication with user level.
5 * PF_LANA protocol family socket handler.
7 * Copyright 2011 Daniel Borkmann <dborkma@tik.ee.ethz.ch>,
8 * Swiss Federal Institute of Technology (ETH Zurich)
12 #include <linux/kernel.h>
13 #include <linux/module.h>
14 #include <linux/spinlock.h>
15 #include <linux/notifier.h>
16 #include <linux/rcupdate.h>
17 #include <linux/seqlock.h>
18 #include <linux/bug.h>
19 #include <linux/percpu.h>
20 #include <linux/prefetch.h>
21 #include <linux/atomic.h>
22 #include <linux/slab.h>
25 #include "xt_fblock.h"
26 #include "xt_builder.h"
29 #include "xt_engine.h"
30 #include "xt_builder.h"
/* Provisional address-family number for LANA (not officially assigned). */
32 #define AF_LANA 27 /* For now.. */
33 #define PF_LANA AF_LANA
35 /* LANA protocol types on top of the PF_LANA family */
/* AUTO lets lana_family_create() pick a protocol from the socket type. */
36 #define LANA_PROTO_AUTO 0
37 #define LANA_PROTO_RAW 1
40 /* Protocols in LANA family */
/*
 * Registry entry describing one protocol living on top of PF_LANA.
 * NOTE(review): further members (protocol id, struct proto *, owner)
 * are referenced elsewhere in this file but are elided in this chunk.
 */
41 struct lana_protocol
{
/* Socket operations table installed for sockets of this protocol. */
43 const struct proto_ops
*ops
;
/*
 * Per-CPU private data of one PF_LANA fblock instance.
 * NOTE(review): the seqlock 'lock' and idp 'port[]' members used
 * elsewhere in this file are elided in this chunk.
 */
48 struct fb_pflana_priv
{
/* Back-pointer to the owning PF_LANA socket. */
51 struct lana_sock
*sock_self
;
/* Serializes writers of proto_tab; lookups are RCU-protected. */
61 static DEFINE_MUTEX(proto_tab_lock
);
/* Registered LANA protocols, indexed by protocol number. */
63 static struct lana_protocol
*proto_tab
[LANA_NPROTO
] __read_mostly
;
/*
 * Receive path of the PF_LANA fblock: hands an incoming skb to the
 * socket bound to this fblock instance.
 * NOTE(review): parts of this function (locals such as sk/skb, the
 * shared-skb handling and the return paths) are elided in this chunk;
 * the comments below cover only what is visible.
 */
65 static int fb_pflana_netrx(const struct fblock
* const fb
,
67 enum path_type
* const dir
)
/* Remember the original data pointer and length -- presumably to detect
 * whether the skb was modified further down; TODO confirm vs elided code. */
69 u8
*skb_head
= skb
->data
;
70 int skb_len
= skb
->len
;
72 struct fb_pflana_priv __percpu
*fb_priv_cpu
;
/* Per-CPU private data; raw dereference -- the caller presumably holds
 * the RCU read lock around fblock rx handlers. */
74 fb_priv_cpu
= this_cpu_ptr(rcu_dereference_raw(fb
->private_data
));
75 sk
= &fb_priv_cpu
->sock_self
->sk
;
/* Shared skbs must be cloned before being queued to a socket. */
77 if (skb_shared(skb
)) {
78 struct sk_buff
*nskb
= skb_clone(skb
, GFP_ATOMIC
);
/* NOTE(review): the NULL check / further use of nskb is elided here --
 * confirm the clone result is actually checked and consumed. */
79 if (skb_head
!= skb
->data
) {
/* Deliver the packet into the socket receive path (nested = 0). */
88 sk_receive_skb(sk
, skb
, 0);
/* Mark the skb so that no further fblock processes it. */
90 write_next_idp_to_skb(skb
, fb
->idp
, IDP_UNKNOWN
);
/*
 * Notifier callback for fblock events (bind/unbind etc.); the remaining
 * parameters and the entire body are elided in this chunk.
 */
94 static int fb_pflana_event(struct notifier_block
*self
, unsigned long cmd
,
/*
 * Look up the fblock bound to @self in direction @dir.
 * Returns the result of search_fblock() (presumably takes a reference;
 * callers pass it to put_fblock() -- TODO confirm).
 */
100 static struct fblock
*get_bound_fblock(struct fblock
*self
, enum path_type dir
)
104 struct fb_pflana_priv __percpu
*fb_priv_cpu
;
105 fb_priv_cpu
= this_cpu_ptr(rcu_dereference_raw(self
->private_data
));
/* Seqlock retry loop: re-read the bound idp until a stable snapshot. */
107 seq
= read_seqbegin(&fb_priv_cpu
->lock
);
108 fbidp
= fb_priv_cpu
->port
[dir
];
109 } while (read_seqretry(&fb_priv_cpu
->lock
, seq
));
110 return search_fblock(fbidp
);
/* Map a struct sock embedded in struct lana_sock back to its container. */
113 static inline struct lana_sock
*to_lana_sk(const struct sock
*sk
)
115 return container_of(sk
, struct lana_sock
, sk
);
/* Forward declaration: defined below, but needed by lana_sk_init(). */
118 static struct fblock
*fb_pflana_ctor(char *name
);
/*
 * Per-socket init: create a private fblock instance for this socket and
 * point every online CPU's private data back at the lana_sock.
 */
120 static int lana_sk_init(struct sock
* sk
)
124 struct lana_sock
*lana
= to_lana_sk(sk
);
/* Derive a unique fblock name from the sock address ("%p"). */
126 memset(name
, 0, sizeof(name
));
127 snprintf(name
, sizeof(name
), "%p", &lana
->sk
);
128 lana
->fb
= fb_pflana_ctor(name
);
/* NOTE(review): iterating online CPUs without visible hotplug
 * protection (get_online_cpus) -- confirm in the elided code. */
132 for_each_online_cpu(cpu
) {
133 struct fb_pflana_priv
*fb_priv_cpu
;
134 fb_priv_cpu
= per_cpu_ptr(lana
->fb
->private_data
, cpu
);
135 fb_priv_cpu
->sock_self
= lana
;
/*
 * Per-socket teardown: unbind this socket's fblock from its ingress and
 * egress neighbors, drop the neighbor references and destroy the fblock.
 */
142 static void lana_sk_free(struct sock
*sk
)
144 struct fblock
*fb_bound
;
145 struct lana_sock
*lana
;
147 lana
= to_lana_sk(sk
);
/* Detach from the fblock bound on the ingress side, if any. */
148 fb_bound
= get_bound_fblock(lana
->fb
, TYPE_INGRESS
);
150 fblock_unbind(fb_bound
, lana
->fb
);
151 put_fblock(fb_bound
);
/* Detach from the fblock bound on the egress side, if any. */
153 fb_bound
= get_bound_fblock(lana
->fb
, TYPE_EGRESS
);
155 fblock_unbind(lana
->fb
, fb_bound
);
156 put_fblock(fb_bound
);
/* Finally remove our own fblock from the namespace. */
158 unregister_fblock_namespace(lana
->fb
);
/* release(): delegate to the protocol's close handler (timeout 0). */
161 static int lana_raw_release(struct socket
*sock
)
163 struct sock
*sk
= sock
->sk
;
166 sk
->sk_prot
->close(sk
, 0);
/*
 * bind(): bind the raw socket to a network device.
 * NOTE(review): the device index is taken from sa_data[0], a single
 * byte -- this limits usable ifindex values; the error handling and
 * dev_put() paths are elided in this chunk.
 */
172 static int lana_raw_bind(struct socket
*sock
, struct sockaddr
*addr
, int len
)
175 struct sock
*sk
= sock
->sk
;
176 struct net_device
*dev
= NULL
;
177 struct lana_sock
*lana
= to_lana_sk(sk
);
/* Basic sanity checks on the passed sockaddr. */
179 if (len
< sizeof(struct sockaddr
))
181 if (addr
->sa_family
!= AF_LANA
)
/* dev_get_by_index() takes a device reference -- confirm it is dropped
 * on all paths in the elided remainder. */
184 idx
= addr
->sa_data
[0];
185 dev
= dev_get_by_index(sock_net(sk
), idx
);
/* poll(): readable when the socket receive queue is non-empty. */
195 static unsigned int lana_raw_poll(struct file
*file
, struct socket
*sock
,
198 unsigned int mask
= 0;
199 struct sock
*sk
= sock
->sk
;
200 poll_wait(file
, sk_sleep(sk
), wait
);
201 if (!skb_queue_empty(&sk
->sk_receive_queue
))
202 mask
|= POLLIN
| POLLRDNORM
;
/* sendmsg(): thin wrapper delegating to the protocol's sendmsg handler. */
206 static int lana_raw_sendmsg(struct kiocb
*iocb
, struct socket
*sock
,
207 struct msghdr
*msg
, size_t len
)
209 struct sock
*sk
= sock
->sk
;
210 return sk
->sk_prot
->sendmsg(iocb
, sk
, msg
, len
);
/*
 * Protocol-level sendmsg: validate the destination, allocate an skb with
 * link-layer headroom, copy the user payload and push the packet into
 * the LANA egress path.
 * NOTE(review): several error/cleanup branches are elided in this chunk.
 */
213 static int lana_proto_sendmsg(struct kiocb
*iocb
, struct sock
*sk
,
214 struct msghdr
*msg
, size_t len
)
218 struct net
*net
= sock_net(sk
);
219 struct net_device
*dev
;
220 struct sockaddr
*target
;
222 struct lana_sock
*lana
= to_lana_sk(sk
);
223 struct fblock
*fb
= lana
->fb
;
224 struct fb_pflana_priv
*fb_priv_cpu
;
/* A destination address is mandatory for this protocol. */
226 if (msg
->msg_name
== NULL
)
227 return -EDESTADDRREQ
;
228 if (msg
->msg_namelen
< sizeof(struct sockaddr
))
231 target
= (struct sockaddr
*) msg
->msg_name
;
232 if (target
->sa_family
!= AF_LANA
)
233 return -EAFNOSUPPORT
;
/* Resolve the outgoing device: an explicit bind() wins over
 * sk_bound_dev_if. dev_get_by_index() takes a reference -- confirm
 * dev_put() on all exit paths (elided here). */
234 if (sk
->sk_bound_dev_if
|| lana
->bound
)
235 dev
= dev_get_by_index(net
, lana
->bound
? lana
->ifindex
:
236 sk
->sk_bound_dev_if
);
239 if (!dev
|| !(dev
->flags
& IFF_UP
)) {
/* Allocate with room for link-layer headers in front of the payload. */
244 skb
= sock_alloc_send_skb(sk
, LL_ALLOCATED_SPACE(dev
) + len
,
245 msg
->msg_flags
& MSG_DONTWAIT
, &err
);
249 skb_reserve(skb
, LL_RESERVED_SPACE(dev
));
250 skb_reset_mac_header(skb
);
251 skb_reset_network_header(skb
);
253 skb
->pkt_type
= PACKET_OUTGOING
;
256 skb
->protocol
= htons(ETH_P_ALL
); //FIXME
257 skb
->priority
= sk
->sk_priority
;
258 skb
->mark
= sk
->sk_mark
;
/* Copy the user payload into the skb tail room. */
260 err
= memcpy_fromiovec((void *) skb_put(skb
, len
), msg
->msg_iov
, len
);
263 if (skb
->pkt_type
== PACKET_LOOPBACK
) {
/* rcu_dereference (not _raw) -- presumably inside an RCU read section;
 * TODO confirm against the elided locking. */
268 fb_priv_cpu
= this_cpu_ptr(rcu_dereference(fb
->private_data
));
/* Seqlock retry loop for a stable snapshot of the egress binding.
 * NOTE(review): the next idp is hard-coded to 1 instead of the
 * commented-out fb_priv_cpu->port[TYPE_EGRESS] -- looks like debug
 * leftover; confirm the intended value. */
270 seq
= read_seqbegin(&fb_priv_cpu
->lock
);
271 write_next_idp_to_skb(skb
, fb
->idp
, 1
272 /*fb_priv_cpu->port[TYPE_EGRESS]*/);
273 } while (read_seqretry(&fb_priv_cpu
->lock
, seq
));
/* Hand the packet to the LANA engine on the egress path. */
276 process_packet(skb
, TYPE_EGRESS
);
278 return (err
>= 0) ? len
: err
;
/*
 * Protocol-level recvmsg: dequeue one datagram, copy up to @len bytes to
 * user space and flag truncation via MSG_TRUNC.
 * NOTE(review): the setup of 'copied'/'err' and some branches are elided
 * in this chunk.
 */
286 static int lana_proto_recvmsg(struct kiocb
*iocb
, struct sock
*sk
,
287 struct msghdr
*msg
, size_t len
, int noblock
,
288 int flags
, int *addr_len
)
294 skb
= skb_recv_datagram(sk
, flags
, noblock
, &err
);
296 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
/* No source address is reported for PF_LANA datagrams. */
300 msg
->msg_namelen
= 0;
302 *addr_len
= msg
->msg_namelen
;
305 msg
->msg_flags
|= MSG_TRUNC
;
308 err
= skb_copy_datagram_iovec(skb
, 0, msg
->msg_iov
, copied
);
310 sock_recv_ts_and_drops(msg
, sk
, skb
);
311 skb_free_datagram(sk
, skb
);
/* Error takes precedence; otherwise return the copied byte count. */
313 return err
? : copied
;
/*
 * Backlog receive: queue the skb to the socket for supported protocols
 * and translate the result into NET_RX_DROP / NET_RX_SUCCESS.
 */
316 static int lana_proto_backlog_rcv(struct sock
*sk
, struct sk_buff
*skb
)
318 int err
= -EPROTONOSUPPORT
;
320 switch (sk
->sk_protocol
) {
/* Visible case: queue into the socket receive queue. */
322 err
= sock_queue_rcv_skb(sk
, skb
);
/* Unsupported protocol. NOTE(review): the skb free on this path is
 * elided -- confirm the packet is not leaked. */
328 err
= -EPROTONOSUPPORT
;
332 return err
? NET_RX_DROP
: NET_RX_SUCCESS
;
/*
 * Stream-style common recvmsg: copy queued data chunk-wise until the
 * low-watermark target is reached, blocking up to the receive timeout.
 * NOTE(review): the loop head, some error branches and the exit label
 * are elided in this chunk.
 */
336 static int lana_common_stream_recvmsg(struct kiocb
*iocb
, struct socket
*sock
,
337 struct msghdr
*msg
, size_t len
, int flags
)
341 size_t target
, chunk
, copied
= 0;
342 struct sock
*sk
= sock
->sk
;
345 msg
->msg_namelen
= 0;
/* Blocking timeout and low-watermark target for this receive. */
347 timeout
= sock_rcvtimeo(sk
, flags
& MSG_DONTWAIT
);
348 target
= sock_rcvlowat(sk
, flags
& MSG_WAITALL
, len
);
350 skb
= skb_dequeue(&sk
->sk_receive_queue
);
/* Done once the low watermark is reached. */
352 if (copied
>= target
)
354 err
= sock_error(sk
);
355 if (err
|| sk
->sk_shutdown
& RCV_SHUTDOWN
)
/* Queue empty: wait for data or a signal. */
360 timeout
= sk_wait_data(sk
, &timeout
);
361 if (signal_pending(current
)) {
362 err
= sock_intr_errno(timeout
);
367 chunk
= min_t(size_t, skb
->len
, len
);
/* On copy failure, requeue the skb so no data is lost. */
368 if (memcpy_toiovec(msg
->msg_iov
, skb
->data
, chunk
)) {
369 skb_queue_head(&sk
->sk_receive_queue
, skb
);
376 sock_recv_ts_and_drops(msg
, sk
, skb
);
377 if (!(flags
& MSG_PEEK
)) {
/* Consume the copied bytes; a partly-read skb goes back on the queue. */
378 skb_pull(skb
, chunk
);
380 skb_queue_head(&sk
->sk_receive_queue
, skb
);
385 /* put message back and return */
386 skb_queue_head(&sk
->sk_receive_queue
, skb
);
/* Data copied takes precedence over a pending error. */
392 return copied
? : err
;
/* Sock destructor: drop anything still sitting in the receive queue. */
396 static void lana_proto_destruct(struct sock
*sk
)
398 skb_queue_purge(&sk
->sk_receive_queue
);
/* Protocol init: install the destructor on the sock. */
401 static int lana_proto_init(struct sock
*sk
)
403 sk
->sk_destruct
= lana_proto_destruct
;
/* close(): standard common release; the timeout argument is ignored. */
407 static void lana_proto_close(struct sock
*sk
, long timeout
)
409 sk_common_release(sk
);
/* hash(): stub -- PF_LANA sockets are not hashed; body elided here. */
412 static void lana_proto_hash(struct sock
*sk
)
/* unhash(): stub counterpart to lana_proto_hash(); body elided here. */
416 static void lana_proto_unhash(struct sock
*sk
)
/* get_port(): stub -- no port namespace for PF_LANA; body elided here. */
420 static int lana_proto_get_port(struct sock
*sk
, unsigned short sport
)
/*
 * Look up a registered LANA protocol by number; NULL when out of range
 * or unregistered. Raw dereference -- RCU read locking is elided here.
 */
425 static struct lana_protocol
*pflana_proto_get(int proto
)
427 struct lana_protocol
*ret
= NULL
;
429 if (proto
< 0 || proto
>= LANA_NPROTO
)
432 ret
= rcu_dereference_raw(proto_tab
[proto
]);
/*
 * PF_LANA socket create handler: resolve the protocol, allocate and
 * initialize the sock. Only the initial network namespace is supported.
 * NOTE(review): error unwinding after sk_alloc()/lana_sk_init() is
 * elided in this chunk.
 */
438 static int lana_family_create(struct net
*net
, struct socket
*sock
,
439 int protocol
, int kern
)
442 struct lana_protocol
*lp
;
443 struct lana_sock
*ls
;
445 if (!net_eq(net
, &init_net
))
446 return -EAFNOSUPPORT
;
/* AUTO: derive a concrete protocol from the socket type; raw access
 * requires CAP_SYS_ADMIN. */
448 if (protocol
== LANA_PROTO_AUTO
) {
449 switch (sock
->type
) {
451 if (!capable(CAP_SYS_ADMIN
))
453 protocol
= LANA_PROTO_RAW
;
456 return -EPROTONOSUPPORT
;
460 lp
= pflana_proto_get(protocol
);
462 return -EPROTONOSUPPORT
;
464 sk
= sk_alloc(net
, PF_LANA
, GFP_KERNEL
, lp
->proto
);
467 if (lana_sk_init(sk
) < 0) {
/* Wire up generic socket state and per-protocol callbacks. */
472 sock_init_data(sock
, sk
);
473 sock
->state
= SS_UNCONNECTED
;
476 sk
->sk_backlog_rcv
= sk
->sk_prot
->backlog_rcv
;
477 sk
->sk_protocol
= protocol
;
478 sk
->sk_family
= PF_LANA
;
479 sk
->sk_type
= sock
->type
;
480 sk
->sk_prot
->init(sk
);
/* PF_LANA family registration hooks passed to sock_register(). */
488 static const struct net_proto_family lana_family_ops
= {
490 .create
= lana_family_create
,
491 .owner
= THIS_MODULE
,
/*
 * proto_ops for raw PF_LANA sockets; everything below the marker comment
 * is deliberately stubbed with the sock_no_* helpers.
 */
494 static const struct proto_ops lana_raw_ops
= {
496 .owner
= THIS_MODULE
,
498 .release
= lana_raw_release
,
499 .recvmsg
= sock_common_recvmsg
,
500 .sendmsg
= lana_raw_sendmsg
,
501 .poll
= lana_raw_poll
,
502 .bind
= lana_raw_bind
,
503 /* v- not supported */
504 .setsockopt
= sock_no_setsockopt
,
505 .getsockopt
= sock_no_getsockopt
,
506 .connect
= sock_no_connect
,
507 .socketpair
= sock_no_socketpair
,
508 .accept
= sock_no_accept
,
509 .getname
= sock_no_getname
,
510 .ioctl
= sock_no_ioctl
,
511 .listen
= sock_no_listen
,
512 .shutdown
= sock_no_shutdown
,
513 .mmap
= sock_no_mmap
,
514 .sendpage
= sock_no_sendpage
,
/* struct proto shared by the LANA protocols defined in this file. */
517 static struct proto lana_proto __read_mostly
= {
519 .owner
= THIS_MODULE
,
520 .obj_size
= sizeof(struct lana_sock
),
521 .backlog_rcv
= lana_proto_backlog_rcv
,
522 .close
= lana_proto_close
,
523 .init
= lana_proto_init
,
524 .recvmsg
= lana_proto_recvmsg
,
525 .sendmsg
= lana_proto_sendmsg
,
526 .hash
= lana_proto_hash
,
527 .unhash
= lana_proto_unhash
,
528 .get_port
= lana_proto_get_port
,
/* Default RAW protocol entry registered by init_fb_pflana(). */
531 static struct lana_protocol lana_proto_raw __read_mostly
= {
532 .protocol
= LANA_PROTO_RAW
,
533 .ops
= &lana_raw_ops
,
534 .proto
= &lana_proto
,
535 .owner
= THIS_MODULE
,
/*
 * Register a LANA protocol handler at slot @proto.
 * NOTE(review): the occupancy check below runs before proto_tab_lock is
 * taken, so two concurrent registrations of the same slot could both
 * pass it; consider re-checking under the mutex.
 */
538 int pflana_proto_register(int proto
, struct lana_protocol
*lp
)
542 if (!lp
|| proto
< 0 || proto
>= LANA_NPROTO
)
544 if (rcu_dereference_raw(proto_tab
[proto
]))
/* Register the struct proto with the networking core. */
547 err
= proto_register(lp
->proto
, 1);
/* Publish the entry: the mutex serializes writers, readers use RCU. */
551 mutex_lock(&proto_tab_lock
);
552 lp
->protocol
= proto
;
553 rcu_assign_pointer(proto_tab
[proto
], lp
);
554 mutex_unlock(&proto_tab_lock
);
/* Pin foreign modules that provide a protocol. */
557 if (lp
->owner
!= THIS_MODULE
)
558 __module_get(lp
->owner
);
561 EXPORT_SYMBOL(pflana_proto_register
);
/*
 * Unregister a LANA protocol handler and drop its module pin.
 * NOTE(review): cleanup_fb_pflana() passes raw proto_tab entries that
 * may be NULL -- confirm the elided prologue guards against !lp. The
 * BUG_ON also reads proto_tab outside the mutex.
 */
563 void pflana_proto_unregister(struct lana_protocol
*lp
)
567 if (lp
->protocol
< 0 || lp
->protocol
>= LANA_NPROTO
)
569 if (!rcu_dereference_raw(proto_tab
[lp
->protocol
]))
572 BUG_ON(proto_tab
[lp
->protocol
] != lp
);
/* Unpublish under the writer mutex; readers are RCU-protected. */
574 mutex_lock(&proto_tab_lock
);
575 rcu_assign_pointer(proto_tab
[lp
->protocol
], NULL
);
576 mutex_unlock(&proto_tab_lock
);
579 proto_unregister(lp
->proto
);
580 if (lp
->owner
!= THIS_MODULE
)
581 module_put(lp
->owner
);
583 EXPORT_SYMBOL(pflana_proto_unregister
);
/*
 * Module-level init: clear the protocol table, register the default RAW
 * protocol, then the PF_LANA socket family (rolled back on failure).
 */
585 static int init_fb_pflana(void)
588 for (i
= 0; i
< LANA_NPROTO
; ++i
)
589 rcu_assign_pointer(proto_tab
[i
], NULL
);
591 /* Default proto types we definitely want to load */
592 ret
= pflana_proto_register(LANA_PROTO_RAW
, &lana_proto_raw
);
596 ret
= sock_register(&lana_family_ops
);
/* sock_register() failed: roll back the RAW protocol registration. */
598 pflana_proto_unregister(&lana_proto_raw
);
/*
 * Module-level teardown: unregister the family and every protocol slot.
 * NOTE(review): table entries may be NULL here -- see the note on
 * pflana_proto_unregister().
 */
604 static void cleanup_fb_pflana(void)
607 sock_unregister(PF_LANA
);
608 for (i
= 0; i
< LANA_NPROTO
; ++i
)
609 pflana_proto_unregister(rcu_dereference_raw(proto_tab
[i
]));
/* Forward declaration: the factory is referenced by the ctor below. */
612 static struct fblock_factory fb_pflana_factory
;
/*
 * Constructor for a PF_LANA fblock instance: allocate the fblock and its
 * per-CPU private data, initialize per-CPU seqlocks and ports, then
 * register it in the fblock namespace.
 * NOTE(review): NULL checks and error labels between the visible steps
 * are elided in this chunk.
 */
614 static struct fblock
*fb_pflana_ctor(char *name
)
619 struct fb_pflana_priv __percpu
*fb_priv
;
621 fb
= alloc_fblock(GFP_ATOMIC
);
624 fb_priv
= alloc_percpu(struct fb_pflana_priv
);
/* Start with both directions unbound on every online CPU. */
628 for_each_online_cpu(cpu
) {
629 struct fb_pflana_priv
*fb_priv_cpu
;
630 fb_priv_cpu
= per_cpu_ptr(fb_priv
, cpu
);
631 seqlock_init(&fb_priv_cpu
->lock
);
632 fb_priv_cpu
->port
[0] = IDP_UNKNOWN
;
633 fb_priv_cpu
->port
[1] = IDP_UNKNOWN
;
637 ret
= init_fblock(fb
, name
, fb_priv
);
640 fb
->netfb_rx
= fb_pflana_netrx
;
641 fb
->event_rx
= fb_pflana_event
;
642 fb
->factory
= &fb_pflana_factory
;
643 ret
= register_fblock_namespace(fb
);
/* Keep the module alive while an instance exists. */
646 __module_get(THIS_MODULE
);
/* Error unwinding (labels elided): undo ctor init, free per-CPU data. */
649 cleanup_fblock_ctor(fb
);
651 free_percpu(fb_priv
);
/* Destructor: release per-CPU private data and drop the module pin. */
658 static void fb_pflana_dtor(struct fblock
*fb
)
660 free_percpu(rcu_dereference_raw(fb
->private_data
));
661 module_put(THIS_MODULE
);
/* Factory descriptor used by the fblock builder to create/destroy
 * PF_LANA fblock instances. */
664 static struct fblock_factory fb_pflana_factory
= {
667 .ctor
= fb_pflana_ctor
,
668 .dtor
= fb_pflana_dtor
,
669 .owner
= THIS_MODULE
,
/* Module entry: set up PF_LANA, then register the fblock factory type. */
672 static int __init
init_fb_pflana_module(void)
675 ret
= init_fb_pflana();
678 ret
= register_fblock_type(&fb_pflana_factory
);
/* Module exit: unregister the factory type (family teardown elided). */
684 static void __exit
cleanup_fb_pflana_module(void)
687 unregister_fblock_type(&fb_pflana_factory
);
/* Module entry/exit registration and metadata. */
690 module_init(init_fb_pflana_module
);
691 module_exit(cleanup_fb_pflana_module
);
693 MODULE_LICENSE("GPL");
694 MODULE_AUTHOR("Daniel Borkmann <dborkma@tik.ee.ethz.ch>");
695 MODULE_DESCRIPTION("LANA PF_LANA module");