2 * Lightweight Autonomic Network Architecture
4 * Ethernet vlink layer. This module allows operating virtual LANA Ethernet
5 * devices which are configurable via ifconfig et al. and bound to a real
6 * underlying device. Similar to VLANs, multiple virtual devices can be
7 * bound to a real network device. Multiplexing and demultiplexing happens
10 * Copyright 2011 Daniel Borkmann <dborkma@tik.ee.ethz.ch>,
11 * Swiss federal institute of technology (ETH Zurich)
15 #include <linux/module.h>
16 #include <linux/kernel.h>
17 #include <linux/init.h>
18 #include <linux/notifier.h>
19 #include <linux/netdevice.h>
20 #include <linux/rtnetlink.h>
21 #include <linux/ethtool.h>
22 #include <linux/etherdevice.h>
23 #include <linux/if_ether.h>
24 #include <linux/if_arp.h>
26 #include <linux/list.h>
27 #include <linux/u64_stats_sync.h>
28 #include <net/rtnetlink.h>
31 #include "xt_fblock.h"
/*
 * LANA-specific bits kept in net_device->priv_flags: MAS is set on a real
 * (carrier) device once the vlink rx hook is attached, DEV is set on every
 * virtual device created by this module.
 */
33 #define IFF_VLINK_MAS 0x20000 /* Master device */
34 #define IFF_VLINK_DEV 0x40000 /* Slave device */
/*
 * The receive path masks h_proto with this value to recognise LANA frames
 * and treats the remaining bits as the virtual port number.
 */
36 /* Ethernet LANA packet with 10 Bit port ID */
37 #define ETH_P_LANA 0xAC00
45 struct u64_stats_sync syncp
;
/* Forward declarations of the ops tables; their initialisers follow below. */
50 static struct net_device_ops fb_ethvlink_netdev_ops __read_mostly
;
51 static struct rtnl_link_ops fb_ethvlink_rtnl_ops __read_mostly
;
52 static struct ethtool_ops fb_ethvlink_ethtool_ops __read_mostly
;
53 static struct header_ops fb_ethvlink_header_ops __read_mostly
;
/*
 * List of all virtual devices created by this module. Additions and removals
 * are done under fb_ethvlink_vdevs_lock with the _rcu list helpers; lookups
 * walk the list with list_for_each_entry_rcu().
 */
55 static LIST_HEAD(fb_ethvlink_vdevs
);
56 static DEFINE_SPINLOCK(fb_ethvlink_vdevs_lock
);
/*
 * Private area of a virtual vlink net_device (obtained via netdev_priv()).
 * NOTE(review): a `port` member is used throughout the file but its
 * declaration is not shown in this block -- confirm its type.
 */
58 struct fb_ethvlink_private
{
/* Linkage into the global fb_ethvlink_vdevs list. */
60 struct list_head list
;
/* Back pointer to the virtual net_device that owns this private area. */
61 struct net_device
*self
;
/* Underlying real (carrier) device this virtual device is stacked on. */
62 struct net_device
*real_dev
;
/* Ingress callback invoked by the rx handler for frames matching this device. */
63 int (*netvif_rx
)(struct sk_buff
*skb
, struct net_device
*dev
);
/*
 * ndo_init callback: allocates the per-CPU statistics block of the device.
 * NOTE(review): the allocation-failure check and the return statement are
 * not shown here -- confirm the result of alloc_percpu() is checked.
 */
66 static int fb_ethvlink_init(struct net_device
*dev
)
68 dev
->dstats
= alloc_percpu(struct pcpu_dstats
);
/* ndo_uninit callback: frees the per-CPU statistics block in dev->dstats. */
74 static void fb_ethvlink_uninit(struct net_device
*dev
)
76 free_percpu(dev
->dstats
);
/*
 * ndo_open callback: starts the transmit queue and, if the underlying real
 * device currently has carrier, marks the virtual device as carrier-on.
 */
79 static int fb_ethvlink_open(struct net_device
*dev
)
81 struct fb_ethvlink_private
*dev_priv
= netdev_priv(dev
);
83 netif_start_queue(dev
);
/* Only report carrier when the real device below us has it. */
84 if (netif_carrier_ok(dev_priv
->real_dev
)) {
85 netif_tx_lock_bh(dev
);
86 netif_carrier_on(dev
);
87 netif_tx_unlock_bh(dev
);
/*
 * ndo_stop callback: drops carrier on the virtual device (under the tx lock)
 * and then stops its transmit queue.
 */
93 static int fb_ethvlink_stop(struct net_device
*dev
)
95 netif_tx_lock_bh(dev
);
96 netif_carrier_off(dev
);
97 netif_tx_unlock_bh(dev
);
98 netif_stop_queue(dev
);
/* Returns non-zero when IFF_VLINK_MAS is set in dev->priv_flags. */
103 static inline int fb_ethvlink_real_dev_is_hooked(struct net_device
*dev
)
105 return (dev
->priv_flags
& IFF_VLINK_MAS
) == IFF_VLINK_MAS
;
/* Marks a real device as hooked by setting IFF_VLINK_MAS in its priv_flags. */
108 static inline void fb_ethvlink_make_real_dev_hooked(struct net_device
*dev
)
110 dev
->priv_flags
|= IFF_VLINK_MAS
;
/* Clears IFF_VLINK_MAS in the real device's priv_flags again. */
113 static inline void fb_ethvlink_make_real_dev_unhooked(struct net_device
*dev
)
115 dev
->priv_flags
&= ~IFF_VLINK_MAS
;
/*
 * Re-targets the skb from the virtual device to its real device and queues
 * it for transmission. Returns whatever dev_queue_xmit() returns.
 */
118 static int fb_ethvlink_queue_xmit(struct sk_buff
*skb
,
119 struct net_device
*dev
)
121 struct fb_ethvlink_private
*dev_priv
= netdev_priv(dev
);
123 /* Exit the lana stack here, egress path */
/* NOTE(review): this emits one debug message per transmitted packet. */
124 netdev_printk(KERN_DEBUG
, dev
, "tx'ed packet!\n");
125 skb_set_dev(skb
, dev_priv
->real_dev
);
126 return dev_queue_xmit(skb
);
130 * Egress path. This is fairly easy, since we enter with our virtual
131 * device and just need to lookup the real networking device, reset the
132 * skb to the real device and enqueue it. Done!
134 netdev_tx_t
fb_ethvlink_start_xmit(struct sk_buff
*skb
,
135 struct net_device
*dev
)
138 struct pcpu_dstats
*dstats
;
140 dstats
= this_cpu_ptr(dev
->dstats
);
141 ret
= fb_ethvlink_queue_xmit(skb
, dev
);
142 if (likely(ret
== NET_XMIT_SUCCESS
|| ret
== NET_XMIT_CN
)) {
143 u64_stats_update_begin(&dstats
->syncp
);
144 dstats
->tx_packets
++;
145 dstats
->tx_bytes
+= skb
->len
;
146 u64_stats_update_end(&dstats
->syncp
);
148 this_cpu_inc(dstats
->tx_dropped
);
/*
 * Default ingress callback installed as netvif_rx on new virtual devices.
 * Currently only logs the reception and reports NET_RX_SUCCESS.
 */
153 int fb_ethvlink_handle_frame_virt(struct sk_buff
*skb
,
154 struct net_device
*dev
)
156 /* Enter the lana stack here, ingress path */
157 netdev_printk(KERN_DEBUG
, dev
, "rx'ed packet!\n");
158 return NET_RX_SUCCESS
;
162 * Origin __netif_receive_skb, with rcu_read_lock! We're at a point
163 * where bridging code and macvlan code is usually invoked, so we're
164 * in fast-path on our real device (not virtual!) before all the usual
165 * stack is being processed by deliver_skb! This means we return
166 * RX_HANDLER_CONSUMED if our lana stack processed the packet, so that the
167 * rcu_read_lock gets unlocked and we're done. On the other hand, if we want
168 * packets to be processed by the kernel network stack, we go out by
169 * returning RX_HANDLER_PASS. Basically, here's the point where we
170 * demultiplex the ingress path to registered virtual lana devices.
/*
 * rx handler registered on the real (carrier) device. Frames whose h_proto
 * does not carry the ETH_P_LANA bits, and loopback packets, are passed on
 * (RX_HANDLER_PASS). For LANA frames the port number is taken from the
 * remaining h_proto bits and the frame is given to the netvif_rx callback of
 * the virtual device matching both port and real device.
 *
 * The error counter is incremented through the pointer already resolved by
 * this_cpu_ptr(); this_cpu_inc() expects a per-CPU lvalue and must not be
 * applied to that resolved pointer.
 *
 * NOTE(review): where `dev` is assigned, and the drop path, are not shown
 * here -- confirm `dev` is initialised before its flags are tested.
 */
172 static rx_handler_result_t
fb_ethvlink_handle_frame(struct sk_buff
**pskb
)
176 struct sk_buff
*skb
= *pskb
;
177 struct net_device
*dev
;
178 struct fb_ethvlink_private
*vdev
;
179 struct pcpu_dstats
*dstats
;
182 if (unlikely((dev
->flags
& IFF_UP
) != IFF_UP
))
185 if (unlikely(skb
->pkt_type
== PACKET_LOOPBACK
))
186 return RX_HANDLER_PASS
;
188 if (unlikely(!is_valid_ether_addr(eth_hdr(skb
)->h_source
)))
191 skb
= skb_share_check(skb
, GFP_ATOMIC
);
193 return RX_HANDLER_CONSUMED
;
/* Not a LANA frame: leave it to the regular network stack. */
195 if ((eth_hdr(skb
)->h_proto
& __constant_htons(ETH_P_LANA
)) !=
196 __constant_htons(ETH_P_LANA
))
197 return RX_HANDLER_PASS
;
/* Whatever is left of h_proto after removing the LANA bits is the port. */
199 vport
= ntohs(eth_hdr(skb
)->h_proto
&
200 ~__constant_htons(ETH_P_LANA
));
202 list_for_each_entry_rcu(vdev
, &fb_ethvlink_vdevs
, list
) {
203 if (vport
== vdev
->port
&& dev
== vdev
->real_dev
) {
204 dstats
= this_cpu_ptr(vdev
->self
->dstats
);
205 ret
= vdev
->netvif_rx(skb
, vdev
->self
);
206 if (ret
== NET_RX_SUCCESS
) {
207 u64_stats_update_begin(&dstats
->syncp
);
208 dstats
->rx_packets
++;
209 dstats
->rx_bytes
+= skb
->len
;
210 u64_stats_update_end(&dstats
->syncp
);
212 dstats
->rx_errors
++;
219 return RX_HANDLER_CONSUMED
;
/* ethtool get_drvinfo: reports driver name "ethvlink" and version "0.1". */
222 static void fb_ethvlink_ethtool_get_drvinfo(struct net_device
*dev
,
223 struct ethtool_drvinfo
*drvinfo
)
225 snprintf(drvinfo
->driver
, sizeof(drvinfo
->driver
), "ethvlink");
226 snprintf(drvinfo
->version
, sizeof(drvinfo
->version
), "0.1");
/* ethtool get_rx_csum: forwards the query to the underlying real device. */
229 static u32
fb_ethvlink_ethtool_get_rx_csum(struct net_device
*dev
)
231 const struct fb_ethvlink_private
*vdev
= netdev_priv(dev
);
232 return dev_ethtool_get_rx_csum(vdev
->real_dev
);
/* ethtool get_settings: forwards the query to the underlying real device. */
235 static int fb_ethvlink_ethtool_get_settings(struct net_device
*dev
,
236 struct ethtool_cmd
*cmd
)
238 const struct fb_ethvlink_private
*vdev
= netdev_priv(dev
);
239 return dev_ethtool_get_settings(vdev
->real_dev
, cmd
);
/* ethtool get_flags: forwards the query to the underlying real device. */
242 static u32
fb_ethvlink_ethtool_get_flags(struct net_device
*dev
)
244 const struct fb_ethvlink_private
*vdev
= netdev_priv(dev
);
245 return dev_ethtool_get_flags(vdev
->real_dev
);
/*
 * Setup callback for new virtual devices (passed to alloc_netdev() and used
 * as rtnl_link_ops.setup): wires up the ops tables, disables the tx queue
 * length, makes free_netdev() the destructor, assigns a random MAC address
 * and zeroes the broadcast address.
 */
248 static void fb_ethvlink_dev_setup(struct net_device
*dev
)
252 dev
->ethtool_ops
= &fb_ethvlink_ethtool_ops
;
253 dev
->netdev_ops
= &fb_ethvlink_netdev_ops
;
254 dev
->rtnl_link_ops
= &fb_ethvlink_rtnl_ops
;
255 dev
->header_ops
= &fb_ethvlink_header_ops
;
256 dev
->tx_queue_len
= 0;
257 dev
->priv_flags
&= ~IFF_XMIT_DST_RELEASE
;
/* The net_device is freed automatically once it is unregistered. */
258 dev
->destructor
= free_netdev
;
260 random_ether_addr(dev
->dev_addr
);
261 memset(dev
->broadcast
, 0, sizeof(dev
->broadcast
));
/*
 * rtnl_link_ops.validate: if an IFLA_ADDRESS attribute is supplied it must be
 * exactly ETH_ALEN bytes long and a valid Ethernet address, otherwise
 * -EADDRNOTAVAIL is returned for an invalid address.
 * NOTE(review): the result for a wrong attribute length and the success
 * return are not shown here.
 */
264 static int fb_ethvlink_validate(struct nlattr
**tb
, struct nlattr
**data
)
266 if (tb
[IFLA_ADDRESS
]) {
267 if (nla_len(tb
[IFLA_ADDRESS
]) != ETH_ALEN
)
269 if (!is_valid_ether_addr(nla_data(tb
[IFLA_ADDRESS
])))
270 return -EADDRNOTAVAIL
;
/*
 * header_ops.create: builds the link-layer header using the real device's
 * header routine. When no source address is given, the virtual device's own
 * address (dev->dev_addr) is used.
 */
276 static int fb_ethvlink_create_header(struct sk_buff
*skb
,
277 struct net_device
*dev
,
278 unsigned short type
, const void *daddr
,
279 const void *saddr
, unsigned len
)
281 const struct fb_ethvlink_private
*vdev
= netdev_priv(dev
);
282 return dev_hard_header(skb
, vdev
->real_dev
, type
, daddr
,
283 saddr
? : dev
->dev_addr
, len
);
/*
 * ndo_get_stats64: sums the per-CPU tx/rx byte and packet counters of every
 * possible CPU into *stats. Each CPU's counters are read inside a
 * u64_stats_fetch_begin()/u64_stats_fetch_retry() loop so that a consistent
 * snapshot is taken while a writer may be updating them.
 */
286 static struct rtnl_link_stats64
*
287 fb_ethvlink_get_stats64(struct net_device
*dev
,
288 struct rtnl_link_stats64
*stats
)
292 for_each_possible_cpu(i
) {
293 u64 tbytes
, tpackets
, rbytes
, rpackets
;
295 const struct pcpu_dstats
*dstats
;
297 dstats
= per_cpu_ptr(dev
->dstats
, i
);
/* Retry until the snapshot was not disturbed by a concurrent update. */
300 start
= u64_stats_fetch_begin(&dstats
->syncp
);
301 tbytes
= dstats
->tx_bytes
;
302 tpackets
= dstats
->tx_packets
;
303 rbytes
= dstats
->rx_bytes
;
304 rpackets
= dstats
->rx_packets
;
305 } while (u64_stats_fetch_retry(&dstats
->syncp
, start
));
307 stats
->tx_bytes
+= tbytes
;
308 stats
->tx_packets
+= tpackets
;
309 stats
->rx_bytes
+= rbytes
;
310 stats
->rx_packets
+= rpackets
;
/*
 * vlink callback for VLINKNLCMD_ADD_DEVICE: creates a virtual device named
 * vhdr->virt_name stacked on the real device vhdr->real_name with port
 * vhdr->port. Messages with another command yield NETLINK_VLINK_RX_NXT;
 * success yields NETLINK_VLINK_RX_STOP and failure NETLINK_VLINK_RX_EMERG.
 * NOTE(review): the error/cleanup paths are not shown here.
 */
316 static int fb_ethvlink_add_dev(struct vlinknlmsg
*vhdr
,
317 struct nlmsghdr
*nlh
)
321 struct net_device
*dev
;
322 struct net_device
*root
;
323 struct fb_ethvlink_private
*dev_priv
, *vdev
;
325 if (vhdr
->cmd
!= VLINKNLCMD_ADD_DEVICE
)
326 return NETLINK_VLINK_RX_NXT
;
/* Look up the requested virtual name first. */
328 root
= dev_get_by_name(&init_net
, vhdr
->virt_name
);
/* The carrier is looked up by name and tested for being a vlink device itself. */
332 root
= dev_get_by_name(&init_net
, vhdr
->real_name
);
333 if (root
&& (root
->priv_flags
& IFF_VLINK_DEV
) == IFF_VLINK_DEV
)
/* Scan the existing virtual devices for one using the requested port. */
341 list_for_each_entry_rcu(vdev
, &fb_ethvlink_vdevs
, list
) {
342 if (vdev
->port
== vhdr
->port
) {
349 dev
= alloc_netdev(sizeof(*dev_priv
), vhdr
->virt_name
,
350 fb_ethvlink_dev_setup
);
354 ret
= dev_alloc_name(dev
, dev
->name
);
358 ret
= register_netdev(dev
);
/* NOTE(review): caller-supplied flags are OR'ed into priv_flags unfiltered. */
362 dev
->priv_flags
|= vhdr
->flags
;
363 dev
->priv_flags
|= IFF_VLINK_DEV
;
364 dev_priv
= netdev_priv(dev
);
365 dev_priv
->port
= vhdr
->port
;
366 dev_priv
->self
= dev
;
367 dev_priv
->real_dev
= root
;
368 dev_priv
->netvif_rx
= fb_ethvlink_handle_frame_virt
;
370 netif_stacked_transfer_operstate(dev_priv
->real_dev
, dev
);
/*
 * The reference from dev_get_by_name() is dropped here although the pointer
 * stays stored in real_dev.
 */
372 dev_put(dev_priv
->real_dev
);
374 spin_lock_irqsave(&fb_ethvlink_vdevs_lock
, flags
);
375 list_add_rcu(&dev_priv
->list
, &fb_ethvlink_vdevs
);
376 spin_unlock_irqrestore(&fb_ethvlink_vdevs_lock
, flags
);
/* New devices start without carrier; fb_ethvlink_open() turns it on. */
378 netif_tx_lock_bh(dev
);
379 netif_carrier_off(dev
);
380 netif_tx_unlock_bh(dev
);
382 printk(KERN_INFO
"[lana] %s stacked on carrier %s:%u\n",
383 vhdr
->virt_name
, vhdr
->real_name
, dev_priv
->port
);
384 return NETLINK_VLINK_RX_STOP
;
390 return NETLINK_VLINK_RX_EMERG
;
/*
 * vlink callback for VLINKNLCMD_START_HOOK_DEVICE: registers
 * fb_ethvlink_handle_frame as rx handler on the real device named
 * vhdr->real_name and marks that device as hooked. Other commands yield
 * NETLINK_VLINK_RX_NXT.
 * NOTE(review): the handling of a missing device, of an already hooked
 * device and of a failed registration is not shown here.
 */
396 static int fb_ethvlink_start_hook_dev(struct vlinknlmsg
*vhdr
,
397 struct nlmsghdr
*nlh
)
400 struct net_device
*root
;
402 if (vhdr
->cmd
!= VLINKNLCMD_START_HOOK_DEVICE
)
403 return NETLINK_VLINK_RX_NXT
;
405 root
= dev_get_by_name(&init_net
, vhdr
->real_name
);
/* The looked-up device is tested for being a virtual vlink device itself. */
406 if (root
&& (root
->priv_flags
& IFF_VLINK_DEV
) == IFF_VLINK_DEV
)
409 return NETLINK_VLINK_RX_EMERG
;
411 if (fb_ethvlink_real_dev_is_hooked(root
))
415 ret
= netdev_rx_handler_register(root
, fb_ethvlink_handle_frame
,
421 fb_ethvlink_make_real_dev_hooked(root
);
422 printk(KERN_INFO
"[lana] hook attached to carrier %s\n",
426 return NETLINK_VLINK_RX_STOP
;
429 return NETLINK_VLINK_RX_EMERG
;
/*
 * vlink callback for VLINKNLCMD_STOP_HOOK_DEVICE: unregisters the rx handler
 * from the real device named vhdr->real_name and clears its hooked mark.
 * Other commands yield NETLINK_VLINK_RX_NXT.
 * NOTE(review): the handling of a missing or not-hooked device is not shown
 * here.
 */
432 static int fb_ethvlink_stop_hook_dev(struct vlinknlmsg
*vhdr
,
433 struct nlmsghdr
*nlh
)
435 struct net_device
*root
;
437 if (vhdr
->cmd
!= VLINKNLCMD_STOP_HOOK_DEVICE
)
438 return NETLINK_VLINK_RX_NXT
;
440 root
= dev_get_by_name(&init_net
, vhdr
->real_name
);
/* The looked-up device is tested for being a virtual vlink device itself. */
441 if (root
&& (root
->priv_flags
& IFF_VLINK_DEV
) == IFF_VLINK_DEV
)
444 return NETLINK_VLINK_RX_EMERG
;
446 if (!fb_ethvlink_real_dev_is_hooked(root
))
450 netdev_rx_handler_unregister(root
);
453 fb_ethvlink_make_real_dev_unhooked(root
);
454 printk(KERN_INFO
"[lana] hook detached from carrier %s\n",
458 return NETLINK_VLINK_RX_STOP
;
461 return NETLINK_VLINK_RX_EMERG
;
/*
 * Common teardown of one virtual device: drops carrier, logs the removal and
 * unregisters the net_device.
 * NOTE(review): unregister_netdevice() expects the caller to hold the RTNL
 * lock; the locking around it is not shown here -- confirm.
 */
464 static void fb_ethvlink_rm_dev_common(struct net_device
*dev
)
466 netif_tx_lock_bh(dev
);
467 netif_carrier_off(dev
);
468 netif_tx_unlock_bh(dev
);
470 printk(KERN_INFO
"[lana] %s unregistered\n", dev
->name
);
473 unregister_netdevice(dev
);
/*
 * vlink callback for VLINKNLCMD_RM_DEVICE: removes the virtual device named
 * vhdr->virt_name. The device's flags are tested for IFF_VLINK_DEV and
 * IFF_RUNNING first. The rx hook is detached from the real device as well
 * when this was its last client. Other commands yield NETLINK_VLINK_RX_NXT;
 * success yields NETLINK_VLINK_RX_STOP and failure NETLINK_VLINK_RX_EMERG.
 * NOTE(review): what follows each flag test, and the release of the
 * reference taken by dev_get_by_name(), are not shown here.
 */
477 static int fb_ethvlink_rm_dev(struct vlinknlmsg
*vhdr
, struct nlmsghdr
*nlh
)
481 struct fb_ethvlink_private
*dev_priv
, *vdev
;
482 struct net_device
*dev
;
484 if (vhdr
->cmd
!= VLINKNLCMD_RM_DEVICE
)
485 return NETLINK_VLINK_RX_NXT
;
487 dev
= dev_get_by_name(&init_net
, vhdr
->virt_name
);
489 return NETLINK_VLINK_RX_EMERG
;
490 if ((dev
->priv_flags
& IFF_VLINK_DEV
) != IFF_VLINK_DEV
)
492 if ((dev
->flags
& IFF_RUNNING
) == IFF_RUNNING
)
496 dev_priv
= netdev_priv(dev
);
/* Look for virtual devices stacked on the same carrier. */
500 list_for_each_entry_rcu(vdev
, &fb_ethvlink_vdevs
, list
)
501 if (dev_priv
->real_dev
== vdev
->real_dev
)
506 /* We're last client on carrier! */
507 if (fb_ethvlink_real_dev_is_hooked(dev_priv
->real_dev
)) {
509 netdev_rx_handler_unregister(dev_priv
->real_dev
);
512 fb_ethvlink_make_real_dev_unhooked(dev_priv
->real_dev
);
513 printk(KERN_INFO
"[lana] hook detached from %s\n",
514 dev_priv
->real_dev
->name
);
/* Unlink from the global list before the device itself goes away. */
518 spin_lock_irqsave(&fb_ethvlink_vdevs_lock
, flags
);
519 list_del_rcu(&dev_priv
->list
);
520 spin_unlock_irqrestore(&fb_ethvlink_vdevs_lock
, flags
);
522 fb_ethvlink_rm_dev_common(dev
);
524 return NETLINK_VLINK_RX_STOP
;
528 return NETLINK_VLINK_RX_EMERG
;
/*
 * Netdevice notifier callback. Operstate changes of a real device are
 * propagated to the virtual devices stacked on it, and when a real device is
 * being unregistered every virtual device on top of it is removed through
 * fb_ethvlink_rm_dev().
 *
 * The name copy is bounded by the size of the destination buffer. strlcpy()
 * copies at most size - 1 characters, so passing strlen() of the source
 * would drop the last character of the device name and make the removal
 * look up the wrong name.
 *
 * NOTE(review): fb_ethvlink_rm_dev() is called with fb_ethvlink_vdevs_lock
 * held and takes the same spinlock itself before unlinking the entry; this
 * looks like a self-deadlock -- confirm and restructure if so.
 */
531 static int fb_ethvlink_dev_event(struct notifier_block
*self
,
532 unsigned long event
, void *ptr
)
535 struct net_device
*dev
= ptr
;
536 struct fb_ethvlink_private
*vdev
;
537 struct vlinknlmsg vhdr
;
545 list_for_each_entry_rcu(vdev
, &fb_ethvlink_vdevs
, list
)
546 if (vdev
->real_dev
== dev
)
547 netif_stacked_transfer_operstate(vdev
->real_dev
,
551 case NETDEV_FEAT_CHANGE
:
552 /* Nothing right now */
554 case NETDEV_UNREGISTER
:
555 if (dev
->reg_state
!= NETREG_UNREGISTERING
)
/* Build a synthetic RM_DEVICE request for each stacked virtual device. */
558 memset(&vhdr
, 0, sizeof(vhdr
));
559 vhdr
.cmd
= VLINKNLCMD_RM_DEVICE
;
560 spin_lock_irqsave(&fb_ethvlink_vdevs_lock
, flags
);
561 list_for_each_entry_rcu(vdev
, &fb_ethvlink_vdevs
, list
) {
562 if (vdev
->real_dev
== dev
) {
563 memset(vhdr
.virt_name
, 0,
564 sizeof(vhdr
.virt_name
));
565 strlcpy(vhdr
.virt_name
, vdev
->self
->name
,
566 sizeof(vhdr
.virt_name
));
567 fb_ethvlink_rm_dev(&vhdr
, NULL
);
570 spin_unlock_irqrestore(&fb_ethvlink_vdevs_lock
, flags
);
572 case NETDEV_PRE_TYPE_CHANGE
:
/*
 * ethtool operations of a virtual device; settings, rx checksum and flags
 * are answered by querying the underlying real device.
 */
581 static struct ethtool_ops fb_ethvlink_ethtool_ops __read_mostly
= {
582 .get_link
= ethtool_op_get_link
,
583 .get_settings
= fb_ethvlink_ethtool_get_settings
,
584 .get_rx_csum
= fb_ethvlink_ethtool_get_rx_csum
,
585 .get_drvinfo
= fb_ethvlink_ethtool_get_drvinfo
,
586 .get_flags
= fb_ethvlink_ethtool_get_flags
,
/*
 * net_device operations of a virtual device. MTU and MAC address handling
 * use the generic Ethernet helpers.
 */
589 static struct net_device_ops fb_ethvlink_netdev_ops __read_mostly
= {
590 .ndo_init
= fb_ethvlink_init
,
591 .ndo_uninit
= fb_ethvlink_uninit
,
592 .ndo_open
= fb_ethvlink_open
,
593 .ndo_stop
= fb_ethvlink_stop
,
594 .ndo_start_xmit
= fb_ethvlink_start_xmit
,
595 .ndo_get_stats64
= fb_ethvlink_get_stats64
,
596 .ndo_change_mtu
= eth_change_mtu
,
597 .ndo_set_mac_address
= eth_mac_addr
,
598 .ndo_validate_addr
= eth_validate_addr
,
/*
 * Link-layer header operations: header creation goes through
 * fb_ethvlink_create_header(), everything else uses the generic Ethernet
 * helpers.
 */
601 static struct header_ops fb_ethvlink_header_ops __read_mostly
= {
602 .create
= fb_ethvlink_create_header
,
603 .rebuild
= eth_rebuild_header
,
604 .parse
= eth_header_parse
,
605 .cache
= eth_header_cache
,
606 .cache_update
= eth_header_cache_update
,
/*
 * rtnetlink link operations registered at module load.
 * NOTE(review): only priv_size, setup and validate are shown here.
 */
609 static struct rtnl_link_ops fb_ethvlink_rtnl_ops __read_mostly
= {
611 .priv_size
= sizeof(struct fb_ethvlink_private
),
612 .setup
= fb_ethvlink_dev_setup
,
613 .validate
= fb_ethvlink_validate
,
/*
 * vlink subsystem descriptor of the Ethernet group; the callbacks below are
 * attached to it at module load.
 */
616 static struct vlink_subsys fb_ethvlink_sys __read_mostly
= {
618 .type
= VLINKNLGRP_ETHERNET
,
619 .rwsem
= __RWSEM_INITIALIZER(fb_ethvlink_sys
.rwsem
),
/* Netdevice notifier that routes device events to fb_ethvlink_dev_event(). */
622 static struct notifier_block fb_ethvlink_notifier_block __read_mostly
= {
623 .notifier_call
= fb_ethvlink_dev_event
,
/*
 * vlink callbacks for the four supported commands. The hook start/stop
 * callbacks are registered with high priority, device add/remove with
 * normal priority.
 */
626 static struct vlink_callback fb_ethvlink_add_dev_cb
=
627 VLINK_CALLBACK_INIT(fb_ethvlink_add_dev
, NETLINK_VLINK_PRIO_NORM
);
628 static struct vlink_callback fb_ethvlink_rm_dev_cb
=
629 VLINK_CALLBACK_INIT(fb_ethvlink_rm_dev
, NETLINK_VLINK_PRIO_NORM
);
630 static struct vlink_callback fb_ethvlink_start_hook_dev_cb
=
631 VLINK_CALLBACK_INIT(fb_ethvlink_start_hook_dev
, NETLINK_VLINK_PRIO_HIGH
);
632 static struct vlink_callback fb_ethvlink_stop_hook_dev_cb
=
633 VLINK_CALLBACK_INIT(fb_ethvlink_stop_hook_dev
, NETLINK_VLINK_PRIO_HIGH
);
/*
 * Module entry point: registers the vlink subsystem and its four callbacks,
 * the rtnetlink link ops and the netdevice notifier, in that order.
 * NOTE(review): the error checks between the steps are not shown here; the
 * trailing vlink_subsys_unregister_batch() presumably belongs to the error
 * path -- confirm.
 */
635 static int __init
init_fb_ethvlink_module(void)
639 ret
= vlink_subsys_register(&fb_ethvlink_sys
);
643 vlink_add_callback(&fb_ethvlink_sys
, &fb_ethvlink_add_dev_cb
);
644 vlink_add_callback(&fb_ethvlink_sys
, &fb_ethvlink_rm_dev_cb
);
645 vlink_add_callback(&fb_ethvlink_sys
, &fb_ethvlink_start_hook_dev_cb
);
646 vlink_add_callback(&fb_ethvlink_sys
, &fb_ethvlink_stop_hook_dev_cb
);
648 ret
= rtnl_link_register(&fb_ethvlink_rtnl_ops
);
652 register_netdevice_notifier(&fb_ethvlink_notifier_block
);
654 printk(KERN_INFO
"[lana] Ethernet vlink layer loaded!\n");
658 vlink_subsys_unregister_batch(&fb_ethvlink_sys
);
/*
 * Module exit: detaches the rx hook from every real device that is still
 * hooked, tears down all virtual devices, then unregisters the notifier,
 * the rtnetlink link ops and the vlink subsystem.
 * NOTE(review): each list entry lives in its net_device's private area and
 * the device destructor is free_netdev(); confirm the walk below cannot
 * touch an entry after its device has been freed.
 */
662 static void __exit
cleanup_fb_ethvlink_module(void)
664 struct fb_ethvlink_private
*vdev
;
667 list_for_each_entry_rcu(vdev
, &fb_ethvlink_vdevs
, list
) {
668 if (fb_ethvlink_real_dev_is_hooked(vdev
->real_dev
)) {
670 netdev_rx_handler_unregister(vdev
->real_dev
);
673 fb_ethvlink_make_real_dev_unhooked(vdev
->real_dev
);
674 printk(KERN_INFO
"[lana] hook detached from %s\n",
675 vdev
->real_dev
->name
);
678 fb_ethvlink_rm_dev_common(vdev
->self
);
682 unregister_netdevice_notifier(&fb_ethvlink_notifier_block
);
683 rtnl_link_unregister(&fb_ethvlink_rtnl_ops
);
684 vlink_subsys_unregister_batch(&fb_ethvlink_sys
);
686 printk(KERN_INFO
"[lana] Ethernet vlink layer removed!\n");
/* Module entry/exit registration and module metadata. */
689 module_init(init_fb_ethvlink_module
);
690 module_exit(cleanup_fb_ethvlink_module
);
692 MODULE_ALIAS_RTNL_LINK("lana");
693 MODULE_LICENSE("GPL");
694 MODULE_AUTHOR("Daniel Borkmann <dborkma@tik.ee.ethz.ch>");
695 MODULE_DESCRIPTION("Ethernet virtual link layer driver");