/*
 * net/switchdev/switchdev.c - Switch device API
 * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
 * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
12 #include <linux/kernel.h>
13 #include <linux/types.h>
14 #include <linux/init.h>
15 #include <linux/mutex.h>
16 #include <linux/notifier.h>
17 #include <linux/netdevice.h>
18 #include <net/ip_fib.h>
19 #include <net/switchdev.h>
22 * switchdev_port_attr_get - Get port attribute
25 * @attr: attribute to get
27 int switchdev_port_attr_get(struct net_device
*dev
, struct switchdev_attr
*attr
)
29 const struct switchdev_ops
*ops
= dev
->switchdev_ops
;
30 struct net_device
*lower_dev
;
31 struct list_head
*iter
;
32 struct switchdev_attr first
= {
33 .id
= SWITCHDEV_ATTR_UNDEFINED
35 int err
= -EOPNOTSUPP
;
37 if (ops
&& ops
->switchdev_port_attr_get
)
38 return ops
->switchdev_port_attr_get(dev
, attr
);
40 if (attr
->flags
& SWITCHDEV_F_NO_RECURSE
)
43 /* Switch device port(s) may be stacked under
44 * bond/team/vlan dev, so recurse down to get attr on
45 * each port. Return -ENODATA if attr values don't
46 * compare across ports.
49 netdev_for_each_lower_dev(dev
, lower_dev
, iter
) {
50 err
= switchdev_port_attr_get(lower_dev
, attr
);
53 if (first
.id
== SWITCHDEV_ATTR_UNDEFINED
)
55 else if (memcmp(&first
, attr
, sizeof(*attr
)))
61 EXPORT_SYMBOL_GPL(switchdev_port_attr_get
);
63 static int __switchdev_port_attr_set(struct net_device
*dev
,
64 struct switchdev_attr
*attr
)
66 const struct switchdev_ops
*ops
= dev
->switchdev_ops
;
67 struct net_device
*lower_dev
;
68 struct list_head
*iter
;
69 int err
= -EOPNOTSUPP
;
71 if (ops
&& ops
->switchdev_port_attr_set
)
72 return ops
->switchdev_port_attr_set(dev
, attr
);
74 if (attr
->flags
& SWITCHDEV_F_NO_RECURSE
)
77 /* Switch device port(s) may be stacked under
78 * bond/team/vlan dev, so recurse down to set attr on
82 netdev_for_each_lower_dev(dev
, lower_dev
, iter
) {
83 err
= __switchdev_port_attr_set(lower_dev
, attr
);
91 struct switchdev_attr_set_work
{
92 struct work_struct work
;
93 struct net_device
*dev
;
94 struct switchdev_attr attr
;
97 static void switchdev_port_attr_set_work(struct work_struct
*work
)
99 struct switchdev_attr_set_work
*asw
=
100 container_of(work
, struct switchdev_attr_set_work
, work
);
104 err
= switchdev_port_attr_set(asw
->dev
, &asw
->attr
);
112 static int switchdev_port_attr_set_defer(struct net_device
*dev
,
113 struct switchdev_attr
*attr
)
115 struct switchdev_attr_set_work
*asw
;
117 asw
= kmalloc(sizeof(*asw
), GFP_ATOMIC
);
121 INIT_WORK(&asw
->work
, switchdev_port_attr_set_work
);
125 memcpy(&asw
->attr
, attr
, sizeof(asw
->attr
));
127 schedule_work(&asw
->work
);
133 * switchdev_port_attr_set - Set port attribute
136 * @attr: attribute to set
138 * Use a 2-phase prepare-commit transaction model to ensure
139 * system is not left in a partially updated state due to
140 * failure from driver/device.
142 int switchdev_port_attr_set(struct net_device
*dev
, struct switchdev_attr
*attr
)
146 if (!rtnl_is_locked()) {
147 /* Running prepare-commit transaction across stacked
148 * devices requires nothing moves, so if rtnl_lock is
149 * not held, schedule a worker thread to hold rtnl_lock
150 * while setting attr.
153 return switchdev_port_attr_set_defer(dev
, attr
);
156 /* Phase I: prepare for attr set. Driver/device should fail
157 * here if there are going to be issues in the commit phase,
158 * such as lack of resources or support. The driver/device
159 * should reserve resources needed for the commit phase here,
160 * but should not commit the attr.
163 attr
->trans
= SWITCHDEV_TRANS_PREPARE
;
164 err
= __switchdev_port_attr_set(dev
, attr
);
166 /* Prepare phase failed: abort the transaction. Any
167 * resources reserved in the prepare phase are
171 attr
->trans
= SWITCHDEV_TRANS_ABORT
;
172 __switchdev_port_attr_set(dev
, attr
);
177 /* Phase II: commit attr set. This cannot fail as a fault
178 * of driver/device. If it does, it's a bug in the driver/device
179 * because the driver said everythings was OK in phase I.
182 attr
->trans
= SWITCHDEV_TRANS_COMMIT
;
183 err
= __switchdev_port_attr_set(dev
, attr
);
188 EXPORT_SYMBOL_GPL(switchdev_port_attr_set
);
191 * switchdev_port_stp_update - Notify switch device port of STP
194 * @state: port STP state
196 * Notify switch device port of bridge port STP state change.
198 int switchdev_port_stp_update(struct net_device
*dev
, u8 state
)
200 const struct switchdev_ops
*ops
= dev
->switchdev_ops
;
201 struct net_device
*lower_dev
;
202 struct list_head
*iter
;
203 int err
= -EOPNOTSUPP
;
205 if (ops
&& ops
->switchdev_port_stp_update
)
206 return ops
->switchdev_port_stp_update(dev
, state
);
208 netdev_for_each_lower_dev(dev
, lower_dev
, iter
) {
209 err
= switchdev_port_stp_update(lower_dev
, state
);
210 if (err
&& err
!= -EOPNOTSUPP
)
216 EXPORT_SYMBOL_GPL(switchdev_port_stp_update
);
/* Chain of switchdev notifier blocks.  switchdev_mutex is taken around
 * every register, unregister and call operation on the chain.
 */
static DEFINE_MUTEX(switchdev_mutex);
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
222 * register_switchdev_notifier - Register notifier
223 * @nb: notifier_block
225 * Register switch device notifier. This should be used by code
226 * which needs to monitor events happening in particular device.
227 * Return values are same as for atomic_notifier_chain_register().
229 int register_switchdev_notifier(struct notifier_block
*nb
)
233 mutex_lock(&switchdev_mutex
);
234 err
= raw_notifier_chain_register(&switchdev_notif_chain
, nb
);
235 mutex_unlock(&switchdev_mutex
);
238 EXPORT_SYMBOL_GPL(register_switchdev_notifier
);
241 * unregister_switchdev_notifier - Unregister notifier
242 * @nb: notifier_block
244 * Unregister switch device notifier.
245 * Return values are same as for atomic_notifier_chain_unregister().
247 int unregister_switchdev_notifier(struct notifier_block
*nb
)
251 mutex_lock(&switchdev_mutex
);
252 err
= raw_notifier_chain_unregister(&switchdev_notif_chain
, nb
);
253 mutex_unlock(&switchdev_mutex
);
256 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier
);
259 * call_switchdev_notifiers - Call notifiers
260 * @val: value passed unmodified to notifier function
262 * @info: notifier information data
264 * Call all network notifier blocks. This should be called by driver
265 * when it needs to propagate hardware event.
266 * Return values are same as for atomic_notifier_call_chain().
268 int call_switchdev_notifiers(unsigned long val
, struct net_device
*dev
,
269 struct switchdev_notifier_info
*info
)
274 mutex_lock(&switchdev_mutex
);
275 err
= raw_notifier_call_chain(&switchdev_notif_chain
, val
, info
);
276 mutex_unlock(&switchdev_mutex
);
279 EXPORT_SYMBOL_GPL(call_switchdev_notifiers
);
282 * switchdev_port_bridge_setlink - Notify switch device port of bridge
286 * @nlh: netlink msg with bridge port attributes
287 * @flags: bridge setlink flags
289 * Notify switch device port of bridge port attributes
291 int switchdev_port_bridge_setlink(struct net_device
*dev
,
292 struct nlmsghdr
*nlh
, u16 flags
)
294 const struct net_device_ops
*ops
= dev
->netdev_ops
;
296 if (!(dev
->features
& NETIF_F_HW_SWITCH_OFFLOAD
))
299 if (!ops
->ndo_bridge_setlink
)
302 return ops
->ndo_bridge_setlink(dev
, nlh
, flags
);
304 EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink
);
307 * switchdev_port_bridge_dellink - Notify switch device port of bridge
308 * port attribute delete
311 * @nlh: netlink msg with bridge port attributes
312 * @flags: bridge setlink flags
314 * Notify switch device port of bridge port attribute delete
316 int switchdev_port_bridge_dellink(struct net_device
*dev
,
317 struct nlmsghdr
*nlh
, u16 flags
)
319 const struct net_device_ops
*ops
= dev
->netdev_ops
;
321 if (!(dev
->features
& NETIF_F_HW_SWITCH_OFFLOAD
))
324 if (!ops
->ndo_bridge_dellink
)
327 return ops
->ndo_bridge_dellink(dev
, nlh
, flags
);
329 EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink
);
332 * ndo_dflt_switchdev_port_bridge_setlink - default ndo bridge setlink
333 * op for master devices
336 * @nlh: netlink msg with bridge port attributes
337 * @flags: bridge setlink flags
339 * Notify master device slaves of bridge port attributes
341 int ndo_dflt_switchdev_port_bridge_setlink(struct net_device
*dev
,
342 struct nlmsghdr
*nlh
, u16 flags
)
344 struct net_device
*lower_dev
;
345 struct list_head
*iter
;
346 int ret
= 0, err
= 0;
348 if (!(dev
->features
& NETIF_F_HW_SWITCH_OFFLOAD
))
351 netdev_for_each_lower_dev(dev
, lower_dev
, iter
) {
352 err
= switchdev_port_bridge_setlink(lower_dev
, nlh
, flags
);
353 if (err
&& err
!= -EOPNOTSUPP
)
359 EXPORT_SYMBOL_GPL(ndo_dflt_switchdev_port_bridge_setlink
);
362 * ndo_dflt_switchdev_port_bridge_dellink - default ndo bridge dellink
363 * op for master devices
366 * @nlh: netlink msg with bridge port attributes
367 * @flags: bridge dellink flags
369 * Notify master device slaves of bridge port attribute deletes
371 int ndo_dflt_switchdev_port_bridge_dellink(struct net_device
*dev
,
372 struct nlmsghdr
*nlh
, u16 flags
)
374 struct net_device
*lower_dev
;
375 struct list_head
*iter
;
376 int ret
= 0, err
= 0;
378 if (!(dev
->features
& NETIF_F_HW_SWITCH_OFFLOAD
))
381 netdev_for_each_lower_dev(dev
, lower_dev
, iter
) {
382 err
= switchdev_port_bridge_dellink(lower_dev
, nlh
, flags
);
383 if (err
&& err
!= -EOPNOTSUPP
)
389 EXPORT_SYMBOL_GPL(ndo_dflt_switchdev_port_bridge_dellink
);
391 static struct net_device
*switchdev_get_lowest_dev(struct net_device
*dev
)
393 const struct switchdev_ops
*ops
= dev
->switchdev_ops
;
394 struct net_device
*lower_dev
;
395 struct net_device
*port_dev
;
396 struct list_head
*iter
;
398 /* Recusively search down until we find a sw port dev.
399 * (A sw port dev supports switchdev_port_attr_get).
402 if (ops
&& ops
->switchdev_port_attr_get
)
405 netdev_for_each_lower_dev(dev
, lower_dev
, iter
) {
406 port_dev
= switchdev_get_lowest_dev(lower_dev
);
414 static struct net_device
*switchdev_get_dev_by_nhs(struct fib_info
*fi
)
416 struct switchdev_attr attr
= {
417 .id
= SWITCHDEV_ATTR_PORT_PARENT_ID
,
419 struct switchdev_attr prev_attr
;
420 struct net_device
*dev
= NULL
;
423 /* For this route, all nexthop devs must be on the same switch. */
425 for (nhsel
= 0; nhsel
< fi
->fib_nhs
; nhsel
++) {
426 const struct fib_nh
*nh
= &fi
->fib_nh
[nhsel
];
431 dev
= switchdev_get_lowest_dev(nh
->nh_dev
);
435 if (switchdev_port_attr_get(dev
, &attr
))
439 if (prev_attr
.ppid
.id_len
!= attr
.ppid
.id_len
)
441 if (memcmp(prev_attr
.ppid
.id
, attr
.ppid
.id
,
453 * switchdev_fib_ipv4_add - Add IPv4 route entry to switch
455 * @dst: route's IPv4 destination address
456 * @dst_len: destination address length (prefix length)
457 * @fi: route FIB info structure
460 * @nlflags: netlink flags passed in (NLM_F_*)
461 * @tb_id: route table ID
463 * Add IPv4 route entry to switch device.
465 int switchdev_fib_ipv4_add(u32 dst
, int dst_len
, struct fib_info
*fi
,
466 u8 tos
, u8 type
, u32 nlflags
, u32 tb_id
)
468 struct net_device
*dev
;
469 const struct switchdev_ops
*ops
;
472 /* Don't offload route if using custom ip rules or if
473 * IPv4 FIB offloading has been disabled completely.
476 #ifdef CONFIG_IP_MULTIPLE_TABLES
477 if (fi
->fib_net
->ipv4
.fib_has_custom_rules
)
481 if (fi
->fib_net
->ipv4
.fib_offload_disabled
)
484 dev
= switchdev_get_dev_by_nhs(fi
);
487 ops
= dev
->switchdev_ops
;
489 if (ops
->switchdev_fib_ipv4_add
) {
490 err
= ops
->switchdev_fib_ipv4_add(dev
, htonl(dst
), dst_len
,
491 fi
, tos
, type
, nlflags
,
494 fi
->fib_flags
|= RTNH_F_EXTERNAL
;
499 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add
);
502 * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
504 * @dst: route's IPv4 destination address
505 * @dst_len: destination address length (prefix length)
506 * @fi: route FIB info structure
509 * @tb_id: route table ID
511 * Delete IPv4 route entry from switch device.
513 int switchdev_fib_ipv4_del(u32 dst
, int dst_len
, struct fib_info
*fi
,
514 u8 tos
, u8 type
, u32 tb_id
)
516 struct net_device
*dev
;
517 const struct switchdev_ops
*ops
;
520 if (!(fi
->fib_flags
& RTNH_F_EXTERNAL
))
523 dev
= switchdev_get_dev_by_nhs(fi
);
526 ops
= dev
->switchdev_ops
;
528 if (ops
->switchdev_fib_ipv4_del
) {
529 err
= ops
->switchdev_fib_ipv4_del(dev
, htonl(dst
), dst_len
,
530 fi
, tos
, type
, tb_id
);
532 fi
->fib_flags
&= ~RTNH_F_EXTERNAL
;
537 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del
);
540 * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
542 * @fi: route FIB info structure
544 void switchdev_fib_ipv4_abort(struct fib_info
*fi
)
546 /* There was a problem installing this route to the offload
547 * device. For now, until we come up with more refined
548 * policy handling, abruptly end IPv4 fib offloading for
549 * for entire net by flushing offload device(s) of all
550 * IPv4 routes, and mark IPv4 fib offloading broken from
551 * this point forward.
554 fib_flush_external(fi
->fib_net
);
555 fi
->fib_net
->ipv4
.fib_offload_disabled
= true;
557 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort
);