switchdev: convert STP update to switchdev attr set
/*
 * net/switchdev/switchdev.c - Switch device API
 * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
 * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>

/**
 * switchdev_port_attr_get - Get port attribute
 *
 * @dev: port device
 * @attr: attribute to get
 */
int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	struct switchdev_attr first = {
		.id = SWITCHDEV_ATTR_UNDEFINED
	};
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_attr_get)
		return ops->switchdev_port_attr_get(dev, attr);

	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
		return err;

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to get attr on
	 * each port.  Return -ENODATA if attr values don't
	 * compare across ports.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_attr_get(lower_dev, attr);
		if (err)
			break;
		if (first.id == SWITCHDEV_ATTR_UNDEFINED)
			first = *attr;
		else if (memcmp(&first, attr, sizeof(*attr)))
			return -ENODATA;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
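
/* Illustrative sketch (not part of the original file): a minimal example of
 * how a caller might use switchdev_port_attr_get() to learn which physical
 * switch a port belongs to.  SWITCHDEV_ATTR_PORT_PARENT_ID and the ppid
 * member are taken from their use later in this file; the helper name is
 * hypothetical and marked __maybe_unused since nothing here calls it.
 */
static int __maybe_unused switchdev_example_get_parent_id(struct net_device *dev,
							   struct netdev_phys_item_id *psid)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
	};
	int err;

	/* Recurses through stacked devices as described above */
	err = switchdev_port_attr_get(dev, &attr);
	if (err)
		return err;

	*psid = attr.ppid;
	return 0;
}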

static int __switchdev_port_attr_set(struct net_device *dev,
				     struct switchdev_attr *attr)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_attr_set)
		return ops->switchdev_port_attr_set(dev, attr);

	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
		return err;

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to set attr on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = __switchdev_port_attr_set(lower_dev, attr);
		if (err)
			break;
	}

	return err;
}

struct switchdev_attr_set_work {
	struct work_struct work;
	struct net_device *dev;
	struct switchdev_attr attr;
};

static void switchdev_port_attr_set_work(struct work_struct *work)
{
	struct switchdev_attr_set_work *asw =
		container_of(work, struct switchdev_attr_set_work, work);
	int err;

	rtnl_lock();
	err = switchdev_port_attr_set(asw->dev, &asw->attr);
	BUG_ON(err);
	rtnl_unlock();

	dev_put(asw->dev);
	kfree(work);
}

static int switchdev_port_attr_set_defer(struct net_device *dev,
					 struct switchdev_attr *attr)
{
	struct switchdev_attr_set_work *asw;

	asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
	if (!asw)
		return -ENOMEM;

	INIT_WORK(&asw->work, switchdev_port_attr_set_work);

	dev_hold(dev);
	asw->dev = dev;
	memcpy(&asw->attr, attr, sizeof(asw->attr));

	schedule_work(&asw->work);

	return 0;
}

/**
 * switchdev_port_attr_set - Set port attribute
 *
 * @dev: port device
 * @attr: attribute to set
 *
 * Use a 2-phase prepare-commit transaction model to ensure
 * the system is not left in a partially updated state due to
 * failure from the driver/device.
 */
int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
{
	int err;

	if (!rtnl_is_locked()) {
		/* Running the prepare-commit transaction across stacked
		 * devices requires that nothing moves, so if rtnl_lock
		 * is not held, schedule a worker thread to hold rtnl_lock
		 * while setting attr.
		 */

		return switchdev_port_attr_set_defer(dev, attr);
	}

	/* Phase I: prepare for attr set.  Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support.  The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the attr.
	 */

	attr->trans = SWITCHDEV_TRANS_PREPARE;
	err = __switchdev_port_attr_set(dev, attr);
	if (err) {
		/* Prepare phase failed: abort the transaction.  Any
		 * resources reserved in the prepare phase are
		 * released.
		 */

		attr->trans = SWITCHDEV_TRANS_ABORT;
		__switchdev_port_attr_set(dev, attr);

		return err;
	}

	/* Phase II: commit attr set.  This cannot fail as a fault
	 * of the driver/device.  If it does, it's a bug in the
	 * driver/device, because the driver said everything was OK
	 * in phase I.
	 */

	attr->trans = SWITCHDEV_TRANS_COMMIT;
	err = __switchdev_port_attr_set(dev, attr);
	BUG_ON(err);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
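
/* Illustrative sketch (not part of the original file): pushing an STP state
 * change to a port using the prepare-commit transaction above.  The
 * SWITCHDEV_ATTR_PORT_STP_STATE id and stp_state member are assumed from the
 * "convert STP update to switchdev attr set" change this file accompanies;
 * the helper name is hypothetical and marked __maybe_unused.
 */
static int __maybe_unused switchdev_example_set_stp_state(struct net_device *dev,
							   u8 state)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_PORT_STP_STATE,
		.stp_state = state,
	};

	/* switchdev_port_attr_set() runs the PREPARE and COMMIT (or ABORT)
	 * phases across all stacked lower devices on our behalf.
	 */
	return switchdev_port_attr_set(dev, &attr);
}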

static DEFINE_MUTEX(switchdev_mutex);
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);

/**
 * register_switchdev_notifier - Register notifier
 * @nb: notifier_block
 *
 * Register switch device notifier.  This should be used by code
 * which needs to monitor events happening on a particular device.
 * Return values are the same as for atomic_notifier_chain_register().
 */
int register_switchdev_notifier(struct notifier_block *nb)
{
	int err;

	mutex_lock(&switchdev_mutex);
	err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
	mutex_unlock(&switchdev_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(register_switchdev_notifier);

/**
 * unregister_switchdev_notifier - Unregister notifier
 * @nb: notifier_block
 *
 * Unregister switch device notifier.
 * Return values are the same as for atomic_notifier_chain_unregister().
 */
int unregister_switchdev_notifier(struct notifier_block *nb)
{
	int err;

	mutex_lock(&switchdev_mutex);
	err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
	mutex_unlock(&switchdev_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);

/**
 * call_switchdev_notifiers - Call notifiers
 * @val: value passed unmodified to notifier function
 * @dev: port device
 * @info: notifier information data
 *
 * Call all network notifier blocks.  This should be called by a driver
 * when it needs to propagate a hardware event.
 * Return values are the same as for atomic_notifier_call_chain().
 */
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
			     struct switchdev_notifier_info *info)
{
	int err;

	info->dev = dev;
	mutex_lock(&switchdev_mutex);
	err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
	mutex_unlock(&switchdev_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
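
/* Illustrative sketch (not part of the original file): the consumer side of
 * the notifier chain above, e.g. how bridge code might watch for events
 * raised by switch drivers.  The SWITCHDEV_FDB_ADD event value is assumed to
 * be defined in net/switchdev.h; the callback and block names are
 * hypothetical, and the block is marked __maybe_unused.
 */
static int switchdev_example_event(struct notifier_block *nb,
				   unsigned long event, void *ptr)
{
	struct switchdev_notifier_info *info = ptr;

	switch (event) {
	case SWITCHDEV_FDB_ADD:
		netdev_dbg(info->dev, "FDB entry learned in hardware\n");
		break;
	default:
		break;
	}

	return NOTIFY_DONE;
}

static struct notifier_block switchdev_example_nb __maybe_unused = {
	.notifier_call = switchdev_example_event,
};

/* A consumer would pass &switchdev_example_nb to register_switchdev_notifier()
 * at init time and to unregister_switchdev_notifier() on teardown.
 */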

/**
 * switchdev_port_bridge_setlink - Notify switch device port of bridge
 *	port attributes
 *
 * @dev: port device
 * @nlh: netlink msg with bridge port attributes
 * @flags: bridge setlink flags
 *
 * Notify switch device port of bridge port attributes
 */
int switchdev_port_bridge_setlink(struct net_device *dev,
				  struct nlmsghdr *nlh, u16 flags)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD))
		return 0;

	if (!ops->ndo_bridge_setlink)
		return -EOPNOTSUPP;

	return ops->ndo_bridge_setlink(dev, nlh, flags);
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);

/**
 * switchdev_port_bridge_dellink - Notify switch device port of bridge
 *	port attribute delete
 *
 * @dev: port device
 * @nlh: netlink msg with bridge port attributes
 * @flags: bridge setlink flags
 *
 * Notify switch device port of bridge port attribute delete
 */
int switchdev_port_bridge_dellink(struct net_device *dev,
				  struct nlmsghdr *nlh, u16 flags)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD))
		return 0;

	if (!ops->ndo_bridge_dellink)
		return -EOPNOTSUPP;

	return ops->ndo_bridge_dellink(dev, nlh, flags);
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);

/**
 * ndo_dflt_switchdev_port_bridge_setlink - default ndo bridge setlink
 *	op for master devices
 *
 * @dev: port device
 * @nlh: netlink msg with bridge port attributes
 * @flags: bridge setlink flags
 *
 * Notify master device slaves of bridge port attributes
 */
int ndo_dflt_switchdev_port_bridge_setlink(struct net_device *dev,
					   struct nlmsghdr *nlh, u16 flags)
{
	struct net_device *lower_dev;
	struct list_head *iter;
	int ret = 0, err = 0;

	if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD))
		return ret;

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_bridge_setlink(lower_dev, nlh, flags);
		if (err && err != -EOPNOTSUPP)
			ret = err;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(ndo_dflt_switchdev_port_bridge_setlink);

/**
 * ndo_dflt_switchdev_port_bridge_dellink - default ndo bridge dellink
 *	op for master devices
 *
 * @dev: port device
 * @nlh: netlink msg with bridge port attributes
 * @flags: bridge dellink flags
 *
 * Notify master device slaves of bridge port attribute deletes
 */
int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev,
					   struct nlmsghdr *nlh, u16 flags)
{
	struct net_device *lower_dev;
	struct list_head *iter;
	int ret = 0, err = 0;

	if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD))
		return ret;

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_bridge_dellink(lower_dev, nlh, flags);
		if (err && err != -EOPNOTSUPP)
			ret = err;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(ndo_dflt_switchdev_port_bridge_dellink);

static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct net_device *port_dev;
	struct list_head *iter;

	/* Recursively search down until we find a sw port dev.
	 * (A sw port dev supports switchdev_port_attr_get).
	 */

	if (ops && ops->switchdev_port_attr_get)
		return dev;

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		port_dev = switchdev_get_lowest_dev(lower_dev);
		if (port_dev)
			return port_dev;
	}

	return NULL;
}

static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
	};
	struct switchdev_attr prev_attr;
	struct net_device *dev = NULL;
	int nhsel;

	/* For this route, all nexthop devs must be on the same switch. */

	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
		const struct fib_nh *nh = &fi->fib_nh[nhsel];

		if (!nh->nh_dev)
			return NULL;

		dev = switchdev_get_lowest_dev(nh->nh_dev);
		if (!dev)
			return NULL;

		if (switchdev_port_attr_get(dev, &attr))
			return NULL;

		if (nhsel > 0) {
			if (prev_attr.ppid.id_len != attr.ppid.id_len)
				return NULL;
			if (memcmp(prev_attr.ppid.id, attr.ppid.id,
				   attr.ppid.id_len))
				return NULL;
		}

		prev_attr = attr;
	}

	return dev;
}

/**
 * switchdev_fib_ipv4_add - Add IPv4 route entry to switch
 *
 * @dst: route's IPv4 destination address
 * @dst_len: destination address length (prefix length)
 * @fi: route FIB info structure
 * @tos: route TOS
 * @type: route type
 * @nlflags: netlink flags passed in (NLM_F_*)
 * @tb_id: route table ID
 *
 * Add IPv4 route entry to switch device.
 */
int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
			   u8 tos, u8 type, u32 nlflags, u32 tb_id)
{
	struct net_device *dev;
	const struct switchdev_ops *ops;
	int err = 0;

	/* Don't offload route if using custom ip rules or if
	 * IPv4 FIB offloading has been disabled completely.
	 */

#ifdef CONFIG_IP_MULTIPLE_TABLES
	if (fi->fib_net->ipv4.fib_has_custom_rules)
		return 0;
#endif

	if (fi->fib_net->ipv4.fib_offload_disabled)
		return 0;

	dev = switchdev_get_dev_by_nhs(fi);
	if (!dev)
		return 0;
	ops = dev->switchdev_ops;

	if (ops->switchdev_fib_ipv4_add) {
		err = ops->switchdev_fib_ipv4_add(dev, htonl(dst), dst_len,
						  fi, tos, type, nlflags,
						  tb_id);
		if (!err)
			fi->fib_flags |= RTNH_F_EXTERNAL;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
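
/* Illustrative sketch (not part of the original file): the rough shape of the
 * driver-side hook that backs the ops->switchdev_fib_ipv4_add() call above.
 * The op signature is inferred from that call site; the function and ops
 * names are hypothetical, and a real driver would program its hardware FIB
 * instead of returning -EOPNOTSUPP.
 */
static int __maybe_unused switchdev_example_fib_ipv4_add(struct net_device *dev,
							  __be32 dst, int dst_len,
							  struct fib_info *fi,
							  u8 tos, u8 type,
							  u32 nlflags, u32 tb_id)
{
	/* Resolve fi's nexthops to front-panel ports and install an LPM
	 * entry in hardware here; returning an error leaves the route
	 * software-only.
	 */
	return -EOPNOTSUPP;
}

static const struct switchdev_ops switchdev_example_ops __maybe_unused = {
	.switchdev_fib_ipv4_add	= switchdev_example_fib_ipv4_add,
};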

/**
 * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
 *
 * @dst: route's IPv4 destination address
 * @dst_len: destination address length (prefix length)
 * @fi: route FIB info structure
 * @tos: route TOS
 * @type: route type
 * @tb_id: route table ID
 *
 * Delete IPv4 route entry from switch device.
 */
int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
			   u8 tos, u8 type, u32 tb_id)
{
	struct net_device *dev;
	const struct switchdev_ops *ops;
	int err = 0;

	if (!(fi->fib_flags & RTNH_F_EXTERNAL))
		return 0;

	dev = switchdev_get_dev_by_nhs(fi);
	if (!dev)
		return 0;
	ops = dev->switchdev_ops;

	if (ops->switchdev_fib_ipv4_del) {
		err = ops->switchdev_fib_ipv4_del(dev, htonl(dst), dst_len,
						  fi, tos, type, tb_id);
		if (!err)
			fi->fib_flags &= ~RTNH_F_EXTERNAL;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);

/**
 * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
 *
 * @fi: route FIB info structure
 */
void switchdev_fib_ipv4_abort(struct fib_info *fi)
{
	/* There was a problem installing this route to the offload
	 * device.  For now, until we come up with more refined
	 * policy handling, abruptly end IPv4 fib offloading for the
	 * entire net by flushing offload device(s) of all IPv4
	 * routes, and mark IPv4 fib offloading broken from this
	 * point forward.
	 */

	fib_flush_external(fi->fib_net);
	fi->fib_net->ipv4.fib_offload_disabled = true;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
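
/* Illustrative sketch (not part of the original file): how the IPv4 FIB code
 * is expected to pair switchdev_fib_ipv4_add() with the abort helper above
 * when hardware installation fails.  The wrapper name is hypothetical and
 * marked __maybe_unused; the real caller is the IPv4 FIB insert path.
 */
static void __maybe_unused switchdev_example_offload_route(u32 dst, int dst_len,
							    struct fib_info *fi,
							    u8 tos, u8 type,
							    u32 nlflags, u32 tb_id)
{
	/* If the offload device rejects the route, fall back to
	 * software-only forwarding for the whole netns.
	 */
	if (switchdev_fib_ipv4_add(dst, dst_len, fi, tos, type,
				   nlflags, tb_id))
		switchdev_fib_ipv4_abort(fi);
}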