Commit | Line | Data |
---|---|---|
007f790c JP |
1 | /* |
2 | * net/switchdev/switchdev.c - Switch device API | |
3 | * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> | |
f8f21471 | 4 | * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com> |
007f790c JP |
5 | * |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | */ | |
11 | ||
12 | #include <linux/kernel.h> | |
13 | #include <linux/types.h> | |
14 | #include <linux/init.h> | |
03bf0c28 JP |
15 | #include <linux/mutex.h> |
16 | #include <linux/notifier.h> | |
007f790c | 17 | #include <linux/netdevice.h> |
5e8d9049 | 18 | #include <net/ip_fib.h> |
007f790c JP |
19 | #include <net/switchdev.h> |
20 | ||
3094333d SF |
21 | /** |
22 | * switchdev_port_attr_get - Get port attribute | |
23 | * | |
24 | * @dev: port device | |
25 | * @attr: attribute to get | |
26 | */ | |
27 | int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) | |
28 | { | |
29 | const struct switchdev_ops *ops = dev->switchdev_ops; | |
30 | struct net_device *lower_dev; | |
31 | struct list_head *iter; | |
32 | struct switchdev_attr first = { | |
33 | .id = SWITCHDEV_ATTR_UNDEFINED | |
34 | }; | |
35 | int err = -EOPNOTSUPP; | |
36 | ||
37 | if (ops && ops->switchdev_port_attr_get) | |
38 | return ops->switchdev_port_attr_get(dev, attr); | |
39 | ||
40 | if (attr->flags & SWITCHDEV_F_NO_RECURSE) | |
41 | return err; | |
42 | ||
43 | /* Switch device port(s) may be stacked under | |
44 | * bond/team/vlan dev, so recurse down to get attr on | |
45 | * each port. Return -ENODATA if attr values don't | |
46 | * compare across ports. | |
47 | */ | |
48 | ||
49 | netdev_for_each_lower_dev(dev, lower_dev, iter) { | |
50 | err = switchdev_port_attr_get(lower_dev, attr); | |
51 | if (err) | |
52 | break; | |
53 | if (first.id == SWITCHDEV_ATTR_UNDEFINED) | |
54 | first = *attr; | |
55 | else if (memcmp(&first, attr, sizeof(*attr))) | |
56 | return -ENODATA; | |
57 | } | |
58 | ||
59 | return err; | |
60 | } | |
61 | EXPORT_SYMBOL_GPL(switchdev_port_attr_get); | |
62 | ||
63 | static int __switchdev_port_attr_set(struct net_device *dev, | |
64 | struct switchdev_attr *attr) | |
65 | { | |
66 | const struct switchdev_ops *ops = dev->switchdev_ops; | |
67 | struct net_device *lower_dev; | |
68 | struct list_head *iter; | |
69 | int err = -EOPNOTSUPP; | |
70 | ||
71 | if (ops && ops->switchdev_port_attr_set) | |
72 | return ops->switchdev_port_attr_set(dev, attr); | |
73 | ||
74 | if (attr->flags & SWITCHDEV_F_NO_RECURSE) | |
75 | return err; | |
76 | ||
77 | /* Switch device port(s) may be stacked under | |
78 | * bond/team/vlan dev, so recurse down to set attr on | |
79 | * each port. | |
80 | */ | |
81 | ||
82 | netdev_for_each_lower_dev(dev, lower_dev, iter) { | |
83 | err = __switchdev_port_attr_set(lower_dev, attr); | |
84 | if (err) | |
85 | break; | |
86 | } | |
87 | ||
88 | return err; | |
89 | } | |
90 | ||
91 | struct switchdev_attr_set_work { | |
92 | struct work_struct work; | |
93 | struct net_device *dev; | |
94 | struct switchdev_attr attr; | |
95 | }; | |
96 | ||
97 | static void switchdev_port_attr_set_work(struct work_struct *work) | |
98 | { | |
99 | struct switchdev_attr_set_work *asw = | |
100 | container_of(work, struct switchdev_attr_set_work, work); | |
101 | int err; | |
102 | ||
103 | rtnl_lock(); | |
104 | err = switchdev_port_attr_set(asw->dev, &asw->attr); | |
105 | BUG_ON(err); | |
106 | rtnl_unlock(); | |
107 | ||
108 | dev_put(asw->dev); | |
109 | kfree(work); | |
110 | } | |
111 | ||
112 | static int switchdev_port_attr_set_defer(struct net_device *dev, | |
113 | struct switchdev_attr *attr) | |
114 | { | |
115 | struct switchdev_attr_set_work *asw; | |
116 | ||
117 | asw = kmalloc(sizeof(*asw), GFP_ATOMIC); | |
118 | if (!asw) | |
119 | return -ENOMEM; | |
120 | ||
121 | INIT_WORK(&asw->work, switchdev_port_attr_set_work); | |
122 | ||
123 | dev_hold(dev); | |
124 | asw->dev = dev; | |
125 | memcpy(&asw->attr, attr, sizeof(asw->attr)); | |
126 | ||
127 | schedule_work(&asw->work); | |
128 | ||
129 | return 0; | |
130 | } | |
131 | ||
132 | /** | |
133 | * switchdev_port_attr_set - Set port attribute | |
134 | * | |
135 | * @dev: port device | |
136 | * @attr: attribute to set | |
137 | * | |
138 | * Use a 2-phase prepare-commit transaction model to ensure | |
139 | * system is not left in a partially updated state due to | |
140 | * failure from driver/device. | |
141 | */ | |
142 | int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) | |
143 | { | |
144 | int err; | |
145 | ||
146 | if (!rtnl_is_locked()) { | |
147 | /* Running prepare-commit transaction across stacked | |
148 | * devices requires nothing moves, so if rtnl_lock is | |
149 | * not held, schedule a worker thread to hold rtnl_lock | |
150 | * while setting attr. | |
151 | */ | |
152 | ||
153 | return switchdev_port_attr_set_defer(dev, attr); | |
154 | } | |
155 | ||
156 | /* Phase I: prepare for attr set. Driver/device should fail | |
157 | * here if there are going to be issues in the commit phase, | |
158 | * such as lack of resources or support. The driver/device | |
159 | * should reserve resources needed for the commit phase here, | |
160 | * but should not commit the attr. | |
161 | */ | |
162 | ||
163 | attr->trans = SWITCHDEV_TRANS_PREPARE; | |
164 | err = __switchdev_port_attr_set(dev, attr); | |
165 | if (err) { | |
166 | /* Prepare phase failed: abort the transaction. Any | |
167 | * resources reserved in the prepare phase are | |
168 | * released. | |
169 | */ | |
170 | ||
171 | attr->trans = SWITCHDEV_TRANS_ABORT; | |
172 | __switchdev_port_attr_set(dev, attr); | |
173 | ||
174 | return err; | |
175 | } | |
176 | ||
177 | /* Phase II: commit attr set. This cannot fail as a fault | |
178 | * of driver/device. If it does, it's a bug in the driver/device | |
179 | * because the driver said everythings was OK in phase I. | |
180 | */ | |
181 | ||
182 | attr->trans = SWITCHDEV_TRANS_COMMIT; | |
183 | err = __switchdev_port_attr_set(dev, attr); | |
184 | BUG_ON(err); | |
185 | ||
186 | return err; | |
187 | } | |
188 | EXPORT_SYMBOL_GPL(switchdev_port_attr_set); | |
189 | ||
ebb9a03a JP |
/* Notifier chain for switch device events.  Being a raw chain it has
 * no locking of its own; switchdev_mutex is taken around every
 * register, unregister and call operation on it in this file.
 */
static DEFINE_MUTEX(switchdev_mutex);
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
03bf0c28 JP |
192 | |
193 | /** | |
ebb9a03a | 194 | * register_switchdev_notifier - Register notifier |
03bf0c28 JP |
195 | * @nb: notifier_block |
196 | * | |
197 | * Register switch device notifier. This should be used by code | |
198 | * which needs to monitor events happening in particular device. | |
199 | * Return values are same as for atomic_notifier_chain_register(). | |
200 | */ | |
ebb9a03a | 201 | int register_switchdev_notifier(struct notifier_block *nb) |
03bf0c28 JP |
202 | { |
203 | int err; | |
204 | ||
ebb9a03a JP |
205 | mutex_lock(&switchdev_mutex); |
206 | err = raw_notifier_chain_register(&switchdev_notif_chain, nb); | |
207 | mutex_unlock(&switchdev_mutex); | |
03bf0c28 JP |
208 | return err; |
209 | } | |
ebb9a03a | 210 | EXPORT_SYMBOL_GPL(register_switchdev_notifier); |
03bf0c28 JP |
211 | |
212 | /** | |
ebb9a03a | 213 | * unregister_switchdev_notifier - Unregister notifier |
03bf0c28 JP |
214 | * @nb: notifier_block |
215 | * | |
216 | * Unregister switch device notifier. | |
217 | * Return values are same as for atomic_notifier_chain_unregister(). | |
218 | */ | |
ebb9a03a | 219 | int unregister_switchdev_notifier(struct notifier_block *nb) |
03bf0c28 JP |
220 | { |
221 | int err; | |
222 | ||
ebb9a03a JP |
223 | mutex_lock(&switchdev_mutex); |
224 | err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb); | |
225 | mutex_unlock(&switchdev_mutex); | |
03bf0c28 JP |
226 | return err; |
227 | } | |
ebb9a03a | 228 | EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); |
03bf0c28 JP |
229 | |
230 | /** | |
ebb9a03a | 231 | * call_switchdev_notifiers - Call notifiers |
03bf0c28 JP |
232 | * @val: value passed unmodified to notifier function |
233 | * @dev: port device | |
234 | * @info: notifier information data | |
235 | * | |
236 | * Call all network notifier blocks. This should be called by driver | |
237 | * when it needs to propagate hardware event. | |
238 | * Return values are same as for atomic_notifier_call_chain(). | |
239 | */ | |
ebb9a03a JP |
240 | int call_switchdev_notifiers(unsigned long val, struct net_device *dev, |
241 | struct switchdev_notifier_info *info) | |
03bf0c28 JP |
242 | { |
243 | int err; | |
244 | ||
245 | info->dev = dev; | |
ebb9a03a JP |
246 | mutex_lock(&switchdev_mutex); |
247 | err = raw_notifier_call_chain(&switchdev_notif_chain, val, info); | |
248 | mutex_unlock(&switchdev_mutex); | |
03bf0c28 JP |
249 | return err; |
250 | } | |
ebb9a03a | 251 | EXPORT_SYMBOL_GPL(call_switchdev_notifiers); |
8a44dbb2 RP |
252 | |
253 | /** | |
ebb9a03a | 254 | * switchdev_port_bridge_setlink - Notify switch device port of bridge |
8a44dbb2 RP |
255 | * port attributes |
256 | * | |
257 | * @dev: port device | |
258 | * @nlh: netlink msg with bridge port attributes | |
259 | * @flags: bridge setlink flags | |
260 | * | |
261 | * Notify switch device port of bridge port attributes | |
262 | */ | |
ebb9a03a JP |
263 | int switchdev_port_bridge_setlink(struct net_device *dev, |
264 | struct nlmsghdr *nlh, u16 flags) | |
8a44dbb2 RP |
265 | { |
266 | const struct net_device_ops *ops = dev->netdev_ops; | |
267 | ||
268 | if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) | |
269 | return 0; | |
270 | ||
271 | if (!ops->ndo_bridge_setlink) | |
272 | return -EOPNOTSUPP; | |
273 | ||
274 | return ops->ndo_bridge_setlink(dev, nlh, flags); | |
275 | } | |
ebb9a03a | 276 | EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink); |
8a44dbb2 RP |
277 | |
278 | /** | |
ebb9a03a | 279 | * switchdev_port_bridge_dellink - Notify switch device port of bridge |
8a44dbb2 RP |
280 | * port attribute delete |
281 | * | |
282 | * @dev: port device | |
283 | * @nlh: netlink msg with bridge port attributes | |
284 | * @flags: bridge setlink flags | |
285 | * | |
286 | * Notify switch device port of bridge port attribute delete | |
287 | */ | |
ebb9a03a JP |
288 | int switchdev_port_bridge_dellink(struct net_device *dev, |
289 | struct nlmsghdr *nlh, u16 flags) | |
8a44dbb2 RP |
290 | { |
291 | const struct net_device_ops *ops = dev->netdev_ops; | |
292 | ||
293 | if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) | |
294 | return 0; | |
295 | ||
296 | if (!ops->ndo_bridge_dellink) | |
297 | return -EOPNOTSUPP; | |
298 | ||
299 | return ops->ndo_bridge_dellink(dev, nlh, flags); | |
300 | } | |
ebb9a03a | 301 | EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink); |
8a44dbb2 RP |
302 | |
303 | /** | |
ebb9a03a JP |
304 | * ndo_dflt_switchdev_port_bridge_setlink - default ndo bridge setlink |
305 | * op for master devices | |
8a44dbb2 RP |
306 | * |
307 | * @dev: port device | |
308 | * @nlh: netlink msg with bridge port attributes | |
309 | * @flags: bridge setlink flags | |
310 | * | |
311 | * Notify master device slaves of bridge port attributes | |
312 | */ | |
ebb9a03a JP |
313 | int ndo_dflt_switchdev_port_bridge_setlink(struct net_device *dev, |
314 | struct nlmsghdr *nlh, u16 flags) | |
8a44dbb2 RP |
315 | { |
316 | struct net_device *lower_dev; | |
317 | struct list_head *iter; | |
318 | int ret = 0, err = 0; | |
319 | ||
320 | if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) | |
321 | return ret; | |
322 | ||
323 | netdev_for_each_lower_dev(dev, lower_dev, iter) { | |
ebb9a03a | 324 | err = switchdev_port_bridge_setlink(lower_dev, nlh, flags); |
8a44dbb2 RP |
325 | if (err && err != -EOPNOTSUPP) |
326 | ret = err; | |
327 | } | |
328 | ||
329 | return ret; | |
330 | } | |
ebb9a03a | 331 | EXPORT_SYMBOL_GPL(ndo_dflt_switchdev_port_bridge_setlink); |
8a44dbb2 RP |
332 | |
333 | /** | |
ebb9a03a JP |
334 | * ndo_dflt_switchdev_port_bridge_dellink - default ndo bridge dellink |
335 | * op for master devices | |
8a44dbb2 RP |
336 | * |
337 | * @dev: port device | |
338 | * @nlh: netlink msg with bridge port attributes | |
339 | * @flags: bridge dellink flags | |
340 | * | |
341 | * Notify master device slaves of bridge port attribute deletes | |
342 | */ | |
ebb9a03a JP |
343 | int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev, |
344 | struct nlmsghdr *nlh, u16 flags) | |
8a44dbb2 RP |
345 | { |
346 | struct net_device *lower_dev; | |
347 | struct list_head *iter; | |
348 | int ret = 0, err = 0; | |
349 | ||
350 | if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) | |
351 | return ret; | |
352 | ||
353 | netdev_for_each_lower_dev(dev, lower_dev, iter) { | |
ebb9a03a | 354 | err = switchdev_port_bridge_dellink(lower_dev, nlh, flags); |
8a44dbb2 RP |
355 | if (err && err != -EOPNOTSUPP) |
356 | ret = err; | |
357 | } | |
358 | ||
359 | return ret; | |
360 | } | |
ebb9a03a | 361 | EXPORT_SYMBOL_GPL(ndo_dflt_switchdev_port_bridge_dellink); |
5e8d9049 | 362 | |
ebb9a03a | 363 | static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) |
b5d6fbde | 364 | { |
9d47c0a2 | 365 | const struct switchdev_ops *ops = dev->switchdev_ops; |
b5d6fbde SF |
366 | struct net_device *lower_dev; |
367 | struct net_device *port_dev; | |
368 | struct list_head *iter; | |
369 | ||
370 | /* Recusively search down until we find a sw port dev. | |
f8e20a9f | 371 | * (A sw port dev supports switchdev_port_attr_get). |
b5d6fbde SF |
372 | */ |
373 | ||
f8e20a9f | 374 | if (ops && ops->switchdev_port_attr_get) |
b5d6fbde SF |
375 | return dev; |
376 | ||
377 | netdev_for_each_lower_dev(dev, lower_dev, iter) { | |
ebb9a03a | 378 | port_dev = switchdev_get_lowest_dev(lower_dev); |
b5d6fbde SF |
379 | if (port_dev) |
380 | return port_dev; | |
381 | } | |
382 | ||
383 | return NULL; | |
384 | } | |
385 | ||
ebb9a03a | 386 | static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) |
b5d6fbde | 387 | { |
f8e20a9f SF |
388 | struct switchdev_attr attr = { |
389 | .id = SWITCHDEV_ATTR_PORT_PARENT_ID, | |
390 | }; | |
391 | struct switchdev_attr prev_attr; | |
b5d6fbde SF |
392 | struct net_device *dev = NULL; |
393 | int nhsel; | |
394 | ||
395 | /* For this route, all nexthop devs must be on the same switch. */ | |
396 | ||
397 | for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { | |
398 | const struct fib_nh *nh = &fi->fib_nh[nhsel]; | |
399 | ||
400 | if (!nh->nh_dev) | |
401 | return NULL; | |
402 | ||
ebb9a03a | 403 | dev = switchdev_get_lowest_dev(nh->nh_dev); |
b5d6fbde SF |
404 | if (!dev) |
405 | return NULL; | |
406 | ||
f8e20a9f | 407 | if (switchdev_port_attr_get(dev, &attr)) |
b5d6fbde SF |
408 | return NULL; |
409 | ||
410 | if (nhsel > 0) { | |
f8e20a9f | 411 | if (prev_attr.ppid.id_len != attr.ppid.id_len) |
b5d6fbde | 412 | return NULL; |
f8e20a9f SF |
413 | if (memcmp(prev_attr.ppid.id, attr.ppid.id, |
414 | attr.ppid.id_len)) | |
b5d6fbde SF |
415 | return NULL; |
416 | } | |
417 | ||
f8e20a9f | 418 | prev_attr = attr; |
b5d6fbde SF |
419 | } |
420 | ||
421 | return dev; | |
422 | } | |
423 | ||
5e8d9049 | 424 | /** |
ebb9a03a | 425 | * switchdev_fib_ipv4_add - Add IPv4 route entry to switch |
5e8d9049 SF |
426 | * |
427 | * @dst: route's IPv4 destination address | |
428 | * @dst_len: destination address length (prefix length) | |
429 | * @fi: route FIB info structure | |
430 | * @tos: route TOS | |
431 | * @type: route type | |
f8f21471 | 432 | * @nlflags: netlink flags passed in (NLM_F_*) |
5e8d9049 SF |
433 | * @tb_id: route table ID |
434 | * | |
435 | * Add IPv4 route entry to switch device. | |
436 | */ | |
ebb9a03a JP |
437 | int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, |
438 | u8 tos, u8 type, u32 nlflags, u32 tb_id) | |
5e8d9049 | 439 | { |
b5d6fbde | 440 | struct net_device *dev; |
9d47c0a2 | 441 | const struct switchdev_ops *ops; |
b5d6fbde SF |
442 | int err = 0; |
443 | ||
8e05fd71 SF |
444 | /* Don't offload route if using custom ip rules or if |
445 | * IPv4 FIB offloading has been disabled completely. | |
446 | */ | |
447 | ||
e1315db1 SF |
448 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
449 | if (fi->fib_net->ipv4.fib_has_custom_rules) | |
450 | return 0; | |
451 | #endif | |
452 | ||
453 | if (fi->fib_net->ipv4.fib_offload_disabled) | |
104616e7 SF |
454 | return 0; |
455 | ||
ebb9a03a | 456 | dev = switchdev_get_dev_by_nhs(fi); |
b5d6fbde SF |
457 | if (!dev) |
458 | return 0; | |
9d47c0a2 | 459 | ops = dev->switchdev_ops; |
b5d6fbde | 460 | |
9d47c0a2 JP |
461 | if (ops->switchdev_fib_ipv4_add) { |
462 | err = ops->switchdev_fib_ipv4_add(dev, htonl(dst), dst_len, | |
463 | fi, tos, type, nlflags, | |
464 | tb_id); | |
b5d6fbde SF |
465 | if (!err) |
466 | fi->fib_flags |= RTNH_F_EXTERNAL; | |
467 | } | |
468 | ||
469 | return err; | |
5e8d9049 | 470 | } |
ebb9a03a | 471 | EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); |
5e8d9049 SF |
472 | |
473 | /** | |
ebb9a03a | 474 | * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch |
5e8d9049 SF |
475 | * |
476 | * @dst: route's IPv4 destination address | |
477 | * @dst_len: destination address length (prefix length) | |
478 | * @fi: route FIB info structure | |
479 | * @tos: route TOS | |
480 | * @type: route type | |
481 | * @tb_id: route table ID | |
482 | * | |
483 | * Delete IPv4 route entry from switch device. | |
484 | */ | |
ebb9a03a JP |
485 | int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, |
486 | u8 tos, u8 type, u32 tb_id) | |
5e8d9049 | 487 | { |
b5d6fbde | 488 | struct net_device *dev; |
9d47c0a2 | 489 | const struct switchdev_ops *ops; |
b5d6fbde SF |
490 | int err = 0; |
491 | ||
492 | if (!(fi->fib_flags & RTNH_F_EXTERNAL)) | |
493 | return 0; | |
494 | ||
ebb9a03a | 495 | dev = switchdev_get_dev_by_nhs(fi); |
b5d6fbde SF |
496 | if (!dev) |
497 | return 0; | |
9d47c0a2 | 498 | ops = dev->switchdev_ops; |
b5d6fbde | 499 | |
9d47c0a2 JP |
500 | if (ops->switchdev_fib_ipv4_del) { |
501 | err = ops->switchdev_fib_ipv4_del(dev, htonl(dst), dst_len, | |
502 | fi, tos, type, tb_id); | |
b5d6fbde SF |
503 | if (!err) |
504 | fi->fib_flags &= ~RTNH_F_EXTERNAL; | |
505 | } | |
506 | ||
507 | return err; | |
5e8d9049 | 508 | } |
ebb9a03a | 509 | EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del); |
8e05fd71 SF |
510 | |
511 | /** | |
ebb9a03a | 512 | * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation |
8e05fd71 SF |
513 | * |
514 | * @fi: route FIB info structure | |
515 | */ | |
ebb9a03a | 516 | void switchdev_fib_ipv4_abort(struct fib_info *fi) |
8e05fd71 SF |
517 | { |
518 | /* There was a problem installing this route to the offload | |
519 | * device. For now, until we come up with more refined | |
520 | * policy handling, abruptly end IPv4 fib offloading for | |
521 | * for entire net by flushing offload device(s) of all | |
522 | * IPv4 routes, and mark IPv4 fib offloading broken from | |
523 | * this point forward. | |
524 | */ | |
525 | ||
526 | fib_flush_external(fi->fib_net); | |
527 | fi->fib_net->ipv4.fib_offload_disabled = true; | |
528 | } | |
ebb9a03a | 529 | EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); |