Merge tag 'nfs-rdma-for-4.3-2' of git://git.linux-nfs.org/projects/anna/nfs-rdma
[deliverable/linux.git] / net / mpls / af_mpls.c
1 #include <linux/types.h>
2 #include <linux/skbuff.h>
3 #include <linux/socket.h>
4 #include <linux/sysctl.h>
5 #include <linux/net.h>
6 #include <linux/module.h>
7 #include <linux/if_arp.h>
8 #include <linux/ipv6.h>
9 #include <linux/mpls.h>
10 #include <linux/vmalloc.h>
11 #include <net/ip.h>
12 #include <net/dst.h>
13 #include <net/sock.h>
14 #include <net/arp.h>
15 #include <net/ip_fib.h>
16 #include <net/netevent.h>
17 #include <net/netns/generic.h>
18 #if IS_ENABLED(CONFIG_IPV6)
19 #include <net/ipv6.h>
20 #include <net/addrconf.h>
21 #endif
22 #include "internal.h"
23
/* Sentinel meaning "no label supplied"; one past the largest 20-bit label. */
#define LABEL_NOT_SPECIFIED	(1 << 20)
/* Upper bound on the number of labels a single route may push. */
#define MAX_NEW_LABELS		2

/* Largest via address we store: MAX_ADDR_LEN rounded up to a multiple of
 * sizeof(unsigned long), the same sizing used for the hardware address in
 * struct neighbour.
 */
#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
29
/* Kind of payload expected beneath the bottom-of-stack label.
 * The IPv4/IPv6 values equal the IP version number so that the version
 * field of the payload can be used directly when the type is unspecified.
 */
enum mpls_payload_type {
	MPT_UNSPEC = 0,	/* IPv4 or IPv6, decided from the packet */
	MPT_IPV4 = 4,
	MPT_IPV6 = 6,

	/* Not implemented:
	 * - Pseudo-wire with or without control word (RFC4385)
	 * - GAL (RFC5586)
	 */
};
40
41 struct mpls_route { /* next hop label forwarding entry */
42 struct net_device __rcu *rt_dev;
43 struct rcu_head rt_rcu;
44 u32 rt_label[MAX_NEW_LABELS];
45 u8 rt_protocol; /* routing protocol that set this entry */
46 u8 rt_payload_type;
47 u8 rt_labels;
48 u8 rt_via_alen;
49 u8 rt_via_table;
50 u8 rt_via[0];
51 };
52
/* Bounds handed to proc_dointvec_minmax() for the platform_labels sysctl. */
static int zero;				/* lower bound: 0 */
static int label_limit = (1 << 20) - 1;		/* upper bound: largest 20-bit value */
55
56 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
57 struct nlmsghdr *nlh, struct net *net, u32 portid,
58 unsigned int nlm_flags);
59
60 static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
61 {
62 struct mpls_route *rt = NULL;
63
64 if (index < net->mpls.platform_labels) {
65 struct mpls_route __rcu **platform_label =
66 rcu_dereference(net->mpls.platform_label);
67 rt = rcu_dereference(platform_label[index]);
68 }
69 return rt;
70 }
71
72 static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
73 {
74 return rcu_dereference_rtnl(dev->mpls_ptr);
75 }
76
77 bool mpls_output_possible(const struct net_device *dev)
78 {
79 return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
80 }
81 EXPORT_SYMBOL_GPL(mpls_output_possible);
82
83 static unsigned int mpls_rt_header_size(const struct mpls_route *rt)
84 {
85 /* The size of the layer 2.5 labels to be added for this route */
86 return rt->rt_labels * sizeof(struct mpls_shim_hdr);
87 }
88
89 unsigned int mpls_dev_mtu(const struct net_device *dev)
90 {
91 /* The amount of data the layer 2 frame can hold */
92 return dev->mtu;
93 }
94 EXPORT_SYMBOL_GPL(mpls_dev_mtu);
95
96 bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
97 {
98 if (skb->len <= mtu)
99 return false;
100
101 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
102 return false;
103
104 return true;
105 }
106 EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
107
108 static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
109 struct mpls_entry_decoded dec)
110 {
111 enum mpls_payload_type payload_type;
112 bool success = false;
113
114 /* The IPv4 code below accesses through the IPv4 header
115 * checksum, which is 12 bytes into the packet.
116 * The IPv6 code below accesses through the IPv6 hop limit
117 * which is 8 bytes into the packet.
118 *
119 * For all supported cases there should always be at least 12
120 * bytes of packet data present. The IPv4 header is 20 bytes
121 * without options and the IPv6 header is always 40 bytes
122 * long.
123 */
124 if (!pskb_may_pull(skb, 12))
125 return false;
126
127 payload_type = rt->rt_payload_type;
128 if (payload_type == MPT_UNSPEC)
129 payload_type = ip_hdr(skb)->version;
130
131 switch (payload_type) {
132 case MPT_IPV4: {
133 struct iphdr *hdr4 = ip_hdr(skb);
134 skb->protocol = htons(ETH_P_IP);
135 csum_replace2(&hdr4->check,
136 htons(hdr4->ttl << 8),
137 htons(dec.ttl << 8));
138 hdr4->ttl = dec.ttl;
139 success = true;
140 break;
141 }
142 case MPT_IPV6: {
143 struct ipv6hdr *hdr6 = ipv6_hdr(skb);
144 skb->protocol = htons(ETH_P_IPV6);
145 hdr6->hop_limit = dec.ttl;
146 success = true;
147 break;
148 }
149 case MPT_UNSPEC:
150 break;
151 }
152
153 return success;
154 }
155
156 static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
157 struct packet_type *pt, struct net_device *orig_dev)
158 {
159 struct net *net = dev_net(dev);
160 struct mpls_shim_hdr *hdr;
161 struct mpls_route *rt;
162 struct mpls_entry_decoded dec;
163 struct net_device *out_dev;
164 struct mpls_dev *mdev;
165 unsigned int hh_len;
166 unsigned int new_header_size;
167 unsigned int mtu;
168 int err;
169
170 /* Careful this entire function runs inside of an rcu critical section */
171
172 mdev = mpls_dev_get(dev);
173 if (!mdev || !mdev->input_enabled)
174 goto drop;
175
176 if (skb->pkt_type != PACKET_HOST)
177 goto drop;
178
179 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
180 goto drop;
181
182 if (!pskb_may_pull(skb, sizeof(*hdr)))
183 goto drop;
184
185 /* Read and decode the label */
186 hdr = mpls_hdr(skb);
187 dec = mpls_entry_decode(hdr);
188
189 /* Pop the label */
190 skb_pull(skb, sizeof(*hdr));
191 skb_reset_network_header(skb);
192
193 skb_orphan(skb);
194
195 rt = mpls_route_input_rcu(net, dec.label);
196 if (!rt)
197 goto drop;
198
199 /* Find the output device */
200 out_dev = rcu_dereference(rt->rt_dev);
201 if (!mpls_output_possible(out_dev))
202 goto drop;
203
204 if (skb_warn_if_lro(skb))
205 goto drop;
206
207 skb_forward_csum(skb);
208
209 /* Verify ttl is valid */
210 if (dec.ttl <= 1)
211 goto drop;
212 dec.ttl -= 1;
213
214 /* Verify the destination can hold the packet */
215 new_header_size = mpls_rt_header_size(rt);
216 mtu = mpls_dev_mtu(out_dev);
217 if (mpls_pkt_too_big(skb, mtu - new_header_size))
218 goto drop;
219
220 hh_len = LL_RESERVED_SPACE(out_dev);
221 if (!out_dev->header_ops)
222 hh_len = 0;
223
224 /* Ensure there is enough space for the headers in the skb */
225 if (skb_cow(skb, hh_len + new_header_size))
226 goto drop;
227
228 skb->dev = out_dev;
229 skb->protocol = htons(ETH_P_MPLS_UC);
230
231 if (unlikely(!new_header_size && dec.bos)) {
232 /* Penultimate hop popping */
233 if (!mpls_egress(rt, skb, dec))
234 goto drop;
235 } else {
236 bool bos;
237 int i;
238 skb_push(skb, new_header_size);
239 skb_reset_network_header(skb);
240 /* Push the new labels */
241 hdr = mpls_hdr(skb);
242 bos = dec.bos;
243 for (i = rt->rt_labels - 1; i >= 0; i--) {
244 hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos);
245 bos = false;
246 }
247 }
248
249 err = neigh_xmit(rt->rt_via_table, out_dev, rt->rt_via, skb);
250 if (err)
251 net_dbg_ratelimited("%s: packet transmission failed: %d\n",
252 __func__, err);
253 return 0;
254
255 drop:
256 kfree_skb(skb);
257 return NET_RX_DROP;
258 }
259
260 static struct packet_type mpls_packet_type __read_mostly = {
261 .type = cpu_to_be16(ETH_P_MPLS_UC),
262 .func = mpls_forward,
263 };
264
265 static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
266 [RTA_DST] = { .type = NLA_U32 },
267 [RTA_OIF] = { .type = NLA_U32 },
268 };
269
270 struct mpls_route_config {
271 u32 rc_protocol;
272 u32 rc_ifindex;
273 u16 rc_via_table;
274 u16 rc_via_alen;
275 u8 rc_via[MAX_VIA_ALEN];
276 u32 rc_label;
277 u32 rc_output_labels;
278 u32 rc_output_label[MAX_NEW_LABELS];
279 u32 rc_nlflags;
280 enum mpls_payload_type rc_payload_type;
281 struct nl_info rc_nlinfo;
282 };
283
284 static struct mpls_route *mpls_rt_alloc(size_t alen)
285 {
286 struct mpls_route *rt;
287
288 rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL);
289 if (rt)
290 rt->rt_via_alen = alen;
291 return rt;
292 }
293
294 static void mpls_rt_free(struct mpls_route *rt)
295 {
296 if (rt)
297 kfree_rcu(rt, rt_rcu);
298 }
299
300 static void mpls_notify_route(struct net *net, unsigned index,
301 struct mpls_route *old, struct mpls_route *new,
302 const struct nl_info *info)
303 {
304 struct nlmsghdr *nlh = info ? info->nlh : NULL;
305 unsigned portid = info ? info->portid : 0;
306 int event = new ? RTM_NEWROUTE : RTM_DELROUTE;
307 struct mpls_route *rt = new ? new : old;
308 unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0;
309 /* Ignore reserved labels for now */
310 if (rt && (index >= MPLS_LABEL_FIRST_UNRESERVED))
311 rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags);
312 }
313
314 static void mpls_route_update(struct net *net, unsigned index,
315 struct net_device *dev, struct mpls_route *new,
316 const struct nl_info *info)
317 {
318 struct mpls_route __rcu **platform_label;
319 struct mpls_route *rt, *old = NULL;
320
321 ASSERT_RTNL();
322
323 platform_label = rtnl_dereference(net->mpls.platform_label);
324 rt = rtnl_dereference(platform_label[index]);
325 if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) {
326 rcu_assign_pointer(platform_label[index], new);
327 old = rt;
328 }
329
330 mpls_notify_route(net, index, old, new, info);
331
332 /* If we removed a route free it now */
333 mpls_rt_free(old);
334 }
335
336 static unsigned find_free_label(struct net *net)
337 {
338 struct mpls_route __rcu **platform_label;
339 size_t platform_labels;
340 unsigned index;
341
342 platform_label = rtnl_dereference(net->mpls.platform_label);
343 platform_labels = net->mpls.platform_labels;
344 for (index = MPLS_LABEL_FIRST_UNRESERVED; index < platform_labels;
345 index++) {
346 if (!rtnl_dereference(platform_label[index]))
347 return index;
348 }
349 return LABEL_NOT_SPECIFIED;
350 }
351
352 #if IS_ENABLED(CONFIG_INET)
353 static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr)
354 {
355 struct net_device *dev;
356 struct rtable *rt;
357 struct in_addr daddr;
358
359 memcpy(&daddr, addr, sizeof(struct in_addr));
360 rt = ip_route_output(net, daddr.s_addr, 0, 0, 0);
361 if (IS_ERR(rt))
362 return ERR_CAST(rt);
363
364 dev = rt->dst.dev;
365 dev_hold(dev);
366
367 ip_rt_put(rt);
368
369 return dev;
370 }
371 #else
372 static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr)
373 {
374 return ERR_PTR(-EAFNOSUPPORT);
375 }
376 #endif
377
378 #if IS_ENABLED(CONFIG_IPV6)
379 static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr)
380 {
381 struct net_device *dev;
382 struct dst_entry *dst;
383 struct flowi6 fl6;
384 int err;
385
386 if (!ipv6_stub)
387 return ERR_PTR(-EAFNOSUPPORT);
388
389 memset(&fl6, 0, sizeof(fl6));
390 memcpy(&fl6.daddr, addr, sizeof(struct in6_addr));
391 err = ipv6_stub->ipv6_dst_lookup(net, NULL, &dst, &fl6);
392 if (err)
393 return ERR_PTR(err);
394
395 dev = dst->dev;
396 dev_hold(dev);
397 dst_release(dst);
398
399 return dev;
400 }
401 #else
402 static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr)
403 {
404 return ERR_PTR(-EAFNOSUPPORT);
405 }
406 #endif
407
408 static struct net_device *find_outdev(struct net *net,
409 struct mpls_route_config *cfg)
410 {
411 struct net_device *dev = NULL;
412
413 if (!cfg->rc_ifindex) {
414 switch (cfg->rc_via_table) {
415 case NEIGH_ARP_TABLE:
416 dev = inet_fib_lookup_dev(net, cfg->rc_via);
417 break;
418 case NEIGH_ND_TABLE:
419 dev = inet6_fib_lookup_dev(net, cfg->rc_via);
420 break;
421 case NEIGH_LINK_TABLE:
422 break;
423 }
424 } else {
425 dev = dev_get_by_index(net, cfg->rc_ifindex);
426 }
427
428 if (!dev)
429 return ERR_PTR(-ENODEV);
430
431 return dev;
432 }
433
434 static int mpls_route_add(struct mpls_route_config *cfg)
435 {
436 struct mpls_route __rcu **platform_label;
437 struct net *net = cfg->rc_nlinfo.nl_net;
438 struct net_device *dev = NULL;
439 struct mpls_route *rt, *old;
440 unsigned index;
441 int i;
442 int err = -EINVAL;
443
444 index = cfg->rc_label;
445
446 /* If a label was not specified during insert pick one */
447 if ((index == LABEL_NOT_SPECIFIED) &&
448 (cfg->rc_nlflags & NLM_F_CREATE)) {
449 index = find_free_label(net);
450 }
451
452 /* Reserved labels may not be set */
453 if (index < MPLS_LABEL_FIRST_UNRESERVED)
454 goto errout;
455
456 /* The full 20 bit range may not be supported. */
457 if (index >= net->mpls.platform_labels)
458 goto errout;
459
460 /* Ensure only a supported number of labels are present */
461 if (cfg->rc_output_labels > MAX_NEW_LABELS)
462 goto errout;
463
464 dev = find_outdev(net, cfg);
465 if (IS_ERR(dev)) {
466 err = PTR_ERR(dev);
467 dev = NULL;
468 goto errout;
469 }
470
471 /* Ensure this is a supported device */
472 err = -EINVAL;
473 if (!mpls_dev_get(dev))
474 goto errout;
475
476 err = -EINVAL;
477 if ((cfg->rc_via_table == NEIGH_LINK_TABLE) &&
478 (dev->addr_len != cfg->rc_via_alen))
479 goto errout;
480
481 /* Append makes no sense with mpls */
482 err = -EOPNOTSUPP;
483 if (cfg->rc_nlflags & NLM_F_APPEND)
484 goto errout;
485
486 err = -EEXIST;
487 platform_label = rtnl_dereference(net->mpls.platform_label);
488 old = rtnl_dereference(platform_label[index]);
489 if ((cfg->rc_nlflags & NLM_F_EXCL) && old)
490 goto errout;
491
492 err = -EEXIST;
493 if (!(cfg->rc_nlflags & NLM_F_REPLACE) && old)
494 goto errout;
495
496 err = -ENOENT;
497 if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old)
498 goto errout;
499
500 err = -ENOMEM;
501 rt = mpls_rt_alloc(cfg->rc_via_alen);
502 if (!rt)
503 goto errout;
504
505 rt->rt_labels = cfg->rc_output_labels;
506 for (i = 0; i < rt->rt_labels; i++)
507 rt->rt_label[i] = cfg->rc_output_label[i];
508 rt->rt_protocol = cfg->rc_protocol;
509 RCU_INIT_POINTER(rt->rt_dev, dev);
510 rt->rt_payload_type = cfg->rc_payload_type;
511 rt->rt_via_table = cfg->rc_via_table;
512 memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen);
513
514 mpls_route_update(net, index, NULL, rt, &cfg->rc_nlinfo);
515
516 dev_put(dev);
517 return 0;
518
519 errout:
520 if (dev)
521 dev_put(dev);
522 return err;
523 }
524
525 static int mpls_route_del(struct mpls_route_config *cfg)
526 {
527 struct net *net = cfg->rc_nlinfo.nl_net;
528 unsigned index;
529 int err = -EINVAL;
530
531 index = cfg->rc_label;
532
533 /* Reserved labels may not be removed */
534 if (index < MPLS_LABEL_FIRST_UNRESERVED)
535 goto errout;
536
537 /* The full 20 bit range may not be supported */
538 if (index >= net->mpls.platform_labels)
539 goto errout;
540
541 mpls_route_update(net, index, NULL, NULL, &cfg->rc_nlinfo);
542
543 err = 0;
544 errout:
545 return err;
546 }
547
548 #define MPLS_PERDEV_SYSCTL_OFFSET(field) \
549 (&((struct mpls_dev *)0)->field)
550
551 static const struct ctl_table mpls_dev_table[] = {
552 {
553 .procname = "input",
554 .maxlen = sizeof(int),
555 .mode = 0644,
556 .proc_handler = proc_dointvec,
557 .data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled),
558 },
559 { }
560 };
561
562 static int mpls_dev_sysctl_register(struct net_device *dev,
563 struct mpls_dev *mdev)
564 {
565 char path[sizeof("net/mpls/conf/") + IFNAMSIZ];
566 struct ctl_table *table;
567 int i;
568
569 table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL);
570 if (!table)
571 goto out;
572
573 /* Table data contains only offsets relative to the base of
574 * the mdev at this point, so make them absolute.
575 */
576 for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++)
577 table[i].data = (char *)mdev + (uintptr_t)table[i].data;
578
579 snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);
580
581 mdev->sysctl = register_net_sysctl(dev_net(dev), path, table);
582 if (!mdev->sysctl)
583 goto free;
584
585 return 0;
586
587 free:
588 kfree(table);
589 out:
590 return -ENOBUFS;
591 }
592
593 static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev)
594 {
595 struct ctl_table *table;
596
597 table = mdev->sysctl->ctl_table_arg;
598 unregister_net_sysctl_table(mdev->sysctl);
599 kfree(table);
600 }
601
602 static struct mpls_dev *mpls_add_dev(struct net_device *dev)
603 {
604 struct mpls_dev *mdev;
605 int err = -ENOMEM;
606
607 ASSERT_RTNL();
608
609 mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
610 if (!mdev)
611 return ERR_PTR(err);
612
613 err = mpls_dev_sysctl_register(dev, mdev);
614 if (err)
615 goto free;
616
617 rcu_assign_pointer(dev->mpls_ptr, mdev);
618
619 return mdev;
620
621 free:
622 kfree(mdev);
623 return ERR_PTR(err);
624 }
625
626 static void mpls_ifdown(struct net_device *dev)
627 {
628 struct mpls_route __rcu **platform_label;
629 struct net *net = dev_net(dev);
630 struct mpls_dev *mdev;
631 unsigned index;
632
633 platform_label = rtnl_dereference(net->mpls.platform_label);
634 for (index = 0; index < net->mpls.platform_labels; index++) {
635 struct mpls_route *rt = rtnl_dereference(platform_label[index]);
636 if (!rt)
637 continue;
638 if (rtnl_dereference(rt->rt_dev) != dev)
639 continue;
640 rt->rt_dev = NULL;
641 }
642
643 mdev = mpls_dev_get(dev);
644 if (!mdev)
645 return;
646
647 mpls_dev_sysctl_unregister(mdev);
648
649 RCU_INIT_POINTER(dev->mpls_ptr, NULL);
650
651 kfree_rcu(mdev, rcu);
652 }
653
654 static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
655 void *ptr)
656 {
657 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
658 struct mpls_dev *mdev;
659
660 switch(event) {
661 case NETDEV_REGISTER:
662 /* For now just support ethernet devices */
663 if ((dev->type == ARPHRD_ETHER) ||
664 (dev->type == ARPHRD_LOOPBACK)) {
665 mdev = mpls_add_dev(dev);
666 if (IS_ERR(mdev))
667 return notifier_from_errno(PTR_ERR(mdev));
668 }
669 break;
670
671 case NETDEV_UNREGISTER:
672 mpls_ifdown(dev);
673 break;
674 case NETDEV_CHANGENAME:
675 mdev = mpls_dev_get(dev);
676 if (mdev) {
677 int err;
678
679 mpls_dev_sysctl_unregister(mdev);
680 err = mpls_dev_sysctl_register(dev, mdev);
681 if (err)
682 return notifier_from_errno(err);
683 }
684 break;
685 }
686 return NOTIFY_OK;
687 }
688
689 static struct notifier_block mpls_dev_notifier = {
690 .notifier_call = mpls_dev_notify,
691 };
692
693 static int nla_put_via(struct sk_buff *skb,
694 u8 table, const void *addr, int alen)
695 {
696 static const int table_to_family[NEIGH_NR_TABLES + 1] = {
697 AF_INET, AF_INET6, AF_DECnet, AF_PACKET,
698 };
699 struct nlattr *nla;
700 struct rtvia *via;
701 int family = AF_UNSPEC;
702
703 nla = nla_reserve(skb, RTA_VIA, alen + 2);
704 if (!nla)
705 return -EMSGSIZE;
706
707 if (table <= NEIGH_NR_TABLES)
708 family = table_to_family[table];
709
710 via = nla_data(nla);
711 via->rtvia_family = family;
712 memcpy(via->rtvia_addr, addr, alen);
713 return 0;
714 }
715
716 int nla_put_labels(struct sk_buff *skb, int attrtype,
717 u8 labels, const u32 label[])
718 {
719 struct nlattr *nla;
720 struct mpls_shim_hdr *nla_label;
721 bool bos;
722 int i;
723 nla = nla_reserve(skb, attrtype, labels*4);
724 if (!nla)
725 return -EMSGSIZE;
726
727 nla_label = nla_data(nla);
728 bos = true;
729 for (i = labels - 1; i >= 0; i--) {
730 nla_label[i] = mpls_entry_encode(label[i], 0, 0, bos);
731 bos = false;
732 }
733
734 return 0;
735 }
736 EXPORT_SYMBOL_GPL(nla_put_labels);
737
738 int nla_get_labels(const struct nlattr *nla,
739 u32 max_labels, u32 *labels, u32 label[])
740 {
741 unsigned len = nla_len(nla);
742 unsigned nla_labels;
743 struct mpls_shim_hdr *nla_label;
744 bool bos;
745 int i;
746
747 /* len needs to be an even multiple of 4 (the label size) */
748 if (len & 3)
749 return -EINVAL;
750
751 /* Limit the number of new labels allowed */
752 nla_labels = len/4;
753 if (nla_labels > max_labels)
754 return -EINVAL;
755
756 nla_label = nla_data(nla);
757 bos = true;
758 for (i = nla_labels - 1; i >= 0; i--, bos = false) {
759 struct mpls_entry_decoded dec;
760 dec = mpls_entry_decode(nla_label + i);
761
762 /* Ensure the bottom of stack flag is properly set
763 * and ttl and tc are both clear.
764 */
765 if ((dec.bos != bos) || dec.ttl || dec.tc)
766 return -EINVAL;
767
768 switch (dec.label) {
769 case MPLS_LABEL_IMPLNULL:
770 /* RFC3032: This is a label that an LSR may
771 * assign and distribute, but which never
772 * actually appears in the encapsulation.
773 */
774 return -EINVAL;
775 }
776
777 label[i] = dec.label;
778 }
779 *labels = nla_labels;
780 return 0;
781 }
782 EXPORT_SYMBOL_GPL(nla_get_labels);
783
784 static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
785 struct mpls_route_config *cfg)
786 {
787 struct rtmsg *rtm;
788 struct nlattr *tb[RTA_MAX+1];
789 int index;
790 int err;
791
792 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy);
793 if (err < 0)
794 goto errout;
795
796 err = -EINVAL;
797 rtm = nlmsg_data(nlh);
798 memset(cfg, 0, sizeof(*cfg));
799
800 if (rtm->rtm_family != AF_MPLS)
801 goto errout;
802 if (rtm->rtm_dst_len != 20)
803 goto errout;
804 if (rtm->rtm_src_len != 0)
805 goto errout;
806 if (rtm->rtm_tos != 0)
807 goto errout;
808 if (rtm->rtm_table != RT_TABLE_MAIN)
809 goto errout;
810 /* Any value is acceptable for rtm_protocol */
811
812 /* As mpls uses destination specific addresses
813 * (or source specific address in the case of multicast)
814 * all addresses have universal scope.
815 */
816 if (rtm->rtm_scope != RT_SCOPE_UNIVERSE)
817 goto errout;
818 if (rtm->rtm_type != RTN_UNICAST)
819 goto errout;
820 if (rtm->rtm_flags != 0)
821 goto errout;
822
823 cfg->rc_label = LABEL_NOT_SPECIFIED;
824 cfg->rc_protocol = rtm->rtm_protocol;
825 cfg->rc_nlflags = nlh->nlmsg_flags;
826 cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid;
827 cfg->rc_nlinfo.nlh = nlh;
828 cfg->rc_nlinfo.nl_net = sock_net(skb->sk);
829
830 for (index = 0; index <= RTA_MAX; index++) {
831 struct nlattr *nla = tb[index];
832 if (!nla)
833 continue;
834
835 switch(index) {
836 case RTA_OIF:
837 cfg->rc_ifindex = nla_get_u32(nla);
838 break;
839 case RTA_NEWDST:
840 if (nla_get_labels(nla, MAX_NEW_LABELS,
841 &cfg->rc_output_labels,
842 cfg->rc_output_label))
843 goto errout;
844 break;
845 case RTA_DST:
846 {
847 u32 label_count;
848 if (nla_get_labels(nla, 1, &label_count,
849 &cfg->rc_label))
850 goto errout;
851
852 /* Reserved labels may not be set */
853 if (cfg->rc_label < MPLS_LABEL_FIRST_UNRESERVED)
854 goto errout;
855
856 break;
857 }
858 case RTA_VIA:
859 {
860 struct rtvia *via = nla_data(nla);
861 if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
862 goto errout;
863 cfg->rc_via_alen = nla_len(nla) -
864 offsetof(struct rtvia, rtvia_addr);
865 if (cfg->rc_via_alen > MAX_VIA_ALEN)
866 goto errout;
867
868 /* Validate the address family */
869 switch(via->rtvia_family) {
870 case AF_PACKET:
871 cfg->rc_via_table = NEIGH_LINK_TABLE;
872 break;
873 case AF_INET:
874 cfg->rc_via_table = NEIGH_ARP_TABLE;
875 if (cfg->rc_via_alen != 4)
876 goto errout;
877 break;
878 case AF_INET6:
879 cfg->rc_via_table = NEIGH_ND_TABLE;
880 if (cfg->rc_via_alen != 16)
881 goto errout;
882 break;
883 default:
884 /* Unsupported address family */
885 goto errout;
886 }
887
888 memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen);
889 break;
890 }
891 default:
892 /* Unsupported attribute */
893 goto errout;
894 }
895 }
896
897 err = 0;
898 errout:
899 return err;
900 }
901
902 static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
903 {
904 struct mpls_route_config cfg;
905 int err;
906
907 err = rtm_to_route_config(skb, nlh, &cfg);
908 if (err < 0)
909 return err;
910
911 return mpls_route_del(&cfg);
912 }
913
914
915 static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
916 {
917 struct mpls_route_config cfg;
918 int err;
919
920 err = rtm_to_route_config(skb, nlh, &cfg);
921 if (err < 0)
922 return err;
923
924 return mpls_route_add(&cfg);
925 }
926
927 static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
928 u32 label, struct mpls_route *rt, int flags)
929 {
930 struct net_device *dev;
931 struct nlmsghdr *nlh;
932 struct rtmsg *rtm;
933
934 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
935 if (nlh == NULL)
936 return -EMSGSIZE;
937
938 rtm = nlmsg_data(nlh);
939 rtm->rtm_family = AF_MPLS;
940 rtm->rtm_dst_len = 20;
941 rtm->rtm_src_len = 0;
942 rtm->rtm_tos = 0;
943 rtm->rtm_table = RT_TABLE_MAIN;
944 rtm->rtm_protocol = rt->rt_protocol;
945 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
946 rtm->rtm_type = RTN_UNICAST;
947 rtm->rtm_flags = 0;
948
949 if (rt->rt_labels &&
950 nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label))
951 goto nla_put_failure;
952 if (nla_put_via(skb, rt->rt_via_table, rt->rt_via, rt->rt_via_alen))
953 goto nla_put_failure;
954 dev = rtnl_dereference(rt->rt_dev);
955 if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
956 goto nla_put_failure;
957 if (nla_put_labels(skb, RTA_DST, 1, &label))
958 goto nla_put_failure;
959
960 nlmsg_end(skb, nlh);
961 return 0;
962
963 nla_put_failure:
964 nlmsg_cancel(skb, nlh);
965 return -EMSGSIZE;
966 }
967
968 static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
969 {
970 struct net *net = sock_net(skb->sk);
971 struct mpls_route __rcu **platform_label;
972 size_t platform_labels;
973 unsigned int index;
974
975 ASSERT_RTNL();
976
977 index = cb->args[0];
978 if (index < MPLS_LABEL_FIRST_UNRESERVED)
979 index = MPLS_LABEL_FIRST_UNRESERVED;
980
981 platform_label = rtnl_dereference(net->mpls.platform_label);
982 platform_labels = net->mpls.platform_labels;
983 for (; index < platform_labels; index++) {
984 struct mpls_route *rt;
985 rt = rtnl_dereference(platform_label[index]);
986 if (!rt)
987 continue;
988
989 if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid,
990 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
991 index, rt, NLM_F_MULTI) < 0)
992 break;
993 }
994 cb->args[0] = index;
995
996 return skb->len;
997 }
998
999 static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
1000 {
1001 size_t payload =
1002 NLMSG_ALIGN(sizeof(struct rtmsg))
1003 + nla_total_size(2 + rt->rt_via_alen) /* RTA_VIA */
1004 + nla_total_size(4); /* RTA_DST */
1005 if (rt->rt_labels) /* RTA_NEWDST */
1006 payload += nla_total_size(rt->rt_labels * 4);
1007 if (rt->rt_dev) /* RTA_OIF */
1008 payload += nla_total_size(4);
1009 return payload;
1010 }
1011
1012 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
1013 struct nlmsghdr *nlh, struct net *net, u32 portid,
1014 unsigned int nlm_flags)
1015 {
1016 struct sk_buff *skb;
1017 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1018 int err = -ENOBUFS;
1019
1020 skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL);
1021 if (skb == NULL)
1022 goto errout;
1023
1024 err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags);
1025 if (err < 0) {
1026 /* -EMSGSIZE implies BUG in lfib_nlmsg_size */
1027 WARN_ON(err == -EMSGSIZE);
1028 kfree_skb(skb);
1029 goto errout;
1030 }
1031 rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL);
1032
1033 return;
1034 errout:
1035 if (err < 0)
1036 rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
1037 }
1038
1039 static int resize_platform_label_table(struct net *net, size_t limit)
1040 {
1041 size_t size = sizeof(struct mpls_route *) * limit;
1042 size_t old_limit;
1043 size_t cp_size;
1044 struct mpls_route __rcu **labels = NULL, **old;
1045 struct mpls_route *rt0 = NULL, *rt2 = NULL;
1046 unsigned index;
1047
1048 if (size) {
1049 labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
1050 if (!labels)
1051 labels = vzalloc(size);
1052
1053 if (!labels)
1054 goto nolabels;
1055 }
1056
1057 /* In case the predefined labels need to be populated */
1058 if (limit > MPLS_LABEL_IPV4NULL) {
1059 struct net_device *lo = net->loopback_dev;
1060 rt0 = mpls_rt_alloc(lo->addr_len);
1061 if (!rt0)
1062 goto nort0;
1063 RCU_INIT_POINTER(rt0->rt_dev, lo);
1064 rt0->rt_protocol = RTPROT_KERNEL;
1065 rt0->rt_payload_type = MPT_IPV4;
1066 rt0->rt_via_table = NEIGH_LINK_TABLE;
1067 memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len);
1068 }
1069 if (limit > MPLS_LABEL_IPV6NULL) {
1070 struct net_device *lo = net->loopback_dev;
1071 rt2 = mpls_rt_alloc(lo->addr_len);
1072 if (!rt2)
1073 goto nort2;
1074 RCU_INIT_POINTER(rt2->rt_dev, lo);
1075 rt2->rt_protocol = RTPROT_KERNEL;
1076 rt2->rt_payload_type = MPT_IPV6;
1077 rt2->rt_via_table = NEIGH_LINK_TABLE;
1078 memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len);
1079 }
1080
1081 rtnl_lock();
1082 /* Remember the original table */
1083 old = rtnl_dereference(net->mpls.platform_label);
1084 old_limit = net->mpls.platform_labels;
1085
1086 /* Free any labels beyond the new table */
1087 for (index = limit; index < old_limit; index++)
1088 mpls_route_update(net, index, NULL, NULL, NULL);
1089
1090 /* Copy over the old labels */
1091 cp_size = size;
1092 if (old_limit < limit)
1093 cp_size = old_limit * sizeof(struct mpls_route *);
1094
1095 memcpy(labels, old, cp_size);
1096
1097 /* If needed set the predefined labels */
1098 if ((old_limit <= MPLS_LABEL_IPV6NULL) &&
1099 (limit > MPLS_LABEL_IPV6NULL)) {
1100 RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2);
1101 rt2 = NULL;
1102 }
1103
1104 if ((old_limit <= MPLS_LABEL_IPV4NULL) &&
1105 (limit > MPLS_LABEL_IPV4NULL)) {
1106 RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0);
1107 rt0 = NULL;
1108 }
1109
1110 /* Update the global pointers */
1111 net->mpls.platform_labels = limit;
1112 rcu_assign_pointer(net->mpls.platform_label, labels);
1113
1114 rtnl_unlock();
1115
1116 mpls_rt_free(rt2);
1117 mpls_rt_free(rt0);
1118
1119 if (old) {
1120 synchronize_rcu();
1121 kvfree(old);
1122 }
1123 return 0;
1124
1125 nort2:
1126 mpls_rt_free(rt0);
1127 nort0:
1128 kvfree(labels);
1129 nolabels:
1130 return -ENOMEM;
1131 }
1132
1133 static int mpls_platform_labels(struct ctl_table *table, int write,
1134 void __user *buffer, size_t *lenp, loff_t *ppos)
1135 {
1136 struct net *net = table->data;
1137 int platform_labels = net->mpls.platform_labels;
1138 int ret;
1139 struct ctl_table tmp = {
1140 .procname = table->procname,
1141 .data = &platform_labels,
1142 .maxlen = sizeof(int),
1143 .mode = table->mode,
1144 .extra1 = &zero,
1145 .extra2 = &label_limit,
1146 };
1147
1148 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
1149
1150 if (write && ret == 0)
1151 ret = resize_platform_label_table(net, platform_labels);
1152
1153 return ret;
1154 }
1155
1156 static const struct ctl_table mpls_table[] = {
1157 {
1158 .procname = "platform_labels",
1159 .data = NULL,
1160 .maxlen = sizeof(int),
1161 .mode = 0644,
1162 .proc_handler = mpls_platform_labels,
1163 },
1164 { }
1165 };
1166
1167 static int mpls_net_init(struct net *net)
1168 {
1169 struct ctl_table *table;
1170
1171 net->mpls.platform_labels = 0;
1172 net->mpls.platform_label = NULL;
1173
1174 table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
1175 if (table == NULL)
1176 return -ENOMEM;
1177
1178 table[0].data = net;
1179 net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
1180 if (net->mpls.ctl == NULL) {
1181 kfree(table);
1182 return -ENOMEM;
1183 }
1184
1185 return 0;
1186 }
1187
1188 static void mpls_net_exit(struct net *net)
1189 {
1190 struct mpls_route __rcu **platform_label;
1191 size_t platform_labels;
1192 struct ctl_table *table;
1193 unsigned int index;
1194
1195 table = net->mpls.ctl->ctl_table_arg;
1196 unregister_net_sysctl_table(net->mpls.ctl);
1197 kfree(table);
1198
1199 /* An rcu grace period has passed since there was a device in
1200 * the network namespace (and thus the last in flight packet)
1201 * left this network namespace. This is because
1202 * unregister_netdevice_many and netdev_run_todo has completed
1203 * for each network device that was in this network namespace.
1204 *
1205 * As such no additional rcu synchronization is necessary when
1206 * freeing the platform_label table.
1207 */
1208 rtnl_lock();
1209 platform_label = rtnl_dereference(net->mpls.platform_label);
1210 platform_labels = net->mpls.platform_labels;
1211 for (index = 0; index < platform_labels; index++) {
1212 struct mpls_route *rt = rtnl_dereference(platform_label[index]);
1213 RCU_INIT_POINTER(platform_label[index], NULL);
1214 mpls_rt_free(rt);
1215 }
1216 rtnl_unlock();
1217
1218 kvfree(platform_label);
1219 }
1220
1221 static struct pernet_operations mpls_net_ops = {
1222 .init = mpls_net_init,
1223 .exit = mpls_net_exit,
1224 };
1225
1226 static int __init mpls_init(void)
1227 {
1228 int err;
1229
1230 BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4);
1231
1232 err = register_pernet_subsys(&mpls_net_ops);
1233 if (err)
1234 goto out;
1235
1236 err = register_netdevice_notifier(&mpls_dev_notifier);
1237 if (err)
1238 goto out_unregister_pernet;
1239
1240 dev_add_pack(&mpls_packet_type);
1241
1242 rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL);
1243 rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL);
1244 rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL);
1245 err = 0;
1246 out:
1247 return err;
1248
1249 out_unregister_pernet:
1250 unregister_pernet_subsys(&mpls_net_ops);
1251 goto out;
1252 }
1253 module_init(mpls_init);
1254
1255 static void __exit mpls_exit(void)
1256 {
1257 rtnl_unregister_all(PF_MPLS);
1258 dev_remove_pack(&mpls_packet_type);
1259 unregister_netdevice_notifier(&mpls_dev_notifier);
1260 unregister_pernet_subsys(&mpls_net_ops);
1261 }
1262 module_exit(mpls_exit);
1263
1264 MODULE_DESCRIPTION("MultiProtocol Label Switching");
1265 MODULE_LICENSE("GPL v2");
1266 MODULE_ALIAS_NETPROTO(PF_MPLS);
This page took 0.066208 seconds and 6 git commands to generate.