ipv6: Various cleanups in route.c
[deliverable/linux.git] / net / ipv6 / route.c
... / ...
CommitLineData
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27#include <linux/capability.h>
28#include <linux/errno.h>
29#include <linux/export.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/mroute6.h>
39#include <linux/init.h>
40#include <linux/if_arp.h>
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
43#include <linux/nsproxy.h>
44#include <linux/slab.h>
45#include <net/net_namespace.h>
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
56#include <net/netevent.h>
57#include <net/netlink.h>
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
/*
 * Debug verbosity for this file.  With RT6_DEBUG below 3 both RDBG()
 * and RT6_TRACE() expand to nothing; set it to 3 (or higher) to have
 * them call printk().
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
75
76static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
77 const struct in6_addr *dest);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static unsigned int ip6_default_advmss(const struct dst_entry *dst);
80static unsigned int ip6_mtu(const struct dst_entry *dst);
81static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82static void ip6_dst_destroy(struct dst_entry *);
83static void ip6_dst_ifdown(struct dst_entry *,
84 struct net_device *dev, int how);
85static int ip6_dst_gc(struct dst_ops *ops);
86
87static int ip6_pkt_discard(struct sk_buff *skb);
88static int ip6_pkt_discard_out(struct sk_buff *skb);
89static void ip6_link_failure(struct sk_buff *skb);
90static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv(); defined later in this file. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
#endif
101
102static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
103{
104 struct rt6_info *rt = (struct rt6_info *) dst;
105 struct inet_peer *peer;
106 u32 *p = NULL;
107
108 if (!(rt->dst.flags & DST_HOST))
109 return NULL;
110
111 if (!rt->rt6i_peer)
112 rt6_bind_peer(rt, 1);
113
114 peer = rt->rt6i_peer;
115 if (peer) {
116 u32 *old_p = __DST_METRICS_PTR(old);
117 unsigned long prev, new;
118
119 p = peer->metrics;
120 if (inet_metrics_new(peer))
121 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122
123 new = (unsigned long) p;
124 prev = cmpxchg(&dst->_metrics, old, new);
125
126 if (prev != old) {
127 p = __DST_METRICS_PTR(prev);
128 if (prev & DST_METRICS_READ_ONLY)
129 p = NULL;
130 }
131 }
132 return p;
133}
134
135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136{
137 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
138}
139
140static struct dst_ops ip6_dst_ops_template = {
141 .family = AF_INET6,
142 .protocol = cpu_to_be16(ETH_P_IPV6),
143 .gc = ip6_dst_gc,
144 .gc_thresh = 1024,
145 .check = ip6_dst_check,
146 .default_advmss = ip6_default_advmss,
147 .mtu = ip6_mtu,
148 .cow_metrics = ipv6_cow_metrics,
149 .destroy = ip6_dst_destroy,
150 .ifdown = ip6_dst_ifdown,
151 .negative_advice = ip6_negative_advice,
152 .link_failure = ip6_link_failure,
153 .update_pmtu = ip6_rt_update_pmtu,
154 .local_out = __ip6_local_out,
155 .neigh_lookup = ip6_neigh_lookup,
156};
157
158static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
159{
160 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
161
162 return mtu ? : dst->dev->mtu;
163}
164
165static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
166{
167}
168
169static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
170 unsigned long old)
171{
172 return NULL;
173}
174
175static struct dst_ops ip6_dst_blackhole_ops = {
176 .family = AF_INET6,
177 .protocol = cpu_to_be16(ETH_P_IPV6),
178 .destroy = ip6_dst_destroy,
179 .check = ip6_dst_check,
180 .mtu = ip6_blackhole_mtu,
181 .default_advmss = ip6_default_advmss,
182 .update_pmtu = ip6_rt_blackhole_update_pmtu,
183 .cow_metrics = ip6_rt_blackhole_cow_metrics,
184 .neigh_lookup = ip6_neigh_lookup,
185};
186
187static const u32 ip6_template_metrics[RTAX_MAX] = {
188 [RTAX_HOPLIMIT - 1] = 255,
189};
190
191static struct rt6_info ip6_null_entry_template = {
192 .dst = {
193 .__refcnt = ATOMIC_INIT(1),
194 .__use = 1,
195 .obsolete = -1,
196 .error = -ENETUNREACH,
197 .input = ip6_pkt_discard,
198 .output = ip6_pkt_discard_out,
199 },
200 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
201 .rt6i_protocol = RTPROT_KERNEL,
202 .rt6i_metric = ~(u32) 0,
203 .rt6i_ref = ATOMIC_INIT(1),
204};
205
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject route whose dst reports
 * -EACCES and hands packets to the ip6_pkt_prohibit*() handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.dst = {
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
		.error		= -EACCES,
		.obsolete	= -1,
		.__use		= 1,
		.__refcnt	= ATOMIC_INIT(1),
	},
};

/*
 * Template for the "blackhole" route: a reject route whose dst reports
 * -EINVAL and passes packets to dst_discard() in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.dst = {
		.input		= dst_discard,
		.output		= dst_discard,
		.error		= -EINVAL,
		.obsolete	= -1,
		.__use		= 1,
		.__refcnt	= ATOMIC_INIT(1),
	},
};

#endif
242
243/* allocate dst with ip6_dst_ops */
244static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
245 struct net_device *dev,
246 int flags)
247{
248 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
249
250 if (rt)
251 memset(&rt->rt6i_table, 0,
252 sizeof(*rt) - sizeof(struct dst_entry));
253
254 return rt;
255}
256
257static void ip6_dst_destroy(struct dst_entry *dst)
258{
259 struct rt6_info *rt = (struct rt6_info *)dst;
260 struct inet6_dev *idev = rt->rt6i_idev;
261 struct inet_peer *peer = rt->rt6i_peer;
262
263 if (!(rt->dst.flags & DST_HOST))
264 dst_destroy_metrics_generic(dst);
265
266 if (idev) {
267 rt->rt6i_idev = NULL;
268 in6_dev_put(idev);
269 }
270 if (peer) {
271 rt->rt6i_peer = NULL;
272 inet_putpeer(peer);
273 }
274}
275
276static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
277
278static u32 rt6_peer_genid(void)
279{
280 return atomic_read(&__rt6_peer_genid);
281}
282
283void rt6_bind_peer(struct rt6_info *rt, int create)
284{
285 struct inet_peer *peer;
286
287 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
288 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
289 inet_putpeer(peer);
290 else
291 rt->rt6i_peer_genid = rt6_peer_genid();
292}
293
294static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
295 int how)
296{
297 struct rt6_info *rt = (struct rt6_info *)dst;
298 struct inet6_dev *idev = rt->rt6i_idev;
299 struct net_device *loopback_dev =
300 dev_net(dev)->loopback_dev;
301
302 if (dev != loopback_dev && idev && idev->dev == dev) {
303 struct inet6_dev *loopback_idev =
304 in6_dev_get(loopback_dev);
305 if (loopback_idev) {
306 rt->rt6i_idev = loopback_idev;
307 in6_dev_put(idev);
308 }
309 }
310}
311
312static __inline__ int rt6_check_expired(const struct rt6_info *rt)
313{
314 return (rt->rt6i_flags & RTF_EXPIRES) &&
315 time_after(jiffies, rt->rt6i_expires);
316}
317
318static inline int rt6_need_strict(const struct in6_addr *daddr)
319{
320 return ipv6_addr_type(daddr) &
321 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
322}
323
324/*
325 * Route lookup. Any table->tb6_lock is implied.
326 */
327
328static inline struct rt6_info *rt6_device_match(struct net *net,
329 struct rt6_info *rt,
330 const struct in6_addr *saddr,
331 int oif,
332 int flags)
333{
334 struct rt6_info *local = NULL;
335 struct rt6_info *sprt;
336
337 if (!oif && ipv6_addr_any(saddr))
338 goto out;
339
340 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
341 struct net_device *dev = sprt->rt6i_dev;
342
343 if (oif) {
344 if (dev->ifindex == oif)
345 return sprt;
346 if (dev->flags & IFF_LOOPBACK) {
347 if (!sprt->rt6i_idev ||
348 sprt->rt6i_idev->dev->ifindex != oif) {
349 if (flags & RT6_LOOKUP_F_IFACE && oif)
350 continue;
351 if (local && (!oif ||
352 local->rt6i_idev->dev->ifindex == oif))
353 continue;
354 }
355 local = sprt;
356 }
357 } else {
358 if (ipv6_chk_addr(net, saddr, dev,
359 flags & RT6_LOOKUP_F_IFACE))
360 return sprt;
361 }
362 }
363
364 if (oif) {
365 if (local)
366 return local;
367
368 if (flags & RT6_LOOKUP_F_IFACE)
369 return net->ipv6.ip6_null_entry;
370 }
371out:
372 return rt;
373}
374
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If @rt has a neighbour entry that is not in a NUD_VALID state and the
 * interface's rtr_probe_interval has elapsed since the entry was last
 * updated, stamp the entry and send a neighbour solicitation for it to
 * the solicited-node multicast address.  The interval check is what
 * rate-limits the probes.
 *
 * A NULL @rt is accepted and ignored.
 *
 * NOTE(review): neigh->updated is written while only the read side of
 * neigh->lock is held - confirm this is intended.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	if (neigh && !(neigh->nud_state & NUD_VALID)) {
		int probe_due;

		read_lock_bh(&neigh->lock);
		probe_due = !(neigh->nud_state & NUD_VALID) &&
			    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
		if (probe_due)
			neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		if (probe_due) {
			struct in6_addr *target;
			struct in6_addr mcaddr;

			target = (struct in6_addr *)&neigh->primary_key;
			addrconf_addr_solict_mult(target, &mcaddr);
			ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
		}
	}
	rcu_read_unlock();
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
414
415/*
416 * Default Router Selection (RFC 2461 6.3.6)
417 */
418static inline int rt6_check_dev(struct rt6_info *rt, int oif)
419{
420 struct net_device *dev = rt->rt6i_dev;
421 if (!oif || dev->ifindex == oif)
422 return 2;
423 if ((dev->flags & IFF_LOOPBACK) &&
424 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
425 return 1;
426 return 0;
427}
428
429static inline int rt6_check_neigh(struct rt6_info *rt)
430{
431 struct neighbour *neigh;
432 int m;
433
434 rcu_read_lock();
435 neigh = dst_get_neighbour(&rt->dst);
436 if (rt->rt6i_flags & RTF_NONEXTHOP ||
437 !(rt->rt6i_flags & RTF_GATEWAY))
438 m = 1;
439 else if (neigh) {
440 read_lock_bh(&neigh->lock);
441 if (neigh->nud_state & NUD_VALID)
442 m = 2;
443#ifdef CONFIG_IPV6_ROUTER_PREF
444 else if (neigh->nud_state & NUD_FAILED)
445 m = 0;
446#endif
447 else
448 m = 1;
449 read_unlock_bh(&neigh->lock);
450 } else
451 m = 0;
452 rcu_read_unlock();
453 return m;
454}
455
456static int rt6_score_route(struct rt6_info *rt, int oif,
457 int strict)
458{
459 int m, n;
460
461 m = rt6_check_dev(rt, oif);
462 if (!m && (strict & RT6_LOOKUP_F_IFACE))
463 return -1;
464#ifdef CONFIG_IPV6_ROUTER_PREF
465 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
466#endif
467 n = rt6_check_neigh(rt);
468 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
469 return -1;
470 return m;
471}
472
473static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
474 int *mpri, struct rt6_info *match)
475{
476 int m;
477
478 if (rt6_check_expired(rt))
479 goto out;
480
481 m = rt6_score_route(rt, oif, strict);
482 if (m < 0)
483 goto out;
484
485 if (m > *mpri) {
486 if (strict & RT6_LOOKUP_F_REACHABLE)
487 rt6_probe(match);
488 *mpri = m;
489 match = rt;
490 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
491 rt6_probe(rt);
492 }
493
494out:
495 return match;
496}
497
498static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
499 struct rt6_info *rr_head,
500 u32 metric, int oif, int strict)
501{
502 struct rt6_info *rt, *match;
503 int mpri = -1;
504
505 match = NULL;
506 for (rt = rr_head; rt && rt->rt6i_metric == metric;
507 rt = rt->dst.rt6_next)
508 match = find_match(rt, oif, strict, &mpri, match);
509 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
510 rt = rt->dst.rt6_next)
511 match = find_match(rt, oif, strict, &mpri, match);
512
513 return match;
514}
515
516static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
517{
518 struct rt6_info *match, *rt0;
519 struct net *net;
520
521 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
522 __func__, fn->leaf, oif);
523
524 rt0 = fn->rr_ptr;
525 if (!rt0)
526 fn->rr_ptr = rt0 = fn->leaf;
527
528 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
529
530 if (!match &&
531 (strict & RT6_LOOKUP_F_REACHABLE)) {
532 struct rt6_info *next = rt0->dst.rt6_next;
533
534 /* no entries matched; do round-robin */
535 if (!next || next->rt6i_metric != rt0->rt6i_metric)
536 next = fn->leaf;
537
538 if (next != rt0)
539 fn->rr_ptr = next;
540 }
541
542 RT6_TRACE("%s() => %p\n",
543 __func__, match);
544
545 net = dev_net(rt0->rt6i_dev);
546 return match ? match : net->ipv6.ip6_null_entry;
547}
548
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Validates the option, then adds, refreshes or (for a zero lifetime)
 * deletes the matching route.  Returns 0 on success and -EINVAL for a
 * malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr prefix_buf, *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length != 3) {
		/*
		 * Shorter option: build the prefix in a local buffer from
		 * the first prefix_len bits (ipv6_addr_prefix() is safe
		 * here).
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	} else {
		/* length 3: use the prefix straight out of the option. */
		prefix = (struct in6_addr *)rinfo->prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
622
/*
 * Backtrack through the fib6 tree when the lookup ended on the null
 * entry.
 *
 * Expects the enclosing function to provide the variables `rt` and `fn`
 * and the labels `restart` and `out`.  Starting from `fn`, it walks
 * towards the root: for a parent that has a source subtree other than
 * the current node, the subtree is searched with @saddr; otherwise the
 * walk moves to the parent itself.  It jumps to `restart` at the first
 * node carrying RTN_RTINFO, or to `out` once the top-level root has been
 * reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		for (;;) {						\
			struct fib6_node *pn;				\
									\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			fn = (FIB6_SUBTREE(pn) &&			\
			      FIB6_SUBTREE(pn) != fn) ?			\
				fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr) : \
				pn;					\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
640
641static struct rt6_info *ip6_pol_route_lookup(struct net *net,
642 struct fib6_table *table,
643 struct flowi6 *fl6, int flags)
644{
645 struct fib6_node *fn;
646 struct rt6_info *rt;
647
648 read_lock_bh(&table->tb6_lock);
649 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
650restart:
651 rt = fn->leaf;
652 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
653 BACKTRACK(net, &fl6->saddr);
654out:
655 dst_use(&rt->dst, jiffies);
656 read_unlock_bh(&table->tb6_lock);
657 return rt;
658
659}
660
661struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
662 const struct in6_addr *saddr, int oif, int strict)
663{
664 struct flowi6 fl6 = {
665 .flowi6_oif = oif,
666 .daddr = *daddr,
667 };
668 struct dst_entry *dst;
669 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
670
671 if (saddr) {
672 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
673 flags |= RT6_LOOKUP_F_HAS_SADDR;
674 }
675
676 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
677 if (dst->error == 0)
678 return (struct rt6_info *) dst;
679
680 dst_release(dst);
681
682 return NULL;
683}
684
685EXPORT_SYMBOL(rt6_lookup);
686
687/* ip6_ins_rt is called with FREE table->tb6_lock.
688 It takes new route entry, the addition fails by any reason the
689 route is freed. In any case, if caller does not hold it, it may
690 be destroyed.
691 */
692
693static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
694{
695 int err;
696 struct fib6_table *table;
697
698 table = rt->rt6i_table;
699 write_lock_bh(&table->tb6_lock);
700 err = fib6_add(&table->tb6_root, rt, info);
701 write_unlock_bh(&table->tb6_lock);
702
703 return err;
704}
705
706int ip6_ins_rt(struct rt6_info *rt)
707{
708 struct nl_info info = {
709 .nl_net = dev_net(rt->rt6i_dev),
710 };
711 return __ip6_ins_rt(rt, &info);
712}
713
714static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
715 const struct in6_addr *daddr,
716 const struct in6_addr *saddr)
717{
718 struct rt6_info *rt;
719
720 /*
721 * Clone the route.
722 */
723
724 rt = ip6_rt_copy(ort, daddr);
725
726 if (rt) {
727 struct neighbour *neigh;
728 int attempts = !in_softirq();
729
730 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
731 if (rt->rt6i_dst.plen != 128 &&
732 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
733 rt->rt6i_flags |= RTF_ANYCAST;
734 rt->rt6i_gateway = *daddr;
735 }
736
737 rt->rt6i_flags |= RTF_CACHE;
738
739#ifdef CONFIG_IPV6_SUBTREES
740 if (rt->rt6i_src.plen && saddr) {
741 rt->rt6i_src.addr = *saddr;
742 rt->rt6i_src.plen = 128;
743 }
744#endif
745
746 retry:
747 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
748 if (IS_ERR(neigh)) {
749 struct net *net = dev_net(rt->rt6i_dev);
750 int saved_rt_min_interval =
751 net->ipv6.sysctl.ip6_rt_gc_min_interval;
752 int saved_rt_elasticity =
753 net->ipv6.sysctl.ip6_rt_gc_elasticity;
754
755 if (attempts-- > 0) {
756 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
757 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
758
759 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
760
761 net->ipv6.sysctl.ip6_rt_gc_elasticity =
762 saved_rt_elasticity;
763 net->ipv6.sysctl.ip6_rt_gc_min_interval =
764 saved_rt_min_interval;
765 goto retry;
766 }
767
768 if (net_ratelimit())
769 printk(KERN_WARNING
770 "ipv6: Neighbour table overflow.\n");
771 dst_free(&rt->dst);
772 return NULL;
773 }
774 dst_set_neighbour(&rt->dst, neigh);
775
776 }
777
778 return rt;
779}
780
781static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
782 const struct in6_addr *daddr)
783{
784 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
785
786 if (rt) {
787 rt->rt6i_flags |= RTF_CACHE;
788 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
789 }
790 return rt;
791}
792
793static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
794 struct flowi6 *fl6, int flags)
795{
796 struct fib6_node *fn;
797 struct rt6_info *rt, *nrt;
798 int strict = 0;
799 int attempts = 3;
800 int err;
801 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
802
803 strict |= flags & RT6_LOOKUP_F_IFACE;
804
805relookup:
806 read_lock_bh(&table->tb6_lock);
807
808restart_2:
809 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
810
811restart:
812 rt = rt6_select(fn, oif, strict | reachable);
813
814 BACKTRACK(net, &fl6->saddr);
815 if (rt == net->ipv6.ip6_null_entry ||
816 rt->rt6i_flags & RTF_CACHE)
817 goto out;
818
819 dst_hold(&rt->dst);
820 read_unlock_bh(&table->tb6_lock);
821
822 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
823 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
824 else if (!(rt->dst.flags & DST_HOST))
825 nrt = rt6_alloc_clone(rt, &fl6->daddr);
826 else
827 goto out2;
828
829 dst_release(&rt->dst);
830 rt = nrt ? : net->ipv6.ip6_null_entry;
831
832 dst_hold(&rt->dst);
833 if (nrt) {
834 err = ip6_ins_rt(nrt);
835 if (!err)
836 goto out2;
837 }
838
839 if (--attempts <= 0)
840 goto out2;
841
842 /*
843 * Race condition! In the gap, when table->tb6_lock was
844 * released someone could insert this route. Relookup.
845 */
846 dst_release(&rt->dst);
847 goto relookup;
848
849out:
850 if (reachable) {
851 reachable = 0;
852 goto restart_2;
853 }
854 dst_hold(&rt->dst);
855 read_unlock_bh(&table->tb6_lock);
856out2:
857 rt->dst.lastuse = jiffies;
858 rt->dst.__use++;
859
860 return rt;
861}
862
863static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
864 struct flowi6 *fl6, int flags)
865{
866 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
867}
868
869void ip6_route_input(struct sk_buff *skb)
870{
871 const struct ipv6hdr *iph = ipv6_hdr(skb);
872 struct net *net = dev_net(skb->dev);
873 int flags = RT6_LOOKUP_F_HAS_SADDR;
874 struct flowi6 fl6 = {
875 .flowi6_iif = skb->dev->ifindex,
876 .daddr = iph->daddr,
877 .saddr = iph->saddr,
878 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
879 .flowi6_mark = skb->mark,
880 .flowi6_proto = iph->nexthdr,
881 };
882
883 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
884 flags |= RT6_LOOKUP_F_IFACE;
885
886 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
887}
888
889static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
890 struct flowi6 *fl6, int flags)
891{
892 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
893}
894
895struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
896 struct flowi6 *fl6)
897{
898 int flags = 0;
899
900 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
901 flags |= RT6_LOOKUP_F_IFACE;
902
903 if (!ipv6_addr_any(&fl6->saddr))
904 flags |= RT6_LOOKUP_F_HAS_SADDR;
905 else if (sk)
906 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
907
908 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
909}
910
911EXPORT_SYMBOL(ip6_route_output);
912
913struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
914{
915 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
916 struct dst_entry *new = NULL;
917
918 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
919 if (rt) {
920 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
921
922 new = &rt->dst;
923
924 new->__use = 1;
925 new->input = dst_discard;
926 new->output = dst_discard;
927
928 if (dst_metrics_read_only(&ort->dst))
929 new->_metrics = ort->dst._metrics;
930 else
931 dst_copy_metrics(new, &ort->dst);
932 rt->rt6i_idev = ort->rt6i_idev;
933 if (rt->rt6i_idev)
934 in6_dev_hold(rt->rt6i_idev);
935 rt->rt6i_expires = 0;
936
937 rt->rt6i_gateway = ort->rt6i_gateway;
938 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
939 rt->rt6i_metric = 0;
940
941 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
942#ifdef CONFIG_IPV6_SUBTREES
943 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
944#endif
945
946 dst_free(new);
947 }
948
949 dst_release(dst_orig);
950 return new ? new : ERR_PTR(-ENOMEM);
951}
952
953/*
954 * Destination cache support functions
955 */
956
957static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
958{
959 struct rt6_info *rt;
960
961 rt = (struct rt6_info *) dst;
962
963 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
964 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
965 if (!rt->rt6i_peer)
966 rt6_bind_peer(rt, 0);
967 rt->rt6i_peer_genid = rt6_peer_genid();
968 }
969 return dst;
970 }
971 return NULL;
972}
973
974static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
975{
976 struct rt6_info *rt = (struct rt6_info *) dst;
977
978 if (rt) {
979 if (rt->rt6i_flags & RTF_CACHE) {
980 if (rt6_check_expired(rt)) {
981 ip6_del_rt(rt);
982 dst = NULL;
983 }
984 } else {
985 dst_release(dst);
986 dst = NULL;
987 }
988 }
989 return dst;
990}
991
992static void ip6_link_failure(struct sk_buff *skb)
993{
994 struct rt6_info *rt;
995
996 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
997
998 rt = (struct rt6_info *) skb_dst(skb);
999 if (rt) {
1000 if (rt->rt6i_flags & RTF_CACHE) {
1001 dst_set_expires(&rt->dst, 0);
1002 rt->rt6i_flags |= RTF_EXPIRES;
1003 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1004 rt->rt6i_node->fn_sernum = -1;
1005 }
1006}
1007
1008static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1009{
1010 struct rt6_info *rt6 = (struct rt6_info*)dst;
1011
1012 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1013 rt6->rt6i_flags |= RTF_MODIFIED;
1014 if (mtu < IPV6_MIN_MTU) {
1015 u32 features = dst_metric(dst, RTAX_FEATURES);
1016 mtu = IPV6_MIN_MTU;
1017 features |= RTAX_FEATURE_ALLFRAG;
1018 dst_metric_set(dst, RTAX_FEATURES, features);
1019 }
1020 dst_metric_set(dst, RTAX_MTU, mtu);
1021 }
1022}
1023
1024static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1025{
1026 struct net_device *dev = dst->dev;
1027 unsigned int mtu = dst_mtu(dst);
1028 struct net *net = dev_net(dev);
1029
1030 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1031
1032 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1033 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1034
1035 /*
1036 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1037 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1038 * IPV6_MAXPLEN is also valid and means: "any MSS,
1039 * rely only on pmtu discovery"
1040 */
1041 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1042 mtu = IPV6_MAXPLEN;
1043 return mtu;
1044}
1045
1046static unsigned int ip6_mtu(const struct dst_entry *dst)
1047{
1048 struct inet6_dev *idev;
1049 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1050
1051 if (mtu)
1052 return mtu;
1053
1054 mtu = IPV6_MIN_MTU;
1055
1056 rcu_read_lock();
1057 idev = __in6_dev_get(dst->dev);
1058 if (idev)
1059 mtu = idev->cnf.mtu6;
1060 rcu_read_unlock();
1061
1062 return mtu;
1063}
1064
1065static struct dst_entry *icmp6_dst_gc_list;
1066static DEFINE_SPINLOCK(icmp6_dst_lock);
1067
1068struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1069 struct neighbour *neigh,
1070 const struct in6_addr *addr)
1071{
1072 struct rt6_info *rt;
1073 struct inet6_dev *idev = in6_dev_get(dev);
1074 struct net *net = dev_net(dev);
1075
1076 if (unlikely(!idev))
1077 return NULL;
1078
1079 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1080 if (unlikely(!rt)) {
1081 in6_dev_put(idev);
1082 goto out;
1083 }
1084
1085 if (neigh)
1086 neigh_hold(neigh);
1087 else {
1088 neigh = ndisc_get_neigh(dev, addr);
1089 if (IS_ERR(neigh))
1090 neigh = NULL;
1091 }
1092
1093 rt->dst.flags |= DST_HOST;
1094 rt->dst.output = ip6_output;
1095 dst_set_neighbour(&rt->dst, neigh);
1096 atomic_set(&rt->dst.__refcnt, 1);
1097 rt->rt6i_dst.addr = *addr;
1098 rt->rt6i_dst.plen = 128;
1099 rt->rt6i_idev = idev;
1100 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1101
1102 spin_lock_bh(&icmp6_dst_lock);
1103 rt->dst.next = icmp6_dst_gc_list;
1104 icmp6_dst_gc_list = &rt->dst;
1105 spin_unlock_bh(&icmp6_dst_lock);
1106
1107 fib6_force_start_gc(net);
1108
1109out:
1110 return &rt->dst;
1111}
1112
1113int icmp6_dst_gc(void)
1114{
1115 struct dst_entry *dst, **pprev;
1116 int more = 0;
1117
1118 spin_lock_bh(&icmp6_dst_lock);
1119 pprev = &icmp6_dst_gc_list;
1120
1121 while ((dst = *pprev) != NULL) {
1122 if (!atomic_read(&dst->__refcnt)) {
1123 *pprev = dst->next;
1124 dst_free(dst);
1125 } else {
1126 pprev = &dst->next;
1127 ++more;
1128 }
1129 }
1130
1131 spin_unlock_bh(&icmp6_dst_lock);
1132
1133 return more;
1134}
1135
1136static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1137 void *arg)
1138{
1139 struct dst_entry *dst, **pprev;
1140
1141 spin_lock_bh(&icmp6_dst_lock);
1142 pprev = &icmp6_dst_gc_list;
1143 while ((dst = *pprev) != NULL) {
1144 struct rt6_info *rt = (struct rt6_info *) dst;
1145 if (func(rt, arg)) {
1146 *pprev = dst->next;
1147 dst_free(dst);
1148 } else {
1149 pprev = &dst->next;
1150 }
1151 }
1152 spin_unlock_bh(&icmp6_dst_lock);
1153}
1154
1155static int ip6_dst_gc(struct dst_ops *ops)
1156{
1157 unsigned long now = jiffies;
1158 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1159 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1160 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1161 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1162 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1163 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1164 int entries;
1165
1166 entries = dst_entries_get_fast(ops);
1167 if (time_after(rt_last_gc + rt_min_interval, now) &&
1168 entries <= rt_max_size)
1169 goto out;
1170
1171 net->ipv6.ip6_rt_gc_expire++;
1172 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1173 net->ipv6.ip6_rt_last_gc = now;
1174 entries = dst_entries_get_slow(ops);
1175 if (entries < ops->gc_thresh)
1176 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1177out:
1178 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1179 return entries > rt_max_size;
1180}
1181
1182/* Clean host part of a prefix. Not necessary in radix tree,
1183 but results in cleaner routing tables.
1184
1185 Remove it only when all the things will work!
1186 */
1187
1188int ip6_dst_hoplimit(struct dst_entry *dst)
1189{
1190 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1191 if (hoplimit == 0) {
1192 struct net_device *dev = dst->dev;
1193 struct inet6_dev *idev;
1194
1195 rcu_read_lock();
1196 idev = __in6_dev_get(dev);
1197 if (idev)
1198 hoplimit = idev->cnf.hop_limit;
1199 else
1200 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1201 rcu_read_unlock();
1202 }
1203 return hoplimit;
1204}
1205EXPORT_SYMBOL(ip6_dst_hoplimit);
1206
1207/*
1208 *
1209 */
1210
1211int ip6_route_add(struct fib6_config *cfg)
1212{
1213 int err;
1214 struct net *net = cfg->fc_nlinfo.nl_net;
1215 struct rt6_info *rt = NULL;
1216 struct net_device *dev = NULL;
1217 struct inet6_dev *idev = NULL;
1218 struct fib6_table *table;
1219 int addr_type;
1220
1221 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1222 return -EINVAL;
1223#ifndef CONFIG_IPV6_SUBTREES
1224 if (cfg->fc_src_len)
1225 return -EINVAL;
1226#endif
1227 if (cfg->fc_ifindex) {
1228 err = -ENODEV;
1229 dev = dev_get_by_index(net, cfg->fc_ifindex);
1230 if (!dev)
1231 goto out;
1232 idev = in6_dev_get(dev);
1233 if (!idev)
1234 goto out;
1235 }
1236
1237 if (cfg->fc_metric == 0)
1238 cfg->fc_metric = IP6_RT_PRIO_USER;
1239
1240 err = -ENOBUFS;
1241 if (cfg->fc_nlinfo.nlh &&
1242 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1243 table = fib6_get_table(net, cfg->fc_table);
1244 if (!table) {
1245 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1246 table = fib6_new_table(net, cfg->fc_table);
1247 }
1248 } else {
1249 table = fib6_new_table(net, cfg->fc_table);
1250 }
1251
1252 if (!table)
1253 goto out;
1254
1255 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1256
1257 if (!rt) {
1258 err = -ENOMEM;
1259 goto out;
1260 }
1261
1262 rt->dst.obsolete = -1;
1263 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1264 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1265 0;
1266
1267 if (cfg->fc_protocol == RTPROT_UNSPEC)
1268 cfg->fc_protocol = RTPROT_BOOT;
1269 rt->rt6i_protocol = cfg->fc_protocol;
1270
1271 addr_type = ipv6_addr_type(&cfg->fc_dst);
1272
1273 if (addr_type & IPV6_ADDR_MULTICAST)
1274 rt->dst.input = ip6_mc_input;
1275 else if (cfg->fc_flags & RTF_LOCAL)
1276 rt->dst.input = ip6_input;
1277 else
1278 rt->dst.input = ip6_forward;
1279
1280 rt->dst.output = ip6_output;
1281
1282 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1283 rt->rt6i_dst.plen = cfg->fc_dst_len;
1284 if (rt->rt6i_dst.plen == 128)
1285 rt->dst.flags |= DST_HOST;
1286
1287 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1288 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1289 if (!metrics) {
1290 err = -ENOMEM;
1291 goto out;
1292 }
1293 dst_init_metrics(&rt->dst, metrics, 0);
1294 }
1295#ifdef CONFIG_IPV6_SUBTREES
1296 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1297 rt->rt6i_src.plen = cfg->fc_src_len;
1298#endif
1299
1300 rt->rt6i_metric = cfg->fc_metric;
1301
1302 /* We cannot add true routes via loopback here,
1303 they would result in kernel looping; promote them to reject routes
1304 */
1305 if ((cfg->fc_flags & RTF_REJECT) ||
1306 (dev && (dev->flags & IFF_LOOPBACK) &&
1307 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1308 !(cfg->fc_flags & RTF_LOCAL))) {
1309 /* hold loopback dev/idev if we haven't done so. */
1310 if (dev != net->loopback_dev) {
1311 if (dev) {
1312 dev_put(dev);
1313 in6_dev_put(idev);
1314 }
1315 dev = net->loopback_dev;
1316 dev_hold(dev);
1317 idev = in6_dev_get(dev);
1318 if (!idev) {
1319 err = -ENODEV;
1320 goto out;
1321 }
1322 }
1323 rt->dst.output = ip6_pkt_discard_out;
1324 rt->dst.input = ip6_pkt_discard;
1325 rt->dst.error = -ENETUNREACH;
1326 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1327 goto install_route;
1328 }
1329
1330 if (cfg->fc_flags & RTF_GATEWAY) {
1331 const struct in6_addr *gw_addr;
1332 int gwa_type;
1333
1334 gw_addr = &cfg->fc_gateway;
1335 rt->rt6i_gateway = *gw_addr;
1336 gwa_type = ipv6_addr_type(gw_addr);
1337
1338 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1339 struct rt6_info *grt;
1340
1341 /* IPv6 strictly inhibits using not link-local
1342 addresses as nexthop address.
1343 Otherwise, router will not able to send redirects.
1344 It is very good, but in some (rare!) circumstances
1345 (SIT, PtP, NBMA NOARP links) it is handy to allow
1346 some exceptions. --ANK
1347 */
1348 err = -EINVAL;
1349 if (!(gwa_type & IPV6_ADDR_UNICAST))
1350 goto out;
1351
1352 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1353
1354 err = -EHOSTUNREACH;
1355 if (!grt)
1356 goto out;
1357 if (dev) {
1358 if (dev != grt->rt6i_dev) {
1359 dst_release(&grt->dst);
1360 goto out;
1361 }
1362 } else {
1363 dev = grt->rt6i_dev;
1364 idev = grt->rt6i_idev;
1365 dev_hold(dev);
1366 in6_dev_hold(grt->rt6i_idev);
1367 }
1368 if (!(grt->rt6i_flags & RTF_GATEWAY))
1369 err = 0;
1370 dst_release(&grt->dst);
1371
1372 if (err)
1373 goto out;
1374 }
1375 err = -EINVAL;
1376 if (!dev || (dev->flags & IFF_LOOPBACK))
1377 goto out;
1378 }
1379
1380 err = -ENODEV;
1381 if (!dev)
1382 goto out;
1383
1384 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1385 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1386 err = -EINVAL;
1387 goto out;
1388 }
1389 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1390 rt->rt6i_prefsrc.plen = 128;
1391 } else
1392 rt->rt6i_prefsrc.plen = 0;
1393
1394 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1395 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1396 if (IS_ERR(n)) {
1397 err = PTR_ERR(n);
1398 goto out;
1399 }
1400 dst_set_neighbour(&rt->dst, n);
1401 }
1402
1403 rt->rt6i_flags = cfg->fc_flags;
1404
1405install_route:
1406 if (cfg->fc_mx) {
1407 struct nlattr *nla;
1408 int remaining;
1409
1410 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1411 int type = nla_type(nla);
1412
1413 if (type) {
1414 if (type > RTAX_MAX) {
1415 err = -EINVAL;
1416 goto out;
1417 }
1418
1419 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1420 }
1421 }
1422 }
1423
1424 rt->dst.dev = dev;
1425 rt->rt6i_idev = idev;
1426 rt->rt6i_table = table;
1427
1428 cfg->fc_nlinfo.nl_net = dev_net(dev);
1429
1430 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1431
1432out:
1433 if (dev)
1434 dev_put(dev);
1435 if (idev)
1436 in6_dev_put(idev);
1437 if (rt)
1438 dst_free(&rt->dst);
1439 return err;
1440}
1441
1442static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1443{
1444 int err;
1445 struct fib6_table *table;
1446 struct net *net = dev_net(rt->rt6i_dev);
1447
1448 if (rt == net->ipv6.ip6_null_entry)
1449 return -ENOENT;
1450
1451 table = rt->rt6i_table;
1452 write_lock_bh(&table->tb6_lock);
1453
1454 err = fib6_del(rt, info);
1455 dst_release(&rt->dst);
1456
1457 write_unlock_bh(&table->tb6_lock);
1458
1459 return err;
1460}
1461
1462int ip6_del_rt(struct rt6_info *rt)
1463{
1464 struct nl_info info = {
1465 .nl_net = dev_net(rt->rt6i_dev),
1466 };
1467 return __ip6_del_rt(rt, &info);
1468}
1469
1470static int ip6_route_del(struct fib6_config *cfg)
1471{
1472 struct fib6_table *table;
1473 struct fib6_node *fn;
1474 struct rt6_info *rt;
1475 int err = -ESRCH;
1476
1477 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1478 if (!table)
1479 return err;
1480
1481 read_lock_bh(&table->tb6_lock);
1482
1483 fn = fib6_locate(&table->tb6_root,
1484 &cfg->fc_dst, cfg->fc_dst_len,
1485 &cfg->fc_src, cfg->fc_src_len);
1486
1487 if (fn) {
1488 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1489 if (cfg->fc_ifindex &&
1490 (!rt->rt6i_dev ||
1491 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1492 continue;
1493 if (cfg->fc_flags & RTF_GATEWAY &&
1494 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1495 continue;
1496 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1497 continue;
1498 dst_hold(&rt->dst);
1499 read_unlock_bh(&table->tb6_lock);
1500
1501 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1502 }
1503 }
1504 read_unlock_bh(&table->tb6_lock);
1505
1506 return err;
1507}
1508
1509/*
1510 * Handle redirects
1511 */
1512struct ip6rd_flowi {
1513 struct flowi6 fl6;
1514 struct in6_addr gateway;
1515};
1516
1517static struct rt6_info *__ip6_route_redirect(struct net *net,
1518 struct fib6_table *table,
1519 struct flowi6 *fl6,
1520 int flags)
1521{
1522 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1523 struct rt6_info *rt;
1524 struct fib6_node *fn;
1525
1526 /*
1527 * Get the "current" route for this destination and
1528 * check if the redirect has come from approriate router.
1529 *
1530 * RFC 2461 specifies that redirects should only be
1531 * accepted if they come from the nexthop to the target.
1532 * Due to the way the routes are chosen, this notion
1533 * is a bit fuzzy and one might need to check all possible
1534 * routes.
1535 */
1536
1537 read_lock_bh(&table->tb6_lock);
1538 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1539restart:
1540 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1541 /*
1542 * Current route is on-link; redirect is always invalid.
1543 *
1544 * Seems, previous statement is not true. It could
1545 * be node, which looks for us as on-link (f.e. proxy ndisc)
1546 * But then router serving it might decide, that we should
1547 * know truth 8)8) --ANK (980726).
1548 */
1549 if (rt6_check_expired(rt))
1550 continue;
1551 if (!(rt->rt6i_flags & RTF_GATEWAY))
1552 continue;
1553 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1554 continue;
1555 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1556 continue;
1557 break;
1558 }
1559
1560 if (!rt)
1561 rt = net->ipv6.ip6_null_entry;
1562 BACKTRACK(net, &fl6->saddr);
1563out:
1564 dst_hold(&rt->dst);
1565
1566 read_unlock_bh(&table->tb6_lock);
1567
1568 return rt;
1569};
1570
1571static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1572 const struct in6_addr *src,
1573 const struct in6_addr *gateway,
1574 struct net_device *dev)
1575{
1576 int flags = RT6_LOOKUP_F_HAS_SADDR;
1577 struct net *net = dev_net(dev);
1578 struct ip6rd_flowi rdfl = {
1579 .fl6 = {
1580 .flowi6_oif = dev->ifindex,
1581 .daddr = *dest,
1582 .saddr = *src,
1583 },
1584 };
1585
1586 rdfl.gateway = *gateway;
1587
1588 if (rt6_need_strict(dest))
1589 flags |= RT6_LOOKUP_F_IFACE;
1590
1591 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1592 flags, __ip6_route_redirect);
1593}
1594
1595void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1596 const struct in6_addr *saddr,
1597 struct neighbour *neigh, u8 *lladdr, int on_link)
1598{
1599 struct rt6_info *rt, *nrt = NULL;
1600 struct netevent_redirect netevent;
1601 struct net *net = dev_net(neigh->dev);
1602
1603 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1604
1605 if (rt == net->ipv6.ip6_null_entry) {
1606 if (net_ratelimit())
1607 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1608 "for redirect target\n");
1609 goto out;
1610 }
1611
1612 /*
1613 * We have finally decided to accept it.
1614 */
1615
1616 neigh_update(neigh, lladdr, NUD_STALE,
1617 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1618 NEIGH_UPDATE_F_OVERRIDE|
1619 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1620 NEIGH_UPDATE_F_ISROUTER))
1621 );
1622
1623 /*
1624 * Redirect received -> path was valid.
1625 * Look, redirects are sent only in response to data packets,
1626 * so that this nexthop apparently is reachable. --ANK
1627 */
1628 dst_confirm(&rt->dst);
1629
1630 /* Duplicate redirect: silently ignore. */
1631 if (neigh == dst_get_neighbour_raw(&rt->dst))
1632 goto out;
1633
1634 nrt = ip6_rt_copy(rt, dest);
1635 if (!nrt)
1636 goto out;
1637
1638 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1639 if (on_link)
1640 nrt->rt6i_flags &= ~RTF_GATEWAY;
1641
1642 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1643 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1644
1645 if (ip6_ins_rt(nrt))
1646 goto out;
1647
1648 netevent.old = &rt->dst;
1649 netevent.new = &nrt->dst;
1650 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1651
1652 if (rt->rt6i_flags & RTF_CACHE) {
1653 ip6_del_rt(rt);
1654 return;
1655 }
1656
1657out:
1658 dst_release(&rt->dst);
1659}
1660
1661/*
1662 * Handle ICMP "packet too big" messages
1663 * i.e. Path MTU discovery
1664 */
1665
1666static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1667 struct net *net, u32 pmtu, int ifindex)
1668{
1669 struct rt6_info *rt, *nrt;
1670 int allfrag = 0;
1671again:
1672 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1673 if (!rt)
1674 return;
1675
1676 if (rt6_check_expired(rt)) {
1677 ip6_del_rt(rt);
1678 goto again;
1679 }
1680
1681 if (pmtu >= dst_mtu(&rt->dst))
1682 goto out;
1683
1684 if (pmtu < IPV6_MIN_MTU) {
1685 /*
1686 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1687 * MTU (1280) and a fragment header should always be included
1688 * after a node receiving Too Big message reporting PMTU is
1689 * less than the IPv6 Minimum Link MTU.
1690 */
1691 pmtu = IPV6_MIN_MTU;
1692 allfrag = 1;
1693 }
1694
1695 /* New mtu received -> path was valid.
1696 They are sent only in response to data packets,
1697 so that this nexthop apparently is reachable. --ANK
1698 */
1699 dst_confirm(&rt->dst);
1700
1701 /* Host route. If it is static, it would be better
1702 not to override it, but add new one, so that
1703 when cache entry will expire old pmtu
1704 would return automatically.
1705 */
1706 if (rt->rt6i_flags & RTF_CACHE) {
1707 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1708 if (allfrag) {
1709 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1710 features |= RTAX_FEATURE_ALLFRAG;
1711 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1712 }
1713 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1714 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1715 goto out;
1716 }
1717
1718 /* Network route.
1719 Two cases are possible:
1720 1. It is connected route. Action: COW
1721 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1722 */
1723 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1724 nrt = rt6_alloc_cow(rt, daddr, saddr);
1725 else
1726 nrt = rt6_alloc_clone(rt, daddr);
1727
1728 if (nrt) {
1729 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1730 if (allfrag) {
1731 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1732 features |= RTAX_FEATURE_ALLFRAG;
1733 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1734 }
1735
1736 /* According to RFC 1981, detecting PMTU increase shouldn't be
1737 * happened within 5 mins, the recommended timer is 10 mins.
1738 * Here this route expiration time is set to ip6_rt_mtu_expires
1739 * which is 10 mins. After 10 mins the decreased pmtu is expired
1740 * and detecting PMTU increase will be automatically happened.
1741 */
1742 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1743 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1744
1745 ip6_ins_rt(nrt);
1746 }
1747out:
1748 dst_release(&rt->dst);
1749}
1750
1751void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1752 struct net_device *dev, u32 pmtu)
1753{
1754 struct net *net = dev_net(dev);
1755
1756 /*
1757 * RFC 1981 states that a node "MUST reduce the size of the packets it
1758 * is sending along the path" that caused the Packet Too Big message.
1759 * Since it's not possible in the general case to determine which
1760 * interface was used to send the original packet, we update the MTU
1761 * on the interface that will be used to send future packets. We also
1762 * update the MTU on the interface that received the Packet Too Big in
1763 * case the original packet was forced out that interface with
1764 * SO_BINDTODEVICE or similar. This is the next best thing to the
1765 * correct behaviour, which would be to update the MTU on all
1766 * interfaces.
1767 */
1768 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1769 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1770}
1771
1772/*
1773 * Misc support functions
1774 */
1775
1776static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1777 const struct in6_addr *dest)
1778{
1779 struct net *net = dev_net(ort->rt6i_dev);
1780 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1781 ort->dst.dev, 0);
1782
1783 if (rt) {
1784 rt->dst.input = ort->dst.input;
1785 rt->dst.output = ort->dst.output;
1786 rt->dst.flags |= DST_HOST;
1787
1788 rt->rt6i_dst.addr = *dest;
1789 rt->rt6i_dst.plen = 128;
1790 dst_copy_metrics(&rt->dst, &ort->dst);
1791 rt->dst.error = ort->dst.error;
1792 rt->rt6i_idev = ort->rt6i_idev;
1793 if (rt->rt6i_idev)
1794 in6_dev_hold(rt->rt6i_idev);
1795 rt->dst.lastuse = jiffies;
1796 rt->rt6i_expires = 0;
1797
1798 rt->rt6i_gateway = ort->rt6i_gateway;
1799 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1800 rt->rt6i_metric = 0;
1801
1802#ifdef CONFIG_IPV6_SUBTREES
1803 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1804#endif
1805 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1806 rt->rt6i_table = ort->rt6i_table;
1807 }
1808 return rt;
1809}
1810
1811#ifdef CONFIG_IPV6_ROUTE_INFO
1812static struct rt6_info *rt6_get_route_info(struct net *net,
1813 const struct in6_addr *prefix, int prefixlen,
1814 const struct in6_addr *gwaddr, int ifindex)
1815{
1816 struct fib6_node *fn;
1817 struct rt6_info *rt = NULL;
1818 struct fib6_table *table;
1819
1820 table = fib6_get_table(net, RT6_TABLE_INFO);
1821 if (!table)
1822 return NULL;
1823
1824 write_lock_bh(&table->tb6_lock);
1825 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1826 if (!fn)
1827 goto out;
1828
1829 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1830 if (rt->rt6i_dev->ifindex != ifindex)
1831 continue;
1832 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1833 continue;
1834 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1835 continue;
1836 dst_hold(&rt->dst);
1837 break;
1838 }
1839out:
1840 write_unlock_bh(&table->tb6_lock);
1841 return rt;
1842}
1843
1844static struct rt6_info *rt6_add_route_info(struct net *net,
1845 const struct in6_addr *prefix, int prefixlen,
1846 const struct in6_addr *gwaddr, int ifindex,
1847 unsigned pref)
1848{
1849 struct fib6_config cfg = {
1850 .fc_table = RT6_TABLE_INFO,
1851 .fc_metric = IP6_RT_PRIO_USER,
1852 .fc_ifindex = ifindex,
1853 .fc_dst_len = prefixlen,
1854 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1855 RTF_UP | RTF_PREF(pref),
1856 .fc_nlinfo.pid = 0,
1857 .fc_nlinfo.nlh = NULL,
1858 .fc_nlinfo.nl_net = net,
1859 };
1860
1861 cfg.fc_dst = *prefix;
1862 cfg.fc_gateway = *gwaddr;
1863
1864 /* We should treat it as a default route if prefix length is 0. */
1865 if (!prefixlen)
1866 cfg.fc_flags |= RTF_DEFAULT;
1867
1868 ip6_route_add(&cfg);
1869
1870 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1871}
1872#endif
1873
1874struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1875{
1876 struct rt6_info *rt;
1877 struct fib6_table *table;
1878
1879 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1880 if (!table)
1881 return NULL;
1882
1883 write_lock_bh(&table->tb6_lock);
1884 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1885 if (dev == rt->rt6i_dev &&
1886 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1887 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1888 break;
1889 }
1890 if (rt)
1891 dst_hold(&rt->dst);
1892 write_unlock_bh(&table->tb6_lock);
1893 return rt;
1894}
1895
1896struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1897 struct net_device *dev,
1898 unsigned int pref)
1899{
1900 struct fib6_config cfg = {
1901 .fc_table = RT6_TABLE_DFLT,
1902 .fc_metric = IP6_RT_PRIO_USER,
1903 .fc_ifindex = dev->ifindex,
1904 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1905 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1906 .fc_nlinfo.pid = 0,
1907 .fc_nlinfo.nlh = NULL,
1908 .fc_nlinfo.nl_net = dev_net(dev),
1909 };
1910
1911 cfg.fc_gateway = *gwaddr;
1912
1913 ip6_route_add(&cfg);
1914
1915 return rt6_get_dflt_router(gwaddr, dev);
1916}
1917
1918void rt6_purge_dflt_routers(struct net *net)
1919{
1920 struct rt6_info *rt;
1921 struct fib6_table *table;
1922
1923 /* NOTE: Keep consistent with rt6_get_dflt_router */
1924 table = fib6_get_table(net, RT6_TABLE_DFLT);
1925 if (!table)
1926 return;
1927
1928restart:
1929 read_lock_bh(&table->tb6_lock);
1930 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1931 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1932 dst_hold(&rt->dst);
1933 read_unlock_bh(&table->tb6_lock);
1934 ip6_del_rt(rt);
1935 goto restart;
1936 }
1937 }
1938 read_unlock_bh(&table->tb6_lock);
1939}
1940
1941static void rtmsg_to_fib6_config(struct net *net,
1942 struct in6_rtmsg *rtmsg,
1943 struct fib6_config *cfg)
1944{
1945 memset(cfg, 0, sizeof(*cfg));
1946
1947 cfg->fc_table = RT6_TABLE_MAIN;
1948 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1949 cfg->fc_metric = rtmsg->rtmsg_metric;
1950 cfg->fc_expires = rtmsg->rtmsg_info;
1951 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1952 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1953 cfg->fc_flags = rtmsg->rtmsg_flags;
1954
1955 cfg->fc_nlinfo.nl_net = net;
1956
1957 cfg->fc_dst = rtmsg->rtmsg_dst;
1958 cfg->fc_src = rtmsg->rtmsg_src;
1959 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1960}
1961
1962int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1963{
1964 struct fib6_config cfg;
1965 struct in6_rtmsg rtmsg;
1966 int err;
1967
1968 switch(cmd) {
1969 case SIOCADDRT: /* Add a route */
1970 case SIOCDELRT: /* Delete a route */
1971 if (!capable(CAP_NET_ADMIN))
1972 return -EPERM;
1973 err = copy_from_user(&rtmsg, arg,
1974 sizeof(struct in6_rtmsg));
1975 if (err)
1976 return -EFAULT;
1977
1978 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1979
1980 rtnl_lock();
1981 switch (cmd) {
1982 case SIOCADDRT:
1983 err = ip6_route_add(&cfg);
1984 break;
1985 case SIOCDELRT:
1986 err = ip6_route_del(&cfg);
1987 break;
1988 default:
1989 err = -EINVAL;
1990 }
1991 rtnl_unlock();
1992
1993 return err;
1994 }
1995
1996 return -EINVAL;
1997}
1998
1999/*
2000 * Drop the packet on the floor
2001 */
2002
2003static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2004{
2005 int type;
2006 struct dst_entry *dst = skb_dst(skb);
2007 switch (ipstats_mib_noroutes) {
2008 case IPSTATS_MIB_INNOROUTES:
2009 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2010 if (type == IPV6_ADDR_ANY) {
2011 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2012 IPSTATS_MIB_INADDRERRORS);
2013 break;
2014 }
2015 /* FALLTHROUGH */
2016 case IPSTATS_MIB_OUTNOROUTES:
2017 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2018 ipstats_mib_noroutes);
2019 break;
2020 }
2021 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2022 kfree_skb(skb);
2023 return 0;
2024}
2025
2026static int ip6_pkt_discard(struct sk_buff *skb)
2027{
2028 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2029}
2030
2031static int ip6_pkt_discard_out(struct sk_buff *skb)
2032{
2033 skb->dev = skb_dst(skb)->dev;
2034 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2035}
2036
2037#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2038
2039static int ip6_pkt_prohibit(struct sk_buff *skb)
2040{
2041 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2042}
2043
2044static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2045{
2046 skb->dev = skb_dst(skb)->dev;
2047 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2048}
2049
2050#endif
2051
2052/*
2053 * Allocate a dst for local (unicast / anycast) address.
2054 */
2055
2056struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2057 const struct in6_addr *addr,
2058 int anycast)
2059{
2060 struct net *net = dev_net(idev->dev);
2061 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2062 net->loopback_dev, 0);
2063 struct neighbour *neigh;
2064
2065 if (!rt) {
2066 if (net_ratelimit())
2067 pr_warning("IPv6: Maximum number of routes reached,"
2068 " consider increasing route/max_size.\n");
2069 return ERR_PTR(-ENOMEM);
2070 }
2071
2072 in6_dev_hold(idev);
2073
2074 rt->dst.flags |= DST_HOST;
2075 rt->dst.input = ip6_input;
2076 rt->dst.output = ip6_output;
2077 rt->rt6i_idev = idev;
2078 rt->dst.obsolete = -1;
2079
2080 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2081 if (anycast)
2082 rt->rt6i_flags |= RTF_ANYCAST;
2083 else
2084 rt->rt6i_flags |= RTF_LOCAL;
2085 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2086 if (IS_ERR(neigh)) {
2087 dst_free(&rt->dst);
2088
2089 return ERR_CAST(neigh);
2090 }
2091 dst_set_neighbour(&rt->dst, neigh);
2092
2093 rt->rt6i_dst.addr = *addr;
2094 rt->rt6i_dst.plen = 128;
2095 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2096
2097 atomic_set(&rt->dst.__refcnt, 1);
2098
2099 return rt;
2100}
2101
2102int ip6_route_get_saddr(struct net *net,
2103 struct rt6_info *rt,
2104 const struct in6_addr *daddr,
2105 unsigned int prefs,
2106 struct in6_addr *saddr)
2107{
2108 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2109 int err = 0;
2110 if (rt->rt6i_prefsrc.plen)
2111 *saddr = rt->rt6i_prefsrc.addr;
2112 else
2113 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2114 daddr, prefs, saddr);
2115 return err;
2116}
2117
2118/* remove deleted ip from prefsrc entries */
2119struct arg_dev_net_ip {
2120 struct net_device *dev;
2121 struct net *net;
2122 struct in6_addr *addr;
2123};
2124
2125static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2126{
2127 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2128 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2129 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2130
2131 if (((void *)rt->rt6i_dev == dev || !dev) &&
2132 rt != net->ipv6.ip6_null_entry &&
2133 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2134 /* remove prefsrc entry */
2135 rt->rt6i_prefsrc.plen = 0;
2136 }
2137 return 0;
2138}
2139
2140void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2141{
2142 struct net *net = dev_net(ifp->idev->dev);
2143 struct arg_dev_net_ip adni = {
2144 .dev = ifp->idev->dev,
2145 .net = net,
2146 .addr = &ifp->addr,
2147 };
2148 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2149}
2150
2151struct arg_dev_net {
2152 struct net_device *dev;
2153 struct net *net;
2154};
2155
2156static int fib6_ifdown(struct rt6_info *rt, void *arg)
2157{
2158 const struct arg_dev_net *adn = arg;
2159 const struct net_device *dev = adn->dev;
2160
2161 if ((rt->rt6i_dev == dev || !dev) &&
2162 rt != adn->net->ipv6.ip6_null_entry) {
2163 RT6_TRACE("deleted by ifdown %p\n", rt);
2164 return -1;
2165 }
2166 return 0;
2167}
2168
2169void rt6_ifdown(struct net *net, struct net_device *dev)
2170{
2171 struct arg_dev_net adn = {
2172 .dev = dev,
2173 .net = net,
2174 };
2175
2176 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2177 icmp6_clean_all(fib6_ifdown, &adn);
2178}
2179
2180struct rt6_mtu_change_arg
2181{
2182 struct net_device *dev;
2183 unsigned mtu;
2184};
2185
2186static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2187{
2188 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2189 struct inet6_dev *idev;
2190
2191 /* In IPv6 pmtu discovery is not optional,
2192 so that RTAX_MTU lock cannot disable it.
2193 We still use this lock to block changes
2194 caused by addrconf/ndisc.
2195 */
2196
2197 idev = __in6_dev_get(arg->dev);
2198 if (!idev)
2199 return 0;
2200
2201 /* For administrative MTU increase, there is no way to discover
2202 IPv6 PMTU increase, so PMTU increase should be updated here.
2203 Since RFC 1981 doesn't include administrative MTU increase
2204 update PMTU increase is a MUST. (i.e. jumbo frame)
2205 */
2206 /*
2207 If new MTU is less than route PMTU, this new MTU will be the
2208 lowest MTU in the path, update the route PMTU to reflect PMTU
2209 decreases; if new MTU is greater than route PMTU, and the
2210 old MTU is the lowest MTU in the path, update the route PMTU
2211 to reflect the increase. In this case if the other nodes' MTU
2212 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2213 PMTU discouvery.
2214 */
2215 if (rt->rt6i_dev == arg->dev &&
2216 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2217 (dst_mtu(&rt->dst) >= arg->mtu ||
2218 (dst_mtu(&rt->dst) < arg->mtu &&
2219 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2220 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2221 }
2222 return 0;
2223}
2224
2225void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2226{
2227 struct rt6_mtu_change_arg arg = {
2228 .dev = dev,
2229 .mtu = mtu,
2230 };
2231
2232 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2233}
2234
2235static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2236 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2237 [RTA_OIF] = { .type = NLA_U32 },
2238 [RTA_IIF] = { .type = NLA_U32 },
2239 [RTA_PRIORITY] = { .type = NLA_U32 },
2240 [RTA_METRICS] = { .type = NLA_NESTED },
2241};
2242
2243static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2244 struct fib6_config *cfg)
2245{
2246 struct rtmsg *rtm;
2247 struct nlattr *tb[RTA_MAX+1];
2248 int err;
2249
2250 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2251 if (err < 0)
2252 goto errout;
2253
2254 err = -EINVAL;
2255 rtm = nlmsg_data(nlh);
2256 memset(cfg, 0, sizeof(*cfg));
2257
2258 cfg->fc_table = rtm->rtm_table;
2259 cfg->fc_dst_len = rtm->rtm_dst_len;
2260 cfg->fc_src_len = rtm->rtm_src_len;
2261 cfg->fc_flags = RTF_UP;
2262 cfg->fc_protocol = rtm->rtm_protocol;
2263
2264 if (rtm->rtm_type == RTN_UNREACHABLE)
2265 cfg->fc_flags |= RTF_REJECT;
2266
2267 if (rtm->rtm_type == RTN_LOCAL)
2268 cfg->fc_flags |= RTF_LOCAL;
2269
2270 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2271 cfg->fc_nlinfo.nlh = nlh;
2272 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2273
2274 if (tb[RTA_GATEWAY]) {
2275 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2276 cfg->fc_flags |= RTF_GATEWAY;
2277 }
2278
2279 if (tb[RTA_DST]) {
2280 int plen = (rtm->rtm_dst_len + 7) >> 3;
2281
2282 if (nla_len(tb[RTA_DST]) < plen)
2283 goto errout;
2284
2285 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2286 }
2287
2288 if (tb[RTA_SRC]) {
2289 int plen = (rtm->rtm_src_len + 7) >> 3;
2290
2291 if (nla_len(tb[RTA_SRC]) < plen)
2292 goto errout;
2293
2294 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2295 }
2296
2297 if (tb[RTA_PREFSRC])
2298 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2299
2300 if (tb[RTA_OIF])
2301 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2302
2303 if (tb[RTA_PRIORITY])
2304 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2305
2306 if (tb[RTA_METRICS]) {
2307 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2308 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2309 }
2310
2311 if (tb[RTA_TABLE])
2312 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2313
2314 err = 0;
2315errout:
2316 return err;
2317}
2318
2319static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2320{
2321 struct fib6_config cfg;
2322 int err;
2323
2324 err = rtm_to_fib6_config(skb, nlh, &cfg);
2325 if (err < 0)
2326 return err;
2327
2328 return ip6_route_del(&cfg);
2329}
2330
2331static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2332{
2333 struct fib6_config cfg;
2334 int err;
2335
2336 err = rtm_to_fib6_config(skb, nlh, &cfg);
2337 if (err < 0)
2338 return err;
2339
2340 return ip6_route_add(&cfg);
2341}
2342
2343static inline size_t rt6_nlmsg_size(void)
2344{
2345 return NLMSG_ALIGN(sizeof(struct rtmsg))
2346 + nla_total_size(16) /* RTA_SRC */
2347 + nla_total_size(16) /* RTA_DST */
2348 + nla_total_size(16) /* RTA_GATEWAY */
2349 + nla_total_size(16) /* RTA_PREFSRC */
2350 + nla_total_size(4) /* RTA_TABLE */
2351 + nla_total_size(4) /* RTA_IIF */
2352 + nla_total_size(4) /* RTA_OIF */
2353 + nla_total_size(4) /* RTA_PRIORITY */
2354 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2355 + nla_total_size(sizeof(struct rta_cacheinfo));
2356}
2357
2358static int rt6_fill_node(struct net *net,
2359 struct sk_buff *skb, struct rt6_info *rt,
2360 struct in6_addr *dst, struct in6_addr *src,
2361 int iif, int type, u32 pid, u32 seq,
2362 int prefix, int nowait, unsigned int flags)
2363{
2364 struct rtmsg *rtm;
2365 struct nlmsghdr *nlh;
2366 long expires;
2367 u32 table;
2368 struct neighbour *n;
2369
2370 if (prefix) { /* user wants prefix routes only */
2371 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2372 /* success since this is not a prefix route */
2373 return 1;
2374 }
2375 }
2376
2377 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2378 if (!nlh)
2379 return -EMSGSIZE;
2380
2381 rtm = nlmsg_data(nlh);
2382 rtm->rtm_family = AF_INET6;
2383 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2384 rtm->rtm_src_len = rt->rt6i_src.plen;
2385 rtm->rtm_tos = 0;
2386 if (rt->rt6i_table)
2387 table = rt->rt6i_table->tb6_id;
2388 else
2389 table = RT6_TABLE_UNSPEC;
2390 rtm->rtm_table = table;
2391 NLA_PUT_U32(skb, RTA_TABLE, table);
2392 if (rt->rt6i_flags & RTF_REJECT)
2393 rtm->rtm_type = RTN_UNREACHABLE;
2394 else if (rt->rt6i_flags & RTF_LOCAL)
2395 rtm->rtm_type = RTN_LOCAL;
2396 else if (rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
2397 rtm->rtm_type = RTN_LOCAL;
2398 else
2399 rtm->rtm_type = RTN_UNICAST;
2400 rtm->rtm_flags = 0;
2401 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2402 rtm->rtm_protocol = rt->rt6i_protocol;
2403 if (rt->rt6i_flags & RTF_DYNAMIC)
2404 rtm->rtm_protocol = RTPROT_REDIRECT;
2405 else if (rt->rt6i_flags & RTF_ADDRCONF)
2406 rtm->rtm_protocol = RTPROT_KERNEL;
2407 else if (rt->rt6i_flags & RTF_DEFAULT)
2408 rtm->rtm_protocol = RTPROT_RA;
2409
2410 if (rt->rt6i_flags & RTF_CACHE)
2411 rtm->rtm_flags |= RTM_F_CLONED;
2412
2413 if (dst) {
2414 NLA_PUT(skb, RTA_DST, 16, dst);
2415 rtm->rtm_dst_len = 128;
2416 } else if (rtm->rtm_dst_len)
2417 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2418#ifdef CONFIG_IPV6_SUBTREES
2419 if (src) {
2420 NLA_PUT(skb, RTA_SRC, 16, src);
2421 rtm->rtm_src_len = 128;
2422 } else if (rtm->rtm_src_len)
2423 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2424#endif
2425 if (iif) {
2426#ifdef CONFIG_IPV6_MROUTE
2427 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2428 int err = ip6mr_get_route(net, skb, rtm, nowait);
2429 if (err <= 0) {
2430 if (!nowait) {
2431 if (err == 0)
2432 return 0;
2433 goto nla_put_failure;
2434 } else {
2435 if (err == -EMSGSIZE)
2436 goto nla_put_failure;
2437 }
2438 }
2439 } else
2440#endif
2441 NLA_PUT_U32(skb, RTA_IIF, iif);
2442 } else if (dst) {
2443 struct in6_addr saddr_buf;
2444 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2445 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2446 }
2447
2448 if (rt->rt6i_prefsrc.plen) {
2449 struct in6_addr saddr_buf;
2450 saddr_buf = rt->rt6i_prefsrc.addr;
2451 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2452 }
2453
2454 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2455 goto nla_put_failure;
2456
2457 rcu_read_lock();
2458 n = dst_get_neighbour(&rt->dst);
2459 if (n)
2460 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2461 rcu_read_unlock();
2462
2463 if (rt->dst.dev)
2464 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2465
2466 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2467
2468 if (!(rt->rt6i_flags & RTF_EXPIRES))
2469 expires = 0;
2470 else if (rt->rt6i_expires - jiffies < INT_MAX)
2471 expires = rt->rt6i_expires - jiffies;
2472 else
2473 expires = INT_MAX;
2474
2475 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2476 expires, rt->dst.error) < 0)
2477 goto nla_put_failure;
2478
2479 return nlmsg_end(skb, nlh);
2480
2481nla_put_failure:
2482 nlmsg_cancel(skb, nlh);
2483 return -EMSGSIZE;
2484}
2485
2486int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2487{
2488 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2489 int prefix;
2490
2491 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2492 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2493 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2494 } else
2495 prefix = 0;
2496
2497 return rt6_fill_node(arg->net,
2498 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2499 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2500 prefix, 0, NLM_F_MULTI);
2501}
2502
2503static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2504{
2505 struct net *net = sock_net(in_skb->sk);
2506 struct nlattr *tb[RTA_MAX+1];
2507 struct rt6_info *rt;
2508 struct sk_buff *skb;
2509 struct rtmsg *rtm;
2510 struct flowi6 fl6;
2511 int err, iif = 0;
2512
2513 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2514 if (err < 0)
2515 goto errout;
2516
2517 err = -EINVAL;
2518 memset(&fl6, 0, sizeof(fl6));
2519
2520 if (tb[RTA_SRC]) {
2521 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2522 goto errout;
2523
2524 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2525 }
2526
2527 if (tb[RTA_DST]) {
2528 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2529 goto errout;
2530
2531 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2532 }
2533
2534 if (tb[RTA_IIF])
2535 iif = nla_get_u32(tb[RTA_IIF]);
2536
2537 if (tb[RTA_OIF])
2538 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2539
2540 if (iif) {
2541 struct net_device *dev;
2542 dev = __dev_get_by_index(net, iif);
2543 if (!dev) {
2544 err = -ENODEV;
2545 goto errout;
2546 }
2547 }
2548
2549 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2550 if (!skb) {
2551 err = -ENOBUFS;
2552 goto errout;
2553 }
2554
2555 /* Reserve room for dummy headers, this skb can pass
2556 through good chunk of routing engine.
2557 */
2558 skb_reset_mac_header(skb);
2559 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2560
2561 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2562 skb_dst_set(skb, &rt->dst);
2563
2564 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2565 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2566 nlh->nlmsg_seq, 0, 0, 0);
2567 if (err < 0) {
2568 kfree_skb(skb);
2569 goto errout;
2570 }
2571
2572 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2573errout:
2574 return err;
2575}
2576
2577void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2578{
2579 struct sk_buff *skb;
2580 struct net *net = info->nl_net;
2581 u32 seq;
2582 int err;
2583
2584 err = -ENOBUFS;
2585 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2586
2587 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2588 if (!skb)
2589 goto errout;
2590
2591 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2592 event, info->pid, seq, 0, 0, 0);
2593 if (err < 0) {
2594 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2595 WARN_ON(err == -EMSGSIZE);
2596 kfree_skb(skb);
2597 goto errout;
2598 }
2599 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2600 info->nlh, gfp_any());
2601 return;
2602errout:
2603 if (err < 0)
2604 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2605}
2606
2607static int ip6_route_dev_notify(struct notifier_block *this,
2608 unsigned long event, void *data)
2609{
2610 struct net_device *dev = (struct net_device *)data;
2611 struct net *net = dev_net(dev);
2612
2613 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2614 net->ipv6.ip6_null_entry->dst.dev = dev;
2615 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2616#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2617 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2618 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2619 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2620 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2621#endif
2622 }
2623
2624 return NOTIFY_OK;
2625}
2626
2627/*
2628 * /proc
2629 */
2630
2631#ifdef CONFIG_PROC_FS
2632
/*
 * Buffer-cursor bookkeeping for /proc output.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; it may be a leftover from a pre-seq_file implementation.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2641
2642static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2643{
2644 struct seq_file *m = p_arg;
2645 struct neighbour *n;
2646
2647 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2648
2649#ifdef CONFIG_IPV6_SUBTREES
2650 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2651#else
2652 seq_puts(m, "00000000000000000000000000000000 00 ");
2653#endif
2654 rcu_read_lock();
2655 n = dst_get_neighbour(&rt->dst);
2656 if (n) {
2657 seq_printf(m, "%pi6", n->primary_key);
2658 } else {
2659 seq_puts(m, "00000000000000000000000000000000");
2660 }
2661 rcu_read_unlock();
2662 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2663 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2664 rt->dst.__use, rt->rt6i_flags,
2665 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2666 return 0;
2667}
2668
2669static int ipv6_route_show(struct seq_file *m, void *v)
2670{
2671 struct net *net = (struct net *)m->private;
2672 fib6_clean_all(net, rt6_info_route, 0, m);
2673 return 0;
2674}
2675
2676static int ipv6_route_open(struct inode *inode, struct file *file)
2677{
2678 return single_open_net(inode, file, ipv6_route_show);
2679}
2680
2681static const struct file_operations ipv6_route_proc_fops = {
2682 .owner = THIS_MODULE,
2683 .open = ipv6_route_open,
2684 .read = seq_read,
2685 .llseek = seq_lseek,
2686 .release = single_release_net,
2687};
2688
2689static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2690{
2691 struct net *net = (struct net *)seq->private;
2692 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2693 net->ipv6.rt6_stats->fib_nodes,
2694 net->ipv6.rt6_stats->fib_route_nodes,
2695 net->ipv6.rt6_stats->fib_rt_alloc,
2696 net->ipv6.rt6_stats->fib_rt_entries,
2697 net->ipv6.rt6_stats->fib_rt_cache,
2698 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2699 net->ipv6.rt6_stats->fib_discarded_routes);
2700
2701 return 0;
2702}
2703
2704static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2705{
2706 return single_open_net(inode, file, rt6_stats_seq_show);
2707}
2708
2709static const struct file_operations rt6_stats_seq_fops = {
2710 .owner = THIS_MODULE,
2711 .open = rt6_stats_seq_open,
2712 .read = seq_read,
2713 .llseek = seq_lseek,
2714 .release = single_release_net,
2715};
2716#endif /* CONFIG_PROC_FS */
2717
2718#ifdef CONFIG_SYSCTL
2719
2720static
2721int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2722 void __user *buffer, size_t *lenp, loff_t *ppos)
2723{
2724 struct net *net;
2725 int delay;
2726 if (!write)
2727 return -EINVAL;
2728
2729 net = (struct net *)ctl->extra1;
2730 delay = net->ipv6.sysctl.flush_delay;
2731 proc_dointvec(ctl, write, buffer, lenp, ppos);
2732 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2733 return 0;
2734}
2735
2736ctl_table ipv6_route_table_template[] = {
2737 {
2738 .procname = "flush",
2739 .data = &init_net.ipv6.sysctl.flush_delay,
2740 .maxlen = sizeof(int),
2741 .mode = 0200,
2742 .proc_handler = ipv6_sysctl_rtcache_flush
2743 },
2744 {
2745 .procname = "gc_thresh",
2746 .data = &ip6_dst_ops_template.gc_thresh,
2747 .maxlen = sizeof(int),
2748 .mode = 0644,
2749 .proc_handler = proc_dointvec,
2750 },
2751 {
2752 .procname = "max_size",
2753 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2754 .maxlen = sizeof(int),
2755 .mode = 0644,
2756 .proc_handler = proc_dointvec,
2757 },
2758 {
2759 .procname = "gc_min_interval",
2760 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2761 .maxlen = sizeof(int),
2762 .mode = 0644,
2763 .proc_handler = proc_dointvec_jiffies,
2764 },
2765 {
2766 .procname = "gc_timeout",
2767 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2768 .maxlen = sizeof(int),
2769 .mode = 0644,
2770 .proc_handler = proc_dointvec_jiffies,
2771 },
2772 {
2773 .procname = "gc_interval",
2774 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2775 .maxlen = sizeof(int),
2776 .mode = 0644,
2777 .proc_handler = proc_dointvec_jiffies,
2778 },
2779 {
2780 .procname = "gc_elasticity",
2781 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2782 .maxlen = sizeof(int),
2783 .mode = 0644,
2784 .proc_handler = proc_dointvec,
2785 },
2786 {
2787 .procname = "mtu_expires",
2788 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2789 .maxlen = sizeof(int),
2790 .mode = 0644,
2791 .proc_handler = proc_dointvec_jiffies,
2792 },
2793 {
2794 .procname = "min_adv_mss",
2795 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2796 .maxlen = sizeof(int),
2797 .mode = 0644,
2798 .proc_handler = proc_dointvec,
2799 },
2800 {
2801 .procname = "gc_min_interval_ms",
2802 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2803 .maxlen = sizeof(int),
2804 .mode = 0644,
2805 .proc_handler = proc_dointvec_ms_jiffies,
2806 },
2807 { }
2808};
2809
2810struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2811{
2812 struct ctl_table *table;
2813
2814 table = kmemdup(ipv6_route_table_template,
2815 sizeof(ipv6_route_table_template),
2816 GFP_KERNEL);
2817
2818 if (table) {
2819 table[0].data = &net->ipv6.sysctl.flush_delay;
2820 table[0].extra1 = net;
2821 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2822 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2823 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2824 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2825 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2826 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2827 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2828 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2829 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2830 }
2831
2832 return table;
2833}
2834#endif
2835
2836static int __net_init ip6_route_net_init(struct net *net)
2837{
2838 int ret = -ENOMEM;
2839
2840 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2841 sizeof(net->ipv6.ip6_dst_ops));
2842
2843 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2844 goto out_ip6_dst_ops;
2845
2846 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2847 sizeof(*net->ipv6.ip6_null_entry),
2848 GFP_KERNEL);
2849 if (!net->ipv6.ip6_null_entry)
2850 goto out_ip6_dst_entries;
2851 net->ipv6.ip6_null_entry->dst.path =
2852 (struct dst_entry *)net->ipv6.ip6_null_entry;
2853 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2854 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2855 ip6_template_metrics, true);
2856
2857#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2858 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2859 sizeof(*net->ipv6.ip6_prohibit_entry),
2860 GFP_KERNEL);
2861 if (!net->ipv6.ip6_prohibit_entry)
2862 goto out_ip6_null_entry;
2863 net->ipv6.ip6_prohibit_entry->dst.path =
2864 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2865 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2866 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2867 ip6_template_metrics, true);
2868
2869 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2870 sizeof(*net->ipv6.ip6_blk_hole_entry),
2871 GFP_KERNEL);
2872 if (!net->ipv6.ip6_blk_hole_entry)
2873 goto out_ip6_prohibit_entry;
2874 net->ipv6.ip6_blk_hole_entry->dst.path =
2875 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2876 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2877 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2878 ip6_template_metrics, true);
2879#endif
2880
2881 net->ipv6.sysctl.flush_delay = 0;
2882 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2883 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2884 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2885 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2886 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2887 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2888 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2889
2890#ifdef CONFIG_PROC_FS
2891 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2892 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2893#endif
2894 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2895
2896 ret = 0;
2897out:
2898 return ret;
2899
2900#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2901out_ip6_prohibit_entry:
2902 kfree(net->ipv6.ip6_prohibit_entry);
2903out_ip6_null_entry:
2904 kfree(net->ipv6.ip6_null_entry);
2905#endif
2906out_ip6_dst_entries:
2907 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2908out_ip6_dst_ops:
2909 goto out;
2910}
2911
2912static void __net_exit ip6_route_net_exit(struct net *net)
2913{
2914#ifdef CONFIG_PROC_FS
2915 proc_net_remove(net, "ipv6_route");
2916 proc_net_remove(net, "rt6_stats");
2917#endif
2918 kfree(net->ipv6.ip6_null_entry);
2919#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2920 kfree(net->ipv6.ip6_prohibit_entry);
2921 kfree(net->ipv6.ip6_blk_hole_entry);
2922#endif
2923 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2924}
2925
2926static struct pernet_operations ip6_route_net_ops = {
2927 .init = ip6_route_net_init,
2928 .exit = ip6_route_net_exit,
2929};
2930
2931static struct notifier_block ip6_route_dev_notifier = {
2932 .notifier_call = ip6_route_dev_notify,
2933 .priority = 0,
2934};
2935
2936int __init ip6_route_init(void)
2937{
2938 int ret;
2939
2940 ret = -ENOMEM;
2941 ip6_dst_ops_template.kmem_cachep =
2942 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2943 SLAB_HWCACHE_ALIGN, NULL);
2944 if (!ip6_dst_ops_template.kmem_cachep)
2945 goto out;
2946
2947 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2948 if (ret)
2949 goto out_kmem_cache;
2950
2951 ret = register_pernet_subsys(&ip6_route_net_ops);
2952 if (ret)
2953 goto out_dst_entries;
2954
2955 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2956
2957 /* Registering of the loopback is done before this portion of code,
2958 * the loopback reference in rt6_info will not be taken, do it
2959 * manually for init_net */
2960 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2961 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2962 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2963 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2964 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2965 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2966 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2967 #endif
2968 ret = fib6_init();
2969 if (ret)
2970 goto out_register_subsys;
2971
2972 ret = xfrm6_init();
2973 if (ret)
2974 goto out_fib6_init;
2975
2976 ret = fib6_rules_init();
2977 if (ret)
2978 goto xfrm6_init;
2979
2980 ret = -ENOBUFS;
2981 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2982 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2983 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
2984 goto fib6_rules_init;
2985
2986 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2987 if (ret)
2988 goto fib6_rules_init;
2989
2990out:
2991 return ret;
2992
2993fib6_rules_init:
2994 fib6_rules_cleanup();
2995xfrm6_init:
2996 xfrm6_fini();
2997out_fib6_init:
2998 fib6_gc_cleanup();
2999out_register_subsys:
3000 unregister_pernet_subsys(&ip6_route_net_ops);
3001out_dst_entries:
3002 dst_entries_destroy(&ip6_dst_blackhole_ops);
3003out_kmem_cache:
3004 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3005 goto out;
3006}
3007
3008void ip6_route_cleanup(void)
3009{
3010 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3011 fib6_rules_cleanup();
3012 xfrm6_fini();
3013 fib6_gc_cleanup();
3014 unregister_pernet_subsys(&ip6_route_net_ops);
3015 dst_entries_destroy(&ip6_dst_blackhole_ops);
3016 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3017}
This page took 0.044072 seconds and 5 git commands to generate.