ipv6: Various cleanups in route.c
[deliverable/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
/*
 * Debug verbosity for this file.  With RT6_DEBUG below 3 both RDBG() and
 * RT6_TRACE() expand to nothing; at 3 or above they expand to printk calls.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
75
21efcfa0
ED
76static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
77 const struct in6_addr *dest);
1da177e4 78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 79static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 80static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
81static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82static void ip6_dst_destroy(struct dst_entry *);
83static void ip6_dst_ifdown(struct dst_entry *,
84 struct net_device *dev, int how);
569d3645 85static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
86
87static int ip6_pkt_discard(struct sk_buff *skb);
88static int ip6_pkt_discard_out(struct sk_buff *skb);
89static void ip6_link_failure(struct sk_buff *skb);
90static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91
70ceb4f5 92#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 93static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
94 const struct in6_addr *prefix, int prefixlen,
95 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 96 unsigned pref);
efa2cea0 97static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
98 const struct in6_addr *prefix, int prefixlen,
99 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
100#endif
101
06582540
DM
102static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
103{
104 struct rt6_info *rt = (struct rt6_info *) dst;
105 struct inet_peer *peer;
106 u32 *p = NULL;
107
8e2ec639
YZ
108 if (!(rt->dst.flags & DST_HOST))
109 return NULL;
110
06582540
DM
111 if (!rt->rt6i_peer)
112 rt6_bind_peer(rt, 1);
113
114 peer = rt->rt6i_peer;
115 if (peer) {
116 u32 *old_p = __DST_METRICS_PTR(old);
117 unsigned long prev, new;
118
119 p = peer->metrics;
120 if (inet_metrics_new(peer))
121 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122
123 new = (unsigned long) p;
124 prev = cmpxchg(&dst->_metrics, old, new);
125
126 if (prev != old) {
127 p = __DST_METRICS_PTR(prev);
128 if (prev & DST_METRICS_READ_ONLY)
129 p = NULL;
130 }
131 }
132 return p;
133}
134
d3aaeb38
DM
135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136{
137 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
138}
139
9a7ec3a9 140static struct dst_ops ip6_dst_ops_template = {
1da177e4 141 .family = AF_INET6,
09640e63 142 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
143 .gc = ip6_dst_gc,
144 .gc_thresh = 1024,
145 .check = ip6_dst_check,
0dbaee3b 146 .default_advmss = ip6_default_advmss,
ebb762f2 147 .mtu = ip6_mtu,
06582540 148 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
149 .destroy = ip6_dst_destroy,
150 .ifdown = ip6_dst_ifdown,
151 .negative_advice = ip6_negative_advice,
152 .link_failure = ip6_link_failure,
153 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 154 .local_out = __ip6_local_out,
d3aaeb38 155 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
156};
157
ebb762f2 158static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 159{
618f9bc7
SK
160 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
161
162 return mtu ? : dst->dev->mtu;
ec831ea7
RD
163}
164
14e50e57
DM
165static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
166{
167}
168
0972ddb2
HB
169static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
170 unsigned long old)
171{
172 return NULL;
173}
174
14e50e57
DM
175static struct dst_ops ip6_dst_blackhole_ops = {
176 .family = AF_INET6,
09640e63 177 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
178 .destroy = ip6_dst_destroy,
179 .check = ip6_dst_check,
ebb762f2 180 .mtu = ip6_blackhole_mtu,
214f45c9 181 .default_advmss = ip6_default_advmss,
14e50e57 182 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 183 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 184 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
185};
186
62fa8a84
DM
187static const u32 ip6_template_metrics[RTAX_MAX] = {
188 [RTAX_HOPLIMIT - 1] = 255,
189};
190
bdb3289f 191static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
192 .dst = {
193 .__refcnt = ATOMIC_INIT(1),
194 .__use = 1,
195 .obsolete = -1,
196 .error = -ENETUNREACH,
d8d1f30b
CG
197 .input = ip6_pkt_discard,
198 .output = ip6_pkt_discard_out,
1da177e4
LT
199 },
200 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 201 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
202 .rt6i_metric = ~(u32) 0,
203 .rt6i_ref = ATOMIC_INIT(1),
204};
205
101367c2
TG
206#ifdef CONFIG_IPV6_MULTIPLE_TABLES
207
6723ab54
DM
208static int ip6_pkt_prohibit(struct sk_buff *skb);
209static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 210
280a34c8 211static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
212 .dst = {
213 .__refcnt = ATOMIC_INIT(1),
214 .__use = 1,
215 .obsolete = -1,
216 .error = -EACCES,
d8d1f30b
CG
217 .input = ip6_pkt_prohibit,
218 .output = ip6_pkt_prohibit_out,
101367c2
TG
219 },
220 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 221 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
222 .rt6i_metric = ~(u32) 0,
223 .rt6i_ref = ATOMIC_INIT(1),
224};
225
bdb3289f 226static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
227 .dst = {
228 .__refcnt = ATOMIC_INIT(1),
229 .__use = 1,
230 .obsolete = -1,
231 .error = -EINVAL,
d8d1f30b
CG
232 .input = dst_discard,
233 .output = dst_discard,
101367c2
TG
234 },
235 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 236 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
237 .rt6i_metric = ~(u32) 0,
238 .rt6i_ref = ATOMIC_INIT(1),
239};
240
241#endif
242
1da177e4 243/* allocate dst with ip6_dst_ops */
5c1e6aa3 244static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
245 struct net_device *dev,
246 int flags)
1da177e4 247{
957c665f 248 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 249
38308473 250 if (rt)
fbe58186 251 memset(&rt->rt6i_table, 0,
38308473 252 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
253
254 return rt;
1da177e4
LT
255}
256
257static void ip6_dst_destroy(struct dst_entry *dst)
258{
259 struct rt6_info *rt = (struct rt6_info *)dst;
260 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 261 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 262
8e2ec639
YZ
263 if (!(rt->dst.flags & DST_HOST))
264 dst_destroy_metrics_generic(dst);
265
38308473 266 if (idev) {
1da177e4
LT
267 rt->rt6i_idev = NULL;
268 in6_dev_put(idev);
1ab1457c 269 }
b3419363 270 if (peer) {
b3419363
DM
271 rt->rt6i_peer = NULL;
272 inet_putpeer(peer);
273 }
274}
275
6431cbc2
DM
276static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
277
278static u32 rt6_peer_genid(void)
279{
280 return atomic_read(&__rt6_peer_genid);
281}
282
b3419363
DM
283void rt6_bind_peer(struct rt6_info *rt, int create)
284{
285 struct inet_peer *peer;
286
b3419363
DM
287 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
288 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
289 inet_putpeer(peer);
6431cbc2
DM
290 else
291 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
292}
293
294static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
295 int how)
296{
297 struct rt6_info *rt = (struct rt6_info *)dst;
298 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 299 struct net_device *loopback_dev =
c346dca1 300 dev_net(dev)->loopback_dev;
1da177e4 301
38308473 302 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
303 struct inet6_dev *loopback_idev =
304 in6_dev_get(loopback_dev);
38308473 305 if (loopback_idev) {
1da177e4
LT
306 rt->rt6i_idev = loopback_idev;
307 in6_dev_put(idev);
308 }
309 }
310}
311
312static __inline__ int rt6_check_expired(const struct rt6_info *rt)
313{
a02cec21
ED
314 return (rt->rt6i_flags & RTF_EXPIRES) &&
315 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
316}
317
b71d1d42 318static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 319{
a02cec21
ED
320 return ipv6_addr_type(daddr) &
321 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
322}
323
1da177e4 324/*
c71099ac 325 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
326 */
327
8ed67789
DL
328static inline struct rt6_info *rt6_device_match(struct net *net,
329 struct rt6_info *rt,
b71d1d42 330 const struct in6_addr *saddr,
1da177e4 331 int oif,
d420895e 332 int flags)
1da177e4
LT
333{
334 struct rt6_info *local = NULL;
335 struct rt6_info *sprt;
336
dd3abc4e
YH
337 if (!oif && ipv6_addr_any(saddr))
338 goto out;
339
d8d1f30b 340 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
341 struct net_device *dev = sprt->rt6i_dev;
342
343 if (oif) {
1da177e4
LT
344 if (dev->ifindex == oif)
345 return sprt;
346 if (dev->flags & IFF_LOOPBACK) {
38308473 347 if (!sprt->rt6i_idev ||
1da177e4 348 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 349 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 350 continue;
1ab1457c 351 if (local && (!oif ||
1da177e4
LT
352 local->rt6i_idev->dev->ifindex == oif))
353 continue;
354 }
355 local = sprt;
356 }
dd3abc4e
YH
357 } else {
358 if (ipv6_chk_addr(net, saddr, dev,
359 flags & RT6_LOOKUP_F_IFACE))
360 return sprt;
1da177e4 361 }
dd3abc4e 362 }
1da177e4 363
dd3abc4e 364 if (oif) {
1da177e4
LT
365 if (local)
366 return local;
367
d420895e 368 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 369 return net->ipv6.ip6_null_entry;
1da177e4 370 }
dd3abc4e 371out:
1da177e4
LT
372 return rt;
373}
374
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the neighbour entry behind @rt is not in
 * a NUD_VALID state and the last update is older than the interface's
 * rtr_probe_interval, send a Neighbour Solicitation to the solicited-node
 * multicast address of the neighbour.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.  The rate limit is enforced by stamping neigh->updated, so the
 * check and the update are done under the neighbour's write lock: with
 * only the read lock held, concurrent callers could all pass the time
 * check and each send a probe, and neigh->updated would be modified
 * without exclusive access.
 *
 * @rt may be NULL, in which case nothing is done.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	/* Cheap unlocked pre-check; re-validated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		/* Drop the lock before transmitting the solicitation. */
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
414
1da177e4 415/*
554cfb7e 416 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 417 */
b6f99a21 418static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
419{
420 struct net_device *dev = rt->rt6i_dev;
161980f4 421 if (!oif || dev->ifindex == oif)
554cfb7e 422 return 2;
161980f4
DM
423 if ((dev->flags & IFF_LOOPBACK) &&
424 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
425 return 1;
426 return 0;
554cfb7e 427}
1da177e4 428
b6f99a21 429static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 430{
f2c31e32 431 struct neighbour *neigh;
398bcbeb 432 int m;
f2c31e32
ED
433
434 rcu_read_lock();
435 neigh = dst_get_neighbour(&rt->dst);
4d0c5911
YH
436 if (rt->rt6i_flags & RTF_NONEXTHOP ||
437 !(rt->rt6i_flags & RTF_GATEWAY))
438 m = 1;
439 else if (neigh) {
554cfb7e
YH
440 read_lock_bh(&neigh->lock);
441 if (neigh->nud_state & NUD_VALID)
4d0c5911 442 m = 2;
398bcbeb
YH
443#ifdef CONFIG_IPV6_ROUTER_PREF
444 else if (neigh->nud_state & NUD_FAILED)
445 m = 0;
446#endif
447 else
ea73ee23 448 m = 1;
554cfb7e 449 read_unlock_bh(&neigh->lock);
398bcbeb
YH
450 } else
451 m = 0;
f2c31e32 452 rcu_read_unlock();
554cfb7e 453 return m;
1da177e4
LT
454}
455
554cfb7e
YH
456static int rt6_score_route(struct rt6_info *rt, int oif,
457 int strict)
1da177e4 458{
4d0c5911 459 int m, n;
1ab1457c 460
4d0c5911 461 m = rt6_check_dev(rt, oif);
77d16f45 462 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 463 return -1;
ebacaaa0
YH
464#ifdef CONFIG_IPV6_ROUTER_PREF
465 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
466#endif
4d0c5911 467 n = rt6_check_neigh(rt);
557e92ef 468 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
469 return -1;
470 return m;
471}
472
f11e6659
DM
473static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
474 int *mpri, struct rt6_info *match)
554cfb7e 475{
f11e6659
DM
476 int m;
477
478 if (rt6_check_expired(rt))
479 goto out;
480
481 m = rt6_score_route(rt, oif, strict);
482 if (m < 0)
483 goto out;
484
485 if (m > *mpri) {
486 if (strict & RT6_LOOKUP_F_REACHABLE)
487 rt6_probe(match);
488 *mpri = m;
489 match = rt;
490 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
491 rt6_probe(rt);
492 }
493
494out:
495 return match;
496}
497
498static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
499 struct rt6_info *rr_head,
500 u32 metric, int oif, int strict)
501{
502 struct rt6_info *rt, *match;
554cfb7e 503 int mpri = -1;
1da177e4 504
f11e6659
DM
505 match = NULL;
506 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 507 rt = rt->dst.rt6_next)
f11e6659
DM
508 match = find_match(rt, oif, strict, &mpri, match);
509 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 510 rt = rt->dst.rt6_next)
f11e6659 511 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 512
f11e6659
DM
513 return match;
514}
1da177e4 515
f11e6659
DM
516static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
517{
518 struct rt6_info *match, *rt0;
8ed67789 519 struct net *net;
1da177e4 520
f11e6659 521 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 522 __func__, fn->leaf, oif);
554cfb7e 523
f11e6659
DM
524 rt0 = fn->rr_ptr;
525 if (!rt0)
526 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 527
f11e6659 528 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 529
554cfb7e 530 if (!match &&
f11e6659 531 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 532 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 533
554cfb7e 534 /* no entries matched; do round-robin */
f11e6659
DM
535 if (!next || next->rt6i_metric != rt0->rt6i_metric)
536 next = fn->leaf;
537
538 if (next != rt0)
539 fn->rr_ptr = next;
1da177e4 540 }
1da177e4 541
f11e6659 542 RT6_TRACE("%s() => %p\n",
0dc47877 543 __func__, match);
1da177e4 544
c346dca1 545 net = dev_net(rt0->rt6i_dev);
a02cec21 546 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
547}
548
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option @opt of @len bytes received on @dev
 * from gateway @gwaddr.
 *
 * Returns -EINVAL for a malformed option or an invalid preference, 0
 * otherwise.  A zero lifetime deletes an existing route; a non-zero
 * lifetime adds the route when missing or updates the preference of an
 * existing one, and then sets or clears its expiry.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (!rt)
		return 0;

	if (!addrconf_finite_timeout(lifetime)) {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	} else {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
622
/*
 * BACKTRACK(net, saddr): when the enclosing function's `rt` is the null
 * entry, walk from `fn` towards the root, descending into a parent's
 * source subtree (looked up with saddr) when it has one that is not the
 * node we came from.  Jumps to the caller's `restart` label on the first
 * node carrying RTN_RTINFO, or to `out` once the top-level root is reached.
 *
 * Relies on `rt`, `fn` and the labels `out` and `restart` being defined in
 * the calling function.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 640
8ed67789
DL
641static struct rt6_info *ip6_pol_route_lookup(struct net *net,
642 struct fib6_table *table,
4c9483b2 643 struct flowi6 *fl6, int flags)
1da177e4
LT
644{
645 struct fib6_node *fn;
646 struct rt6_info *rt;
647
c71099ac 648 read_lock_bh(&table->tb6_lock);
4c9483b2 649 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
650restart:
651 rt = fn->leaf;
4c9483b2
DM
652 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
653 BACKTRACK(net, &fl6->saddr);
c71099ac 654out:
d8d1f30b 655 dst_use(&rt->dst, jiffies);
c71099ac 656 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
657 return rt;
658
659}
660
9acd9f3a
YH
661struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
662 const struct in6_addr *saddr, int oif, int strict)
c71099ac 663{
4c9483b2
DM
664 struct flowi6 fl6 = {
665 .flowi6_oif = oif,
666 .daddr = *daddr,
c71099ac
TG
667 };
668 struct dst_entry *dst;
77d16f45 669 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 670
adaa70bb 671 if (saddr) {
4c9483b2 672 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
673 flags |= RT6_LOOKUP_F_HAS_SADDR;
674 }
675
4c9483b2 676 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
677 if (dst->error == 0)
678 return (struct rt6_info *) dst;
679
680 dst_release(dst);
681
1da177e4
LT
682 return NULL;
683}
684
7159039a
YH
685EXPORT_SYMBOL(rt6_lookup);
686
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
692
86872cb5 693static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
694{
695 int err;
c71099ac 696 struct fib6_table *table;
1da177e4 697
c71099ac
TG
698 table = rt->rt6i_table;
699 write_lock_bh(&table->tb6_lock);
86872cb5 700 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 701 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
702
703 return err;
704}
705
40e22e8f
TG
706int ip6_ins_rt(struct rt6_info *rt)
707{
4d1169c1 708 struct nl_info info = {
c346dca1 709 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 710 };
528c4ceb 711 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
712}
713
21efcfa0
ED
714static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
715 const struct in6_addr *daddr,
b71d1d42 716 const struct in6_addr *saddr)
1da177e4 717{
1da177e4
LT
718 struct rt6_info *rt;
719
720 /*
721 * Clone the route.
722 */
723
21efcfa0 724 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
725
726 if (rt) {
14deae41
DM
727 struct neighbour *neigh;
728 int attempts = !in_softirq();
729
38308473 730 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
58c4fb86 731 if (rt->rt6i_dst.plen != 128 &&
21efcfa0 732 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 733 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 734 rt->rt6i_gateway = *daddr;
58c4fb86 735 }
1da177e4 736
1da177e4 737 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
738
739#ifdef CONFIG_IPV6_SUBTREES
740 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 741 rt->rt6i_src.addr = *saddr;
1da177e4
LT
742 rt->rt6i_src.plen = 128;
743 }
744#endif
745
14deae41
DM
746 retry:
747 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
748 if (IS_ERR(neigh)) {
749 struct net *net = dev_net(rt->rt6i_dev);
750 int saved_rt_min_interval =
751 net->ipv6.sysctl.ip6_rt_gc_min_interval;
752 int saved_rt_elasticity =
753 net->ipv6.sysctl.ip6_rt_gc_elasticity;
754
755 if (attempts-- > 0) {
756 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
757 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
758
86393e52 759 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
760
761 net->ipv6.sysctl.ip6_rt_gc_elasticity =
762 saved_rt_elasticity;
763 net->ipv6.sysctl.ip6_rt_gc_min_interval =
764 saved_rt_min_interval;
765 goto retry;
766 }
767
768 if (net_ratelimit())
769 printk(KERN_WARNING
7e1b33e5 770 "ipv6: Neighbour table overflow.\n");
d8d1f30b 771 dst_free(&rt->dst);
14deae41
DM
772 return NULL;
773 }
69cce1d1 774 dst_set_neighbour(&rt->dst, neigh);
1da177e4 775
95a9a5ba 776 }
1da177e4 777
95a9a5ba
YH
778 return rt;
779}
1da177e4 780
21efcfa0
ED
781static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
782 const struct in6_addr *daddr)
299d9939 783{
21efcfa0
ED
784 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
785
299d9939 786 if (rt) {
299d9939 787 rt->rt6i_flags |= RTF_CACHE;
f2c31e32 788 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
299d9939
YH
789 }
790 return rt;
791}
792
8ed67789 793static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 794 struct flowi6 *fl6, int flags)
1da177e4
LT
795{
796 struct fib6_node *fn;
519fbd87 797 struct rt6_info *rt, *nrt;
c71099ac 798 int strict = 0;
1da177e4 799 int attempts = 3;
519fbd87 800 int err;
53b7997f 801 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 802
77d16f45 803 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
804
805relookup:
c71099ac 806 read_lock_bh(&table->tb6_lock);
1da177e4 807
8238dd06 808restart_2:
4c9483b2 809 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
810
811restart:
4acad72d 812 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 813
4c9483b2 814 BACKTRACK(net, &fl6->saddr);
8ed67789 815 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 816 rt->rt6i_flags & RTF_CACHE)
1ddef044 817 goto out;
1da177e4 818
d8d1f30b 819 dst_hold(&rt->dst);
c71099ac 820 read_unlock_bh(&table->tb6_lock);
fb9de91e 821
f2c31e32 822 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 823 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 824 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 825 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
826 else
827 goto out2;
e40cf353 828
d8d1f30b 829 dst_release(&rt->dst);
8ed67789 830 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 831
d8d1f30b 832 dst_hold(&rt->dst);
519fbd87 833 if (nrt) {
40e22e8f 834 err = ip6_ins_rt(nrt);
519fbd87 835 if (!err)
1da177e4 836 goto out2;
1da177e4 837 }
1da177e4 838
519fbd87
YH
839 if (--attempts <= 0)
840 goto out2;
841
842 /*
c71099ac 843 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
844 * released someone could insert this route. Relookup.
845 */
d8d1f30b 846 dst_release(&rt->dst);
519fbd87
YH
847 goto relookup;
848
849out:
8238dd06
YH
850 if (reachable) {
851 reachable = 0;
852 goto restart_2;
853 }
d8d1f30b 854 dst_hold(&rt->dst);
c71099ac 855 read_unlock_bh(&table->tb6_lock);
1da177e4 856out2:
d8d1f30b
CG
857 rt->dst.lastuse = jiffies;
858 rt->dst.__use++;
c71099ac
TG
859
860 return rt;
1da177e4
LT
861}
862
8ed67789 863static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 864 struct flowi6 *fl6, int flags)
4acad72d 865{
4c9483b2 866 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
867}
868
c71099ac
TG
869void ip6_route_input(struct sk_buff *skb)
870{
b71d1d42 871 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 872 struct net *net = dev_net(skb->dev);
adaa70bb 873 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
874 struct flowi6 fl6 = {
875 .flowi6_iif = skb->dev->ifindex,
876 .daddr = iph->daddr,
877 .saddr = iph->saddr,
38308473 878 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
879 .flowi6_mark = skb->mark,
880 .flowi6_proto = iph->nexthdr,
c71099ac 881 };
adaa70bb 882
1d6e55f1 883 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 884 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 885
4c9483b2 886 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
887}
888
8ed67789 889static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 890 struct flowi6 *fl6, int flags)
1da177e4 891{
4c9483b2 892 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
893}
894
9c7a4f9c 895struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 896 struct flowi6 *fl6)
c71099ac
TG
897{
898 int flags = 0;
899
4c9483b2 900 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 901 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 902
4c9483b2 903 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 904 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
905 else if (sk)
906 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 907
4c9483b2 908 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
909}
910
7159039a 911EXPORT_SYMBOL(ip6_route_output);
1da177e4 912
2774c131 913struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 914{
5c1e6aa3 915 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
916 struct dst_entry *new = NULL;
917
5c1e6aa3 918 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 919 if (rt) {
cf911662
DM
920 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
921
d8d1f30b 922 new = &rt->dst;
14e50e57 923
14e50e57 924 new->__use = 1;
352e512c
HX
925 new->input = dst_discard;
926 new->output = dst_discard;
14e50e57 927
21efcfa0
ED
928 if (dst_metrics_read_only(&ort->dst))
929 new->_metrics = ort->dst._metrics;
930 else
931 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
932 rt->rt6i_idev = ort->rt6i_idev;
933 if (rt->rt6i_idev)
934 in6_dev_hold(rt->rt6i_idev);
935 rt->rt6i_expires = 0;
936
4e3fd7a0 937 rt->rt6i_gateway = ort->rt6i_gateway;
14e50e57
DM
938 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
939 rt->rt6i_metric = 0;
940
941 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
942#ifdef CONFIG_IPV6_SUBTREES
943 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
944#endif
945
946 dst_free(new);
947 }
948
69ead7af
DM
949 dst_release(dst_orig);
950 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 951}
14e50e57 952
1da177e4
LT
953/*
954 * Destination cache support functions
955 */
956
957static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
958{
959 struct rt6_info *rt;
960
961 rt = (struct rt6_info *) dst;
962
6431cbc2
DM
963 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
964 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
965 if (!rt->rt6i_peer)
966 rt6_bind_peer(rt, 0);
967 rt->rt6i_peer_genid = rt6_peer_genid();
968 }
1da177e4 969 return dst;
6431cbc2 970 }
1da177e4
LT
971 return NULL;
972}
973
974static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
975{
976 struct rt6_info *rt = (struct rt6_info *) dst;
977
978 if (rt) {
54c1a859
YH
979 if (rt->rt6i_flags & RTF_CACHE) {
980 if (rt6_check_expired(rt)) {
981 ip6_del_rt(rt);
982 dst = NULL;
983 }
984 } else {
1da177e4 985 dst_release(dst);
54c1a859
YH
986 dst = NULL;
987 }
1da177e4 988 }
54c1a859 989 return dst;
1da177e4
LT
990}
991
992static void ip6_link_failure(struct sk_buff *skb)
993{
994 struct rt6_info *rt;
995
3ffe533c 996 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 997
adf30907 998 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 999 if (rt) {
38308473 1000 if (rt->rt6i_flags & RTF_CACHE) {
d8d1f30b 1001 dst_set_expires(&rt->dst, 0);
1da177e4
LT
1002 rt->rt6i_flags |= RTF_EXPIRES;
1003 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1004 rt->rt6i_node->fn_sernum = -1;
1005 }
1006}
1007
1008static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1009{
1010 struct rt6_info *rt6 = (struct rt6_info*)dst;
1011
1012 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1013 rt6->rt6i_flags |= RTF_MODIFIED;
1014 if (mtu < IPV6_MIN_MTU) {
defb3519 1015 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1016 mtu = IPV6_MIN_MTU;
defb3519
DM
1017 features |= RTAX_FEATURE_ALLFRAG;
1018 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1019 }
defb3519 1020 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1021 }
1022}
1023
0dbaee3b 1024static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1025{
0dbaee3b
DM
1026 struct net_device *dev = dst->dev;
1027 unsigned int mtu = dst_mtu(dst);
1028 struct net *net = dev_net(dev);
1029
1da177e4
LT
1030 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1031
5578689a
DL
1032 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1033 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1034
1035 /*
1ab1457c
YH
1036 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1037 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1038 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1039 * rely only on pmtu discovery"
1040 */
1041 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1042 mtu = IPV6_MAXPLEN;
1043 return mtu;
1044}
1045
ebb762f2 1046static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1047{
d33e4553 1048 struct inet6_dev *idev;
618f9bc7
SK
1049 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1050
1051 if (mtu)
1052 return mtu;
1053
1054 mtu = IPV6_MIN_MTU;
d33e4553
DM
1055
1056 rcu_read_lock();
1057 idev = __in6_dev_get(dst->dev);
1058 if (idev)
1059 mtu = idev->cnf.mtu6;
1060 rcu_read_unlock();
1061
1062 return mtu;
1063}
1064
3b00944c
YH
1065static struct dst_entry *icmp6_dst_gc_list;
1066static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1067
3b00944c 1068struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1069 struct neighbour *neigh,
9acd9f3a 1070 const struct in6_addr *addr)
1da177e4
LT
1071{
1072 struct rt6_info *rt;
1073 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1074 struct net *net = dev_net(dev);
1da177e4 1075
38308473 1076 if (unlikely(!idev))
1da177e4
LT
1077 return NULL;
1078
957c665f 1079 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
38308473 1080 if (unlikely(!rt)) {
1da177e4
LT
1081 in6_dev_put(idev);
1082 goto out;
1083 }
1084
1da177e4
LT
1085 if (neigh)
1086 neigh_hold(neigh);
14deae41 1087 else {
1da177e4 1088 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
1089 if (IS_ERR(neigh))
1090 neigh = NULL;
1091 }
1da177e4 1092
8e2ec639
YZ
1093 rt->dst.flags |= DST_HOST;
1094 rt->dst.output = ip6_output;
69cce1d1 1095 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1096 atomic_set(&rt->dst.__refcnt, 1);
4e3fd7a0 1097 rt->rt6i_dst.addr = *addr;
8e2ec639
YZ
1098 rt->rt6i_dst.plen = 128;
1099 rt->rt6i_idev = idev;
7011687f 1100 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1101
3b00944c 1102 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1103 rt->dst.next = icmp6_dst_gc_list;
1104 icmp6_dst_gc_list = &rt->dst;
3b00944c 1105 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1106
5578689a 1107 fib6_force_start_gc(net);
1da177e4
LT
1108
1109out:
d8d1f30b 1110 return &rt->dst;
1da177e4
LT
1111}
1112
3d0f24a7 1113int icmp6_dst_gc(void)
1da177e4 1114{
e9476e95 1115 struct dst_entry *dst, **pprev;
3d0f24a7 1116 int more = 0;
1da177e4 1117
3b00944c
YH
1118 spin_lock_bh(&icmp6_dst_lock);
1119 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1120
1da177e4
LT
1121 while ((dst = *pprev) != NULL) {
1122 if (!atomic_read(&dst->__refcnt)) {
1123 *pprev = dst->next;
1124 dst_free(dst);
1da177e4
LT
1125 } else {
1126 pprev = &dst->next;
3d0f24a7 1127 ++more;
1da177e4
LT
1128 }
1129 }
1130
3b00944c 1131 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1132
3d0f24a7 1133 return more;
1da177e4
LT
1134}
1135
1e493d19
DM
1136static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1137 void *arg)
1138{
1139 struct dst_entry *dst, **pprev;
1140
1141 spin_lock_bh(&icmp6_dst_lock);
1142 pprev = &icmp6_dst_gc_list;
1143 while ((dst = *pprev) != NULL) {
1144 struct rt6_info *rt = (struct rt6_info *) dst;
1145 if (func(rt, arg)) {
1146 *pprev = dst->next;
1147 dst_free(dst);
1148 } else {
1149 pprev = &dst->next;
1150 }
1151 }
1152 spin_unlock_bh(&icmp6_dst_lock);
1153}
1154
569d3645 1155static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1156{
1da177e4 1157 unsigned long now = jiffies;
86393e52 1158 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1159 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1160 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1161 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1162 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1163 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1164 int entries;
7019b78e 1165
fc66f95c 1166 entries = dst_entries_get_fast(ops);
7019b78e 1167 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1168 entries <= rt_max_size)
1da177e4
LT
1169 goto out;
1170
6891a346
BT
1171 net->ipv6.ip6_rt_gc_expire++;
1172 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1173 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1174 entries = dst_entries_get_slow(ops);
1175 if (entries < ops->gc_thresh)
7019b78e 1176 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1177out:
7019b78e 1178 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1179 return entries > rt_max_size;
1da177e4
LT
1180}
1181
1182/* Clean host part of a prefix. Not necessary in radix tree,
1183 but results in cleaner routing tables.
1184
1185 Remove it only when all the things will work!
1186 */
1187
6b75d090 1188int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1189{
5170ae82 1190 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1191 if (hoplimit == 0) {
6b75d090 1192 struct net_device *dev = dst->dev;
c68f24cc
ED
1193 struct inet6_dev *idev;
1194
1195 rcu_read_lock();
1196 idev = __in6_dev_get(dev);
1197 if (idev)
6b75d090 1198 hoplimit = idev->cnf.hop_limit;
c68f24cc 1199 else
53b7997f 1200 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1201 rcu_read_unlock();
1da177e4
LT
1202 }
1203 return hoplimit;
1204}
abbf46ae 1205EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1206
1207/*
1208 *
1209 */
1210
/*
 * Build a route from @cfg and insert it into the table named by
 * cfg->fc_table.
 *
 * Steps, in order: validate the prefix lengths, resolve the device from
 * cfg->fc_ifindex, pick or create the fib table, allocate the route,
 * choose its input/output handlers, handle reject and loopback routes,
 * validate the gateway and preferred source, attach a neighbour, copy
 * the metrics from cfg->fc_mx and finally call __ip6_ins_rt().
 *
 * Side effects on @cfg: fc_metric and fc_protocol are defaulted when
 * unset, and fc_nlinfo.nl_net is overwritten with the device's namespace
 * just before insertion.
 *
 * Returns the result of __ip6_ins_rt(), or a negative errno when setup
 * fails; in the failure case the "out" label drops the dev and idev
 * references and frees the partially built route.
 */
86872cb5 1211int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1212{
1213 int err;
5578689a 1214 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1215 struct rt6_info *rt = NULL;
1216 struct net_device *dev = NULL;
1217 struct inet6_dev *idev = NULL;
c71099ac 1218 struct fib6_table *table;
1da177e4
LT
1219 int addr_type;
1220
/* An IPv6 prefix cannot be longer than 128 bits. */
86872cb5 1221 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1222 return -EINVAL;
1223#ifndef CONFIG_IPV6_SUBTREES
/* A source prefix is rejected when subtree support is compiled out. */
86872cb5 1224 if (cfg->fc_src_len)
1da177e4
LT
1225 return -EINVAL;
1226#endif
/* Resolve the device (and its inet6_dev) when an ifindex was given. */
86872cb5 1227 if (cfg->fc_ifindex) {
1da177e4 1228 err = -ENODEV;
5578689a 1229 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1230 if (!dev)
1231 goto out;
1232 idev = in6_dev_get(dev);
1233 if (!idev)
1234 goto out;
1235 }
1236
/* A zero metric means "use the default user priority". */
86872cb5
TG
1237 if (cfg->fc_metric == 0)
1238 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1239
d71314b4 1240 err = -ENOBUFS;
/*
 * A netlink request without NLM_F_CREATE first looks for an existing
 * table; if there is none it warns and creates the table anyway.
 * Every other caller creates (or finds) the table directly.
 */
38308473
DM
1241 if (cfg->fc_nlinfo.nlh &&
1242 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1243 table = fib6_get_table(net, cfg->fc_table);
38308473 1244 if (!table) {
d71314b4
MV
1245 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1246 table = fib6_new_table(net, cfg->fc_table);
1247 }
1248 } else {
1249 table = fib6_new_table(net, cfg->fc_table);
1250 }
38308473
DM
1251
1252 if (!table)
c71099ac 1253 goto out;
c71099ac 1254
/* Allocated without a device; dst.dev is filled in at install_route. */
957c665f 1255 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4 1256
38308473 1257 if (!rt) {
1da177e4
LT
1258 err = -ENOMEM;
1259 goto out;
1260 }
1261
d8d1f30b 1262 rt->dst.obsolete = -1;
/* fc_expires is in clock_t units; converted to an absolute jiffies value. */
6f704992
YH
1263 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1264 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1265 0;
1da177e4 1266
86872cb5
TG
1267 if (cfg->fc_protocol == RTPROT_UNSPEC)
1268 cfg->fc_protocol = RTPROT_BOOT;
1269 rt->rt6i_protocol = cfg->fc_protocol;
1270
1271 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1272
/* Input handler: multicast, local delivery, or forwarding. */
1273 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1274 rt->dst.input = ip6_mc_input;
ab79ad14
1275 else if (cfg->fc_flags & RTF_LOCAL)
1276 rt->dst.input = ip6_input;
1da177e4 1277 else
d8d1f30b 1278 rt->dst.input = ip6_forward;
1da177e4 1279
d8d1f30b 1280 rt->dst.output = ip6_output;
1da177e4 1281
86872cb5
TG
1282 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1283 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1284 if (rt->rt6i_dst.plen == 128)
11d53b49 1285 rt->dst.flags |= DST_HOST;
1da177e4 1286
/* Non-host routes that carry metrics get a private, zeroed metrics array. */
8e2ec639
YZ
1287 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1288 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1289 if (!metrics) {
1290 err = -ENOMEM;
1291 goto out;
1292 }
1293 dst_init_metrics(&rt->dst, metrics, 0);
1294 }
1da177e4 1295#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1296 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1297 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1298#endif
1299
86872cb5 1300 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1301
1302 /* We cannot add true routes via loopback here,
1303 they would result in kernel looping; promote them to reject routes
1304 */
86872cb5 1305 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1306 (dev && (dev->flags & IFF_LOOPBACK) &&
1307 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1308 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1309 /* hold loopback dev/idev if we haven't done so. */
5578689a 1310 if (dev != net->loopback_dev) {
1da177e4
LT
1311 if (dev) {
1312 dev_put(dev);
1313 in6_dev_put(idev);
1314 }
5578689a 1315 dev = net->loopback_dev;
1da177e4
LT
1316 dev_hold(dev);
1317 idev = in6_dev_get(dev);
1318 if (!idev) {
1319 err = -ENODEV;
1320 goto out;
1321 }
1322 }
/* Reject route: both handlers discard and report -ENETUNREACH. */
d8d1f30b
CG
1323 rt->dst.output = ip6_pkt_discard_out;
1324 rt->dst.input = ip6_pkt_discard;
1325 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1326 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1327 goto install_route;
1328 }
1329
86872cb5 1330 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1331 const struct in6_addr *gw_addr;
1da177e4
LT
1332 int gwa_type;
1333
86872cb5 1334 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1335 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1336 gwa_type = ipv6_addr_type(gw_addr);
1337
1338 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1339 struct rt6_info *grt;
1340
1341 /* IPv6 strictly inhibits using not link-local
1342 addresses as nexthop address.
1343 Otherwise, router will not able to send redirects.
1344 It is very good, but in some (rare!) circumstances
1345 (SIT, PtP, NBMA NOARP links) it is handy to allow
1346 some exceptions. --ANK
1347 */
1348 err = -EINVAL;
38308473 1349 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1350 goto out;
1351
/* The gateway itself must be reachable through an existing route. */
5578689a 1352 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1353
1354 err = -EHOSTUNREACH;
38308473 1355 if (!grt)
1da177e4
LT
1356 goto out;
1357 if (dev) {
1358 if (dev != grt->rt6i_dev) {
d8d1f30b 1359 dst_release(&grt->dst);
1da177e4
LT
1360 goto out;
1361 }
1362 } else {
/* No device given: adopt the gateway route's dev/idev, taking references. */
1363 dev = grt->rt6i_dev;
1364 idev = grt->rt6i_idev;
1365 dev_hold(dev);
1366 in6_dev_hold(grt->rt6i_idev);
1367 }
/* err stays -EHOSTUNREACH when the route to the gateway is itself gatewayed. */
38308473 1368 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1369 err = 0;
d8d1f30b 1370 dst_release(&grt->dst);
1da177e4
LT
1371
1372 if (err)
1373 goto out;
1374 }
1375 err = -EINVAL;
38308473 1376 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1377 goto out;
1378 }
1379
1380 err = -ENODEV;
38308473 1381 if (!dev)
1da177e4
LT
1382 goto out;
1383
/* A preferred source address must pass ipv6_chk_addr() for this device. */
c3968a85
DW
1384 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1385 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1386 err = -EINVAL;
1387 goto out;
1388 }
4e3fd7a0 1389 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1390 rt->rt6i_prefsrc.plen = 128;
1391 } else
1392 rt->rt6i_prefsrc.plen = 0;
1393
/* Gateway and NONEXTHOP routes get a neighbour entry keyed on rt6i_gateway. */
86872cb5 1394 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
69cce1d1
DM
1395 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1396 if (IS_ERR(n)) {
1397 err = PTR_ERR(n);
1da177e4
LT
1398 goto out;
1399 }
69cce1d1 1400 dst_set_neighbour(&rt->dst, n);
1da177e4
LT
1401 }
1402
86872cb5 1403 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1404
1405install_route:
/* Copy the metric attributes; a zero type is skipped, a type above RTAX_MAX is an error. */
86872cb5
TG
1406 if (cfg->fc_mx) {
1407 struct nlattr *nla;
1408 int remaining;
1409
1410 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1411 int type = nla_type(nla);
86872cb5
TG
1412
1413 if (type) {
1414 if (type > RTAX_MAX) {
1da177e4
LT
1415 err = -EINVAL;
1416 goto out;
1417 }
86872cb5 1418
defb3519 1419 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1420 }
1da177e4
LT
1421 }
1422 }
1423
/* The dev and idev references held so far are handed over to the route. */
d8d1f30b 1424 rt->dst.dev = dev;
1da177e4 1425 rt->rt6i_idev = idev;
c71099ac 1426 rt->rt6i_table = table;
63152fc0 1427
c346dca1 1428 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1429
86872cb5 1430 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1431
/* Error exit: drop whatever references were taken and free the route. */
1432out:
1433 if (dev)
1434 dev_put(dev);
1435 if (idev)
1436 in6_dev_put(idev);
1437 if (rt)
d8d1f30b 1438 dst_free(&rt->dst);
1da177e4
LT
1439 return err;
1440}
1441
86872cb5 1442static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1443{
1444 int err;
c71099ac 1445 struct fib6_table *table;
c346dca1 1446 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1447
8ed67789 1448 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1449 return -ENOENT;
1450
c71099ac
TG
1451 table = rt->rt6i_table;
1452 write_lock_bh(&table->tb6_lock);
1da177e4 1453
86872cb5 1454 err = fib6_del(rt, info);
d8d1f30b 1455 dst_release(&rt->dst);
1da177e4 1456
c71099ac 1457 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1458
1459 return err;
1460}
1461
e0a1ad73
TG
1462int ip6_del_rt(struct rt6_info *rt)
1463{
4d1169c1 1464 struct nl_info info = {
c346dca1 1465 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1466 };
528c4ceb 1467 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1468}
1469
86872cb5 1470static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1471{
c71099ac 1472 struct fib6_table *table;
1da177e4
LT
1473 struct fib6_node *fn;
1474 struct rt6_info *rt;
1475 int err = -ESRCH;
1476
5578689a 1477 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1478 if (!table)
c71099ac
TG
1479 return err;
1480
1481 read_lock_bh(&table->tb6_lock);
1da177e4 1482
c71099ac 1483 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1484 &cfg->fc_dst, cfg->fc_dst_len,
1485 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1486
1da177e4 1487 if (fn) {
d8d1f30b 1488 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1489 if (cfg->fc_ifindex &&
38308473 1490 (!rt->rt6i_dev ||
86872cb5 1491 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1492 continue;
86872cb5
TG
1493 if (cfg->fc_flags & RTF_GATEWAY &&
1494 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1495 continue;
86872cb5 1496 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1497 continue;
d8d1f30b 1498 dst_hold(&rt->dst);
c71099ac 1499 read_unlock_bh(&table->tb6_lock);
1da177e4 1500
86872cb5 1501 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1502 }
1503 }
c71099ac 1504 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1505
1506 return err;
1507}
1508
1509/*
1510 * Handle redirects
1511 */
/*
 * Flow key used for redirect lookups: a struct flowi6 extended with the
 * gateway address.  The lookup callback receives a pointer to fl6 and
 * casts it back to this structure, so fl6 must remain the first member.
 */
a6279458 1512struct ip6rd_flowi {
4c9483b2 1513 struct flowi6 fl6;
a6279458
YH
1514 struct in6_addr gateway;
1515};
1516
8ed67789
DL
/*
 * Per-table lookup callback for redirect validation.
 *
 * Under the table read lock, finds the fib node for the flow's
 * daddr/saddr and scans its routes for an unexpired gateway route on
 * the flow's outgoing interface whose gateway equals rdfl->gateway.
 * When the scan finds nothing, rt is set to the namespace's null entry
 * before BACKTRACK() runs.
 *
 * Returns the selected route with a reference held.  The @flags
 * argument is not used in this function body.
 */
1517static struct rt6_info *__ip6_route_redirect(struct net *net,
1518 struct fib6_table *table,
4c9483b2 1519 struct flowi6 *fl6,
a6279458 1520 int flags)
1da177e4 1521{
/* fl6 is the first member of struct ip6rd_flowi, so it can be cast back. */
4c9483b2 1522 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1523 struct rt6_info *rt;
e843b9e1 1524 struct fib6_node *fn;
c71099ac 1525
1da177e4 1526 /*
e843b9e1
YH
1527 * Get the "current" route for this destination and
1528 * check if the redirect has come from appropriate router.
1529 *
1530 * RFC 2461 specifies that redirects should only be
1531 * accepted if they come from the nexthop to the target.
1532 * Due to the way the routes are chosen, this notion
1533 * is a bit fuzzy and one might need to check all possible
1534 * routes.
1da177e4 1535 */
1da177e4 1536
c71099ac 1537 read_lock_bh(&table->tb6_lock);
4c9483b2 1538 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1539restart:
d8d1f30b 1540 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1541 /*
1542 * Current route is on-link; redirect is always invalid.
1543 *
1544 * Seems, previous statement is not true. It could
1545 * be node, which looks for us as on-link (f.e. proxy ndisc)
1546 * But then router serving it might decide, that we should
1547 * know truth 8)8) --ANK (980726).
1548 */
1549 if (rt6_check_expired(rt))
1550 continue;
1551 if (!(rt->rt6i_flags & RTF_GATEWAY))
1552 continue;
4c9483b2 1553 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
e843b9e1 1554 continue;
a6279458 1555 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1556 continue;
1557 break;
1558 }
a6279458 1559
cb15d9c2 1560 if (!rt)
8ed67789 1561 rt = net->ipv6.ip6_null_entry;
/*
 * NOTE(review): BACKTRACK is a macro defined elsewhere in this file; it
 * presumably jumps to the "restart" and "out" labels above/below, which
 * have no other users here -- confirm against the macro definition.
 */
4c9483b2 1562 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1563out:
d8d1f30b 1564 dst_hold(&rt->dst);
a6279458 1565
c71099ac 1566 read_unlock_bh(&table->tb6_lock);
e843b9e1 1567
a6279458
YH
1568 return rt;
1569};
1570
b71d1d42
ED
1571static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1572 const struct in6_addr *src,
1573 const struct in6_addr *gateway,
a6279458
YH
1574 struct net_device *dev)
1575{
adaa70bb 1576 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1577 struct net *net = dev_net(dev);
a6279458 1578 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1579 .fl6 = {
1580 .flowi6_oif = dev->ifindex,
1581 .daddr = *dest,
1582 .saddr = *src,
a6279458 1583 },
a6279458 1584 };
adaa70bb 1585
4e3fd7a0 1586 rdfl.gateway = *gateway;
86c36ce4 1587
adaa70bb
TG
1588 if (rt6_need_strict(dest))
1589 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1590
4c9483b2 1591 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1592 flags, __ip6_route_redirect);
a6279458
YH
1593}
1594
b71d1d42
ED
1595void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1596 const struct in6_addr *saddr,
a6279458
YH
1597 struct neighbour *neigh, u8 *lladdr, int on_link)
1598{
1599 struct rt6_info *rt, *nrt = NULL;
1600 struct netevent_redirect netevent;
c346dca1 1601 struct net *net = dev_net(neigh->dev);
a6279458
YH
1602
1603 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1604
8ed67789 1605 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1606 if (net_ratelimit())
1607 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1608 "for redirect target\n");
a6279458 1609 goto out;
1da177e4
LT
1610 }
1611
1da177e4
LT
1612 /*
1613 * We have finally decided to accept it.
1614 */
1615
1ab1457c 1616 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1617 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1618 NEIGH_UPDATE_F_OVERRIDE|
1619 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1620 NEIGH_UPDATE_F_ISROUTER))
1621 );
1622
1623 /*
1624 * Redirect received -> path was valid.
1625 * Look, redirects are sent only in response to data packets,
1626 * so that this nexthop apparently is reachable. --ANK
1627 */
d8d1f30b 1628 dst_confirm(&rt->dst);
1da177e4
LT
1629
1630 /* Duplicate redirect: silently ignore. */
f2c31e32 1631 if (neigh == dst_get_neighbour_raw(&rt->dst))
1da177e4
LT
1632 goto out;
1633
21efcfa0 1634 nrt = ip6_rt_copy(rt, dest);
38308473 1635 if (!nrt)
1da177e4
LT
1636 goto out;
1637
1638 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1639 if (on_link)
1640 nrt->rt6i_flags &= ~RTF_GATEWAY;
1641
4e3fd7a0 1642 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1643 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1644
40e22e8f 1645 if (ip6_ins_rt(nrt))
1da177e4
LT
1646 goto out;
1647
d8d1f30b
CG
1648 netevent.old = &rt->dst;
1649 netevent.new = &nrt->dst;
8d71740c
TT
1650 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1651
38308473 1652 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1653 ip6_del_rt(rt);
1da177e4
LT
1654 return;
1655 }
1656
1657out:
d8d1f30b 1658 dst_release(&rt->dst);
1da177e4
LT
1659}
1660
1661/*
1662 * Handle ICMP "packet too big" messages
1663 * i.e. Path MTU discovery
1664 */
1665
b71d1d42 1666static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1667 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1668{
1669 struct rt6_info *rt, *nrt;
1670 int allfrag = 0;
d3052b55 1671again:
ae878ae2 1672 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1673 if (!rt)
1da177e4
LT
1674 return;
1675
d3052b55
AV
1676 if (rt6_check_expired(rt)) {
1677 ip6_del_rt(rt);
1678 goto again;
1679 }
1680
d8d1f30b 1681 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1682 goto out;
1683
1684 if (pmtu < IPV6_MIN_MTU) {
1685 /*
1ab1457c 1686 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1687 * MTU (1280) and a fragment header should always be included
1688 * after a node receiving Too Big message reporting PMTU is
1689 * less than the IPv6 Minimum Link MTU.
1690 */
1691 pmtu = IPV6_MIN_MTU;
1692 allfrag = 1;
1693 }
1694
1695 /* New mtu received -> path was valid.
1696 They are sent only in response to data packets,
1697 so that this nexthop apparently is reachable. --ANK
1698 */
d8d1f30b 1699 dst_confirm(&rt->dst);
1da177e4
LT
1700
1701 /* Host route. If it is static, it would be better
1702 not to override it, but add new one, so that
1703 when cache entry will expire old pmtu
1704 would return automatically.
1705 */
1706 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1707 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1708 if (allfrag) {
1709 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1710 features |= RTAX_FEATURE_ALLFRAG;
1711 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1712 }
d8d1f30b 1713 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1714 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1715 goto out;
1716 }
1717
1718 /* Network route.
1719 Two cases are possible:
1720 1. It is connected route. Action: COW
1721 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1722 */
f2c31e32 1723 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1724 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1725 else
1726 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1727
d5315b50 1728 if (nrt) {
defb3519
DM
1729 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1730 if (allfrag) {
1731 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1732 features |= RTAX_FEATURE_ALLFRAG;
1733 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1734 }
a1e78363
YH
1735
1736 /* According to RFC 1981, detecting PMTU increase shouldn't be
1737 * happened within 5 mins, the recommended timer is 10 mins.
1738 * Here this route expiration time is set to ip6_rt_mtu_expires
1739 * which is 10 mins. After 10 mins the decreased pmtu is expired
1740 * and detecting PMTU increase will be automatically happened.
1741 */
d8d1f30b 1742 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1743 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1744
40e22e8f 1745 ip6_ins_rt(nrt);
1da177e4 1746 }
1da177e4 1747out:
d8d1f30b 1748 dst_release(&rt->dst);
1da177e4
LT
1749}
1750
b71d1d42 1751void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1752 struct net_device *dev, u32 pmtu)
1753{
1754 struct net *net = dev_net(dev);
1755
1756 /*
1757 * RFC 1981 states that a node "MUST reduce the size of the packets it
1758 * is sending along the path" that caused the Packet Too Big message.
1759 * Since it's not possible in the general case to determine which
1760 * interface was used to send the original packet, we update the MTU
1761 * on the interface that will be used to send future packets. We also
1762 * update the MTU on the interface that received the Packet Too Big in
1763 * case the original packet was forced out that interface with
1764 * SO_BINDTODEVICE or similar. This is the next best thing to the
1765 * correct behaviour, which would be to update the MTU on all
1766 * interfaces.
1767 */
1768 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1769 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1770}
1771
1da177e4
LT
1772/*
1773 * Misc support functions
1774 */
1775
21efcfa0
ED
1776static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1777 const struct in6_addr *dest)
1da177e4 1778{
c346dca1 1779 struct net *net = dev_net(ort->rt6i_dev);
5c1e6aa3 1780 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1781 ort->dst.dev, 0);
1da177e4
LT
1782
1783 if (rt) {
d8d1f30b
CG
1784 rt->dst.input = ort->dst.input;
1785 rt->dst.output = ort->dst.output;
8e2ec639 1786 rt->dst.flags |= DST_HOST;
d8d1f30b 1787
4e3fd7a0 1788 rt->rt6i_dst.addr = *dest;
8e2ec639 1789 rt->rt6i_dst.plen = 128;
defb3519 1790 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1791 rt->dst.error = ort->dst.error;
1da177e4
LT
1792 rt->rt6i_idev = ort->rt6i_idev;
1793 if (rt->rt6i_idev)
1794 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1795 rt->dst.lastuse = jiffies;
1da177e4
LT
1796 rt->rt6i_expires = 0;
1797
4e3fd7a0 1798 rt->rt6i_gateway = ort->rt6i_gateway;
1da177e4
LT
1799 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1800 rt->rt6i_metric = 0;
1801
1da177e4
LT
1802#ifdef CONFIG_IPV6_SUBTREES
1803 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1804#endif
0f6c6392 1805 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1806 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1807 }
1808 return rt;
1809}
1810
70ceb4f5 1811#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1812static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1813 const struct in6_addr *prefix, int prefixlen,
1814 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1815{
1816 struct fib6_node *fn;
1817 struct rt6_info *rt = NULL;
c71099ac
TG
1818 struct fib6_table *table;
1819
efa2cea0 1820 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1821 if (!table)
c71099ac 1822 return NULL;
70ceb4f5 1823
c71099ac
TG
1824 write_lock_bh(&table->tb6_lock);
1825 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1826 if (!fn)
1827 goto out;
1828
d8d1f30b 1829 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1830 if (rt->rt6i_dev->ifindex != ifindex)
1831 continue;
1832 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1833 continue;
1834 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1835 continue;
d8d1f30b 1836 dst_hold(&rt->dst);
70ceb4f5
YH
1837 break;
1838 }
1839out:
c71099ac 1840 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1841 return rt;
1842}
1843
efa2cea0 1844static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1845 const struct in6_addr *prefix, int prefixlen,
1846 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1847 unsigned pref)
1848{
86872cb5
TG
1849 struct fib6_config cfg = {
1850 .fc_table = RT6_TABLE_INFO,
238fc7ea 1851 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1852 .fc_ifindex = ifindex,
1853 .fc_dst_len = prefixlen,
1854 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1855 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1856 .fc_nlinfo.pid = 0,
1857 .fc_nlinfo.nlh = NULL,
1858 .fc_nlinfo.nl_net = net,
86872cb5
TG
1859 };
1860
4e3fd7a0
AD
1861 cfg.fc_dst = *prefix;
1862 cfg.fc_gateway = *gwaddr;
70ceb4f5 1863
e317da96
YH
1864 /* We should treat it as a default route if prefix length is 0. */
1865 if (!prefixlen)
86872cb5 1866 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1867
86872cb5 1868 ip6_route_add(&cfg);
70ceb4f5 1869
efa2cea0 1870 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1871}
1872#endif
1873
b71d1d42 1874struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1875{
1da177e4 1876 struct rt6_info *rt;
c71099ac 1877 struct fib6_table *table;
1da177e4 1878
c346dca1 1879 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1880 if (!table)
c71099ac 1881 return NULL;
1da177e4 1882
c71099ac 1883 write_lock_bh(&table->tb6_lock);
d8d1f30b 1884 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1885 if (dev == rt->rt6i_dev &&
045927ff 1886 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1887 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1888 break;
1889 }
1890 if (rt)
d8d1f30b 1891 dst_hold(&rt->dst);
c71099ac 1892 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1893 return rt;
1894}
1895
b71d1d42 1896struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1897 struct net_device *dev,
1898 unsigned int pref)
1da177e4 1899{
86872cb5
TG
1900 struct fib6_config cfg = {
1901 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1902 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1903 .fc_ifindex = dev->ifindex,
1904 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1905 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1906 .fc_nlinfo.pid = 0,
1907 .fc_nlinfo.nlh = NULL,
c346dca1 1908 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1909 };
1da177e4 1910
4e3fd7a0 1911 cfg.fc_gateway = *gwaddr;
1da177e4 1912
86872cb5 1913 ip6_route_add(&cfg);
1da177e4 1914
1da177e4
LT
1915 return rt6_get_dflt_router(gwaddr, dev);
1916}
1917
7b4da532 1918void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1919{
1920 struct rt6_info *rt;
c71099ac
TG
1921 struct fib6_table *table;
1922
1923 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1924 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1925 if (!table)
c71099ac 1926 return;
1da177e4
LT
1927
1928restart:
c71099ac 1929 read_lock_bh(&table->tb6_lock);
d8d1f30b 1930 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1931 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1932 dst_hold(&rt->dst);
c71099ac 1933 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1934 ip6_del_rt(rt);
1da177e4
LT
1935 goto restart;
1936 }
1937 }
c71099ac 1938 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1939}
1940
5578689a
DL
1941static void rtmsg_to_fib6_config(struct net *net,
1942 struct in6_rtmsg *rtmsg,
86872cb5
TG
1943 struct fib6_config *cfg)
1944{
1945 memset(cfg, 0, sizeof(*cfg));
1946
1947 cfg->fc_table = RT6_TABLE_MAIN;
1948 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1949 cfg->fc_metric = rtmsg->rtmsg_metric;
1950 cfg->fc_expires = rtmsg->rtmsg_info;
1951 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1952 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1953 cfg->fc_flags = rtmsg->rtmsg_flags;
1954
5578689a 1955 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1956
4e3fd7a0
AD
1957 cfg->fc_dst = rtmsg->rtmsg_dst;
1958 cfg->fc_src = rtmsg->rtmsg_src;
1959 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1960}
1961
5578689a 1962int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1963{
86872cb5 1964 struct fib6_config cfg;
1da177e4
LT
1965 struct in6_rtmsg rtmsg;
1966 int err;
1967
1968 switch(cmd) {
1969 case SIOCADDRT: /* Add a route */
1970 case SIOCDELRT: /* Delete a route */
1971 if (!capable(CAP_NET_ADMIN))
1972 return -EPERM;
1973 err = copy_from_user(&rtmsg, arg,
1974 sizeof(struct in6_rtmsg));
1975 if (err)
1976 return -EFAULT;
86872cb5 1977
5578689a 1978 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1979
1da177e4
LT
1980 rtnl_lock();
1981 switch (cmd) {
1982 case SIOCADDRT:
86872cb5 1983 err = ip6_route_add(&cfg);
1da177e4
LT
1984 break;
1985 case SIOCDELRT:
86872cb5 1986 err = ip6_route_del(&cfg);
1da177e4
LT
1987 break;
1988 default:
1989 err = -EINVAL;
1990 }
1991 rtnl_unlock();
1992
1993 return err;
3ff50b79 1994 }
1da177e4
LT
1995
1996 return -EINVAL;
1997}
1998
1999/*
2000 * Drop the packet on the floor
2001 */
2002
d5fdd6ba 2003static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2004{
612f09e8 2005 int type;
adf30907 2006 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2007 switch (ipstats_mib_noroutes) {
2008 case IPSTATS_MIB_INNOROUTES:
0660e03f 2009 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2010 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2011 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2012 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2013 break;
2014 }
2015 /* FALLTHROUGH */
2016 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2017 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2018 ipstats_mib_noroutes);
612f09e8
YH
2019 break;
2020 }
3ffe533c 2021 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2022 kfree_skb(skb);
2023 return 0;
2024}
2025
9ce8ade0
TG
2026static int ip6_pkt_discard(struct sk_buff *skb)
2027{
612f09e8 2028 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2029}
2030
20380731 2031static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2032{
adf30907 2033 skb->dev = skb_dst(skb)->dev;
612f09e8 2034 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2035}
2036
6723ab54
DM
2037#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2038
9ce8ade0
TG
2039static int ip6_pkt_prohibit(struct sk_buff *skb)
2040{
612f09e8 2041 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2042}
2043
2044static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2045{
adf30907 2046 skb->dev = skb_dst(skb)->dev;
612f09e8 2047 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2048}
2049
6723ab54
DM
2050#endif
2051
1da177e4
LT
2052/*
2053 * Allocate a dst for local (unicast / anycast) address.
2054 */
2055
2056struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2057 const struct in6_addr *addr,
2058 int anycast)
2059{
c346dca1 2060 struct net *net = dev_net(idev->dev);
5c1e6aa3 2061 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2062 net->loopback_dev, 0);
14deae41 2063 struct neighbour *neigh;
1da177e4 2064
38308473 2065 if (!rt) {
40385653
BG
2066 if (net_ratelimit())
2067 pr_warning("IPv6: Maximum number of routes reached,"
2068 " consider increasing route/max_size.\n");
1da177e4 2069 return ERR_PTR(-ENOMEM);
40385653 2070 }
1da177e4 2071
1da177e4
LT
2072 in6_dev_hold(idev);
2073
11d53b49 2074 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2075 rt->dst.input = ip6_input;
2076 rt->dst.output = ip6_output;
1da177e4 2077 rt->rt6i_idev = idev;
d8d1f30b 2078 rt->dst.obsolete = -1;
1da177e4
LT
2079
2080 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2081 if (anycast)
2082 rt->rt6i_flags |= RTF_ANYCAST;
2083 else
1da177e4 2084 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
2085 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2086 if (IS_ERR(neigh)) {
d8d1f30b 2087 dst_free(&rt->dst);
14deae41 2088
29546a64 2089 return ERR_CAST(neigh);
1da177e4 2090 }
69cce1d1 2091 dst_set_neighbour(&rt->dst, neigh);
1da177e4 2092
4e3fd7a0 2093 rt->rt6i_dst.addr = *addr;
1da177e4 2094 rt->rt6i_dst.plen = 128;
5578689a 2095 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2096
d8d1f30b 2097 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2098
2099 return rt;
2100}
2101
c3968a85
DW
2102int ip6_route_get_saddr(struct net *net,
2103 struct rt6_info *rt,
b71d1d42 2104 const struct in6_addr *daddr,
c3968a85
DW
2105 unsigned int prefs,
2106 struct in6_addr *saddr)
2107{
2108 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2109 int err = 0;
2110 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2111 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2112 else
2113 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2114 daddr, prefs, saddr);
2115 return err;
2116}
2117
2118/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* preferred-source address to clear */
};
2124
2125static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2126{
2127 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2128 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2129 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2130
38308473 2131 if (((void *)rt->rt6i_dev == dev || !dev) &&
c3968a85
DW
2132 rt != net->ipv6.ip6_null_entry &&
2133 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2134 /* remove prefsrc entry */
2135 rt->rt6i_prefsrc.plen = 0;
2136 }
2137 return 0;
2138}
2139
2140void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2141{
2142 struct net *net = dev_net(ifp->idev->dev);
2143 struct arg_dev_net_ip adni = {
2144 .dev = ifp->idev->dev,
2145 .net = net,
2146 .addr = &ifp->addr,
2147 };
2148 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2149}
2150
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
};
2155
1da177e4
LT
2156static int fib6_ifdown(struct rt6_info *rt, void *arg)
2157{
bc3ef660 2158 const struct arg_dev_net *adn = arg;
2159 const struct net_device *dev = adn->dev;
8ed67789 2160
38308473 2161 if ((rt->rt6i_dev == dev || !dev) &&
bc3ef660 2162 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2163 RT6_TRACE("deleted by ifdown %p\n", rt);
2164 return -1;
2165 }
2166 return 0;
2167}
2168
f3db4851 2169void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2170{
8ed67789
DL
2171 struct arg_dev_net adn = {
2172 .dev = dev,
2173 .net = net,
2174 };
2175
2176 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2177 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2178}
2179
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU */
};
2185
2186static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2187{
2188 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2189 struct inet6_dev *idev;
2190
2191 /* In IPv6 pmtu discovery is not optional,
2192 so that RTAX_MTU lock cannot disable it.
2193 We still use this lock to block changes
2194 caused by addrconf/ndisc.
2195 */
2196
2197 idev = __in6_dev_get(arg->dev);
38308473 2198 if (!idev)
1da177e4
LT
2199 return 0;
2200
2201 /* For administrative MTU increase, there is no way to discover
2202 IPv6 PMTU increase, so PMTU increase should be updated here.
2203 Since RFC 1981 doesn't include administrative MTU increase
2204 update PMTU increase is a MUST. (i.e. jumbo frame)
2205 */
2206 /*
2207 If new MTU is less than route PMTU, this new MTU will be the
2208 lowest MTU in the path, update the route PMTU to reflect PMTU
2209 decreases; if new MTU is greater than route PMTU, and the
2210 old MTU is the lowest MTU in the path, update the route PMTU
2211 to reflect the increase. In this case if the other nodes' MTU
2212 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2213 PMTU discouvery.
2214 */
2215 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2216 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2217 (dst_mtu(&rt->dst) >= arg->mtu ||
2218 (dst_mtu(&rt->dst) < arg->mtu &&
2219 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2220 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2221 }
1da177e4
LT
2222 return 0;
2223}
2224
2225void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2226{
c71099ac
TG
2227 struct rt6_mtu_change_arg arg = {
2228 .dev = dev,
2229 .mtu = mtu,
2230 };
1da177e4 2231
c346dca1 2232 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2233}
2234
ef7c79ed 2235static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2236 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2237 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2238 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2239 [RTA_PRIORITY] = { .type = NLA_U32 },
2240 [RTA_METRICS] = { .type = NLA_NESTED },
2241};
2242
2243static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2244 struct fib6_config *cfg)
1da177e4 2245{
86872cb5
TG
2246 struct rtmsg *rtm;
2247 struct nlattr *tb[RTA_MAX+1];
2248 int err;
1da177e4 2249
86872cb5
TG
2250 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2251 if (err < 0)
2252 goto errout;
1da177e4 2253
86872cb5
TG
2254 err = -EINVAL;
2255 rtm = nlmsg_data(nlh);
2256 memset(cfg, 0, sizeof(*cfg));
2257
2258 cfg->fc_table = rtm->rtm_table;
2259 cfg->fc_dst_len = rtm->rtm_dst_len;
2260 cfg->fc_src_len = rtm->rtm_src_len;
2261 cfg->fc_flags = RTF_UP;
2262 cfg->fc_protocol = rtm->rtm_protocol;
2263
2264 if (rtm->rtm_type == RTN_UNREACHABLE)
2265 cfg->fc_flags |= RTF_REJECT;
2266
ab79ad14
2267 if (rtm->rtm_type == RTN_LOCAL)
2268 cfg->fc_flags |= RTF_LOCAL;
2269
86872cb5
TG
2270 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2271 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2272 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2273
2274 if (tb[RTA_GATEWAY]) {
2275 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2276 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2277 }
86872cb5
TG
2278
2279 if (tb[RTA_DST]) {
2280 int plen = (rtm->rtm_dst_len + 7) >> 3;
2281
2282 if (nla_len(tb[RTA_DST]) < plen)
2283 goto errout;
2284
2285 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2286 }
86872cb5
TG
2287
2288 if (tb[RTA_SRC]) {
2289 int plen = (rtm->rtm_src_len + 7) >> 3;
2290
2291 if (nla_len(tb[RTA_SRC]) < plen)
2292 goto errout;
2293
2294 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2295 }
86872cb5 2296
c3968a85
DW
2297 if (tb[RTA_PREFSRC])
2298 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2299
86872cb5
TG
2300 if (tb[RTA_OIF])
2301 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2302
2303 if (tb[RTA_PRIORITY])
2304 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2305
2306 if (tb[RTA_METRICS]) {
2307 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2308 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2309 }
86872cb5
TG
2310
2311 if (tb[RTA_TABLE])
2312 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2313
2314 err = 0;
2315errout:
2316 return err;
1da177e4
LT
2317}
2318
c127ea2c 2319static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2320{
86872cb5
TG
2321 struct fib6_config cfg;
2322 int err;
1da177e4 2323
86872cb5
TG
2324 err = rtm_to_fib6_config(skb, nlh, &cfg);
2325 if (err < 0)
2326 return err;
2327
2328 return ip6_route_del(&cfg);
1da177e4
LT
2329}
2330
c127ea2c 2331static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2332{
86872cb5
TG
2333 struct fib6_config cfg;
2334 int err;
1da177e4 2335
86872cb5
TG
2336 err = rtm_to_fib6_config(skb, nlh, &cfg);
2337 if (err < 0)
2338 return err;
2339
2340 return ip6_route_add(&cfg);
1da177e4
LT
2341}
2342
339bf98f
TG
2343static inline size_t rt6_nlmsg_size(void)
2344{
2345 return NLMSG_ALIGN(sizeof(struct rtmsg))
2346 + nla_total_size(16) /* RTA_SRC */
2347 + nla_total_size(16) /* RTA_DST */
2348 + nla_total_size(16) /* RTA_GATEWAY */
2349 + nla_total_size(16) /* RTA_PREFSRC */
2350 + nla_total_size(4) /* RTA_TABLE */
2351 + nla_total_size(4) /* RTA_IIF */
2352 + nla_total_size(4) /* RTA_OIF */
2353 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2354 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2355 + nla_total_size(sizeof(struct rta_cacheinfo));
2356}
2357
191cd582
BH
2358static int rt6_fill_node(struct net *net,
2359 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2360 struct in6_addr *dst, struct in6_addr *src,
2361 int iif, int type, u32 pid, u32 seq,
7bc570c8 2362 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2363{
2364 struct rtmsg *rtm;
2d7202bf 2365 struct nlmsghdr *nlh;
e3703b3d 2366 long expires;
9e762a4a 2367 u32 table;
f2c31e32 2368 struct neighbour *n;
1da177e4
LT
2369
2370 if (prefix) { /* user wants prefix routes only */
2371 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2372 /* success since this is not a prefix route */
2373 return 1;
2374 }
2375 }
2376
2d7202bf 2377 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2378 if (!nlh)
26932566 2379 return -EMSGSIZE;
2d7202bf
TG
2380
2381 rtm = nlmsg_data(nlh);
1da177e4
LT
2382 rtm->rtm_family = AF_INET6;
2383 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2384 rtm->rtm_src_len = rt->rt6i_src.plen;
2385 rtm->rtm_tos = 0;
c71099ac 2386 if (rt->rt6i_table)
9e762a4a 2387 table = rt->rt6i_table->tb6_id;
c71099ac 2388 else
9e762a4a
PM
2389 table = RT6_TABLE_UNSPEC;
2390 rtm->rtm_table = table;
2d7202bf 2391 NLA_PUT_U32(skb, RTA_TABLE, table);
38308473 2392 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2393 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2394 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2395 rtm->rtm_type = RTN_LOCAL;
38308473 2396 else if (rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
1da177e4
LT
2397 rtm->rtm_type = RTN_LOCAL;
2398 else
2399 rtm->rtm_type = RTN_UNICAST;
2400 rtm->rtm_flags = 0;
2401 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2402 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2403 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2404 rtm->rtm_protocol = RTPROT_REDIRECT;
2405 else if (rt->rt6i_flags & RTF_ADDRCONF)
2406 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2407 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2408 rtm->rtm_protocol = RTPROT_RA;
2409
38308473 2410 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2411 rtm->rtm_flags |= RTM_F_CLONED;
2412
2413 if (dst) {
2d7202bf 2414 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2415 rtm->rtm_dst_len = 128;
1da177e4 2416 } else if (rtm->rtm_dst_len)
2d7202bf 2417 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2418#ifdef CONFIG_IPV6_SUBTREES
2419 if (src) {
2d7202bf 2420 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2421 rtm->rtm_src_len = 128;
1da177e4 2422 } else if (rtm->rtm_src_len)
2d7202bf 2423 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2424#endif
7bc570c8
YH
2425 if (iif) {
2426#ifdef CONFIG_IPV6_MROUTE
2427 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2428 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2429 if (err <= 0) {
2430 if (!nowait) {
2431 if (err == 0)
2432 return 0;
2433 goto nla_put_failure;
2434 } else {
2435 if (err == -EMSGSIZE)
2436 goto nla_put_failure;
2437 }
2438 }
2439 } else
2440#endif
2441 NLA_PUT_U32(skb, RTA_IIF, iif);
2442 } else if (dst) {
1da177e4 2443 struct in6_addr saddr_buf;
c3968a85 2444 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2445 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2446 }
2d7202bf 2447
c3968a85
DW
2448 if (rt->rt6i_prefsrc.plen) {
2449 struct in6_addr saddr_buf;
4e3fd7a0 2450 saddr_buf = rt->rt6i_prefsrc.addr;
c3968a85
DW
2451 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2452 }
2453
defb3519 2454 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2455 goto nla_put_failure;
2456
f2c31e32
ED
2457 rcu_read_lock();
2458 n = dst_get_neighbour(&rt->dst);
2459 if (n)
2460 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2461 rcu_read_unlock();
2d7202bf 2462
d8d1f30b 2463 if (rt->dst.dev)
2d7202bf
TG
2464 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2465
2466 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2467
36e3deae
YH
2468 if (!(rt->rt6i_flags & RTF_EXPIRES))
2469 expires = 0;
2470 else if (rt->rt6i_expires - jiffies < INT_MAX)
2471 expires = rt->rt6i_expires - jiffies;
2472 else
2473 expires = INT_MAX;
69cdf8f9 2474
d8d1f30b
CG
2475 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2476 expires, rt->dst.error) < 0)
e3703b3d 2477 goto nla_put_failure;
2d7202bf
TG
2478
2479 return nlmsg_end(skb, nlh);
2480
2481nla_put_failure:
26932566
PM
2482 nlmsg_cancel(skb, nlh);
2483 return -EMSGSIZE;
1da177e4
LT
2484}
2485
1b43af54 2486int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2487{
2488 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2489 int prefix;
2490
2d7202bf
TG
2491 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2492 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2493 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2494 } else
2495 prefix = 0;
2496
191cd582
BH
2497 return rt6_fill_node(arg->net,
2498 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2499 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2500 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2501}
2502
c127ea2c 2503static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2504{
3b1e0a65 2505 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2506 struct nlattr *tb[RTA_MAX+1];
2507 struct rt6_info *rt;
1da177e4 2508 struct sk_buff *skb;
ab364a6f 2509 struct rtmsg *rtm;
4c9483b2 2510 struct flowi6 fl6;
ab364a6f 2511 int err, iif = 0;
1da177e4 2512
ab364a6f
TG
2513 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2514 if (err < 0)
2515 goto errout;
1da177e4 2516
ab364a6f 2517 err = -EINVAL;
4c9483b2 2518 memset(&fl6, 0, sizeof(fl6));
1da177e4 2519
ab364a6f
TG
2520 if (tb[RTA_SRC]) {
2521 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2522 goto errout;
2523
4e3fd7a0 2524 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2525 }
2526
2527 if (tb[RTA_DST]) {
2528 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2529 goto errout;
2530
4e3fd7a0 2531 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2532 }
2533
2534 if (tb[RTA_IIF])
2535 iif = nla_get_u32(tb[RTA_IIF]);
2536
2537 if (tb[RTA_OIF])
4c9483b2 2538 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2539
2540 if (iif) {
2541 struct net_device *dev;
5578689a 2542 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2543 if (!dev) {
2544 err = -ENODEV;
ab364a6f 2545 goto errout;
1da177e4
LT
2546 }
2547 }
2548
ab364a6f 2549 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2550 if (!skb) {
ab364a6f
TG
2551 err = -ENOBUFS;
2552 goto errout;
2553 }
1da177e4 2554
ab364a6f
TG
2555 /* Reserve room for dummy headers, this skb can pass
2556 through good chunk of routing engine.
2557 */
459a98ed 2558 skb_reset_mac_header(skb);
ab364a6f 2559 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2560
4c9483b2 2561 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2562 skb_dst_set(skb, &rt->dst);
1da177e4 2563
4c9483b2 2564 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2565 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2566 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2567 if (err < 0) {
ab364a6f
TG
2568 kfree_skb(skb);
2569 goto errout;
1da177e4
LT
2570 }
2571
5578689a 2572 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2573errout:
1da177e4 2574 return err;
1da177e4
LT
2575}
2576
86872cb5 2577void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2578{
2579 struct sk_buff *skb;
5578689a 2580 struct net *net = info->nl_net;
528c4ceb
DL
2581 u32 seq;
2582 int err;
2583
2584 err = -ENOBUFS;
38308473 2585 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2586
339bf98f 2587 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2588 if (!skb)
21713ebc
TG
2589 goto errout;
2590
191cd582 2591 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2592 event, info->pid, seq, 0, 0, 0);
26932566
PM
2593 if (err < 0) {
2594 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2595 WARN_ON(err == -EMSGSIZE);
2596 kfree_skb(skb);
2597 goto errout;
2598 }
1ce85fe4
PNA
2599 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2600 info->nlh, gfp_any());
2601 return;
21713ebc
TG
2602errout:
2603 if (err < 0)
5578689a 2604 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2605}
2606
8ed67789
DL
2607static int ip6_route_dev_notify(struct notifier_block *this,
2608 unsigned long event, void *data)
2609{
2610 struct net_device *dev = (struct net_device *)data;
c346dca1 2611 struct net *net = dev_net(dev);
8ed67789
DL
2612
2613 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2614 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2615 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2616#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2617 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2618 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2619 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2620 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2621#endif
2622 }
2623
2624 return NOTIFY_OK;
2625}
2626
1da177e4
LT
2627/*
2628 * /proc
2629 */
2630
2631#ifdef CONFIG_PROC_FS
2632
1da177e4
LT
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file references this
 * structure; it may be a leftover from before the seq_file conversion.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2641
2642static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2643{
33120b30 2644 struct seq_file *m = p_arg;
69cce1d1 2645 struct neighbour *n;
1da177e4 2646
4b7a4274 2647 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2648
2649#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2650 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2651#else
33120b30 2652 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2653#endif
f2c31e32 2654 rcu_read_lock();
69cce1d1
DM
2655 n = dst_get_neighbour(&rt->dst);
2656 if (n) {
2657 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2658 } else {
33120b30 2659 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2660 }
f2c31e32 2661 rcu_read_unlock();
33120b30 2662 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2663 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2664 rt->dst.__use, rt->rt6i_flags,
33120b30 2665 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2666 return 0;
2667}
2668
33120b30 2669static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2670{
f3db4851
DL
2671 struct net *net = (struct net *)m->private;
2672 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2673 return 0;
2674}
1da177e4 2675
33120b30
AD
2676static int ipv6_route_open(struct inode *inode, struct file *file)
2677{
de05c557 2678 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2679}
2680
33120b30
AD
2681static const struct file_operations ipv6_route_proc_fops = {
2682 .owner = THIS_MODULE,
2683 .open = ipv6_route_open,
2684 .read = seq_read,
2685 .llseek = seq_lseek,
b6fcbdb4 2686 .release = single_release_net,
33120b30
AD
2687};
2688
1da177e4
LT
2689static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2690{
69ddb805 2691 struct net *net = (struct net *)seq->private;
1da177e4 2692 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2693 net->ipv6.rt6_stats->fib_nodes,
2694 net->ipv6.rt6_stats->fib_route_nodes,
2695 net->ipv6.rt6_stats->fib_rt_alloc,
2696 net->ipv6.rt6_stats->fib_rt_entries,
2697 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2698 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2699 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2700
2701 return 0;
2702}
2703
2704static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2705{
de05c557 2706 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2707}
2708
9a32144e 2709static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2710 .owner = THIS_MODULE,
2711 .open = rt6_stats_seq_open,
2712 .read = seq_read,
2713 .llseek = seq_lseek,
b6fcbdb4 2714 .release = single_release_net,
1da177e4
LT
2715};
2716#endif /* CONFIG_PROC_FS */
2717
2718#ifdef CONFIG_SYSCTL
2719
1da177e4 2720static
8d65af78 2721int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2722 void __user *buffer, size_t *lenp, loff_t *ppos)
2723{
c486da34
LAG
2724 struct net *net;
2725 int delay;
2726 if (!write)
1da177e4 2727 return -EINVAL;
c486da34
LAG
2728
2729 net = (struct net *)ctl->extra1;
2730 delay = net->ipv6.sysctl.flush_delay;
2731 proc_dointvec(ctl, write, buffer, lenp, ppos);
2732 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2733 return 0;
1da177e4
LT
2734}
2735
760f2d01 2736ctl_table ipv6_route_table_template[] = {
1ab1457c 2737 {
1da177e4 2738 .procname = "flush",
4990509f 2739 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2740 .maxlen = sizeof(int),
89c8b3a1 2741 .mode = 0200,
6d9f239a 2742 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2743 },
2744 {
1da177e4 2745 .procname = "gc_thresh",
9a7ec3a9 2746 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2747 .maxlen = sizeof(int),
2748 .mode = 0644,
6d9f239a 2749 .proc_handler = proc_dointvec,
1da177e4
LT
2750 },
2751 {
1da177e4 2752 .procname = "max_size",
4990509f 2753 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2754 .maxlen = sizeof(int),
2755 .mode = 0644,
6d9f239a 2756 .proc_handler = proc_dointvec,
1da177e4
LT
2757 },
2758 {
1da177e4 2759 .procname = "gc_min_interval",
4990509f 2760 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2761 .maxlen = sizeof(int),
2762 .mode = 0644,
6d9f239a 2763 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2764 },
2765 {
1da177e4 2766 .procname = "gc_timeout",
4990509f 2767 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2768 .maxlen = sizeof(int),
2769 .mode = 0644,
6d9f239a 2770 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2771 },
2772 {
1da177e4 2773 .procname = "gc_interval",
4990509f 2774 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2775 .maxlen = sizeof(int),
2776 .mode = 0644,
6d9f239a 2777 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2778 },
2779 {
1da177e4 2780 .procname = "gc_elasticity",
4990509f 2781 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2782 .maxlen = sizeof(int),
2783 .mode = 0644,
f3d3f616 2784 .proc_handler = proc_dointvec,
1da177e4
LT
2785 },
2786 {
1da177e4 2787 .procname = "mtu_expires",
4990509f 2788 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2789 .maxlen = sizeof(int),
2790 .mode = 0644,
6d9f239a 2791 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2792 },
2793 {
1da177e4 2794 .procname = "min_adv_mss",
4990509f 2795 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2796 .maxlen = sizeof(int),
2797 .mode = 0644,
f3d3f616 2798 .proc_handler = proc_dointvec,
1da177e4
LT
2799 },
2800 {
1da177e4 2801 .procname = "gc_min_interval_ms",
4990509f 2802 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2803 .maxlen = sizeof(int),
2804 .mode = 0644,
6d9f239a 2805 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2806 },
f8572d8f 2807 { }
1da177e4
LT
2808};
2809
2c8c1e72 2810struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2811{
2812 struct ctl_table *table;
2813
2814 table = kmemdup(ipv6_route_table_template,
2815 sizeof(ipv6_route_table_template),
2816 GFP_KERNEL);
5ee09105
YH
2817
2818 if (table) {
2819 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2820 table[0].extra1 = net;
86393e52 2821 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2822 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2823 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2824 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2825 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2826 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2827 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2828 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2829 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2830 }
2831
760f2d01
DL
2832 return table;
2833}
1da177e4
LT
2834#endif
2835
2c8c1e72 2836static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2837{
633d424b 2838 int ret = -ENOMEM;
8ed67789 2839
86393e52
AD
2840 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2841 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2842
fc66f95c
ED
2843 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2844 goto out_ip6_dst_ops;
2845
8ed67789
DL
2846 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2847 sizeof(*net->ipv6.ip6_null_entry),
2848 GFP_KERNEL);
2849 if (!net->ipv6.ip6_null_entry)
fc66f95c 2850 goto out_ip6_dst_entries;
d8d1f30b 2851 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2852 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2853 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2854 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2855 ip6_template_metrics, true);
8ed67789
DL
2856
2857#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2858 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2859 sizeof(*net->ipv6.ip6_prohibit_entry),
2860 GFP_KERNEL);
68fffc67
PZ
2861 if (!net->ipv6.ip6_prohibit_entry)
2862 goto out_ip6_null_entry;
d8d1f30b 2863 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2864 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2865 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2866 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2867 ip6_template_metrics, true);
8ed67789
DL
2868
2869 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2870 sizeof(*net->ipv6.ip6_blk_hole_entry),
2871 GFP_KERNEL);
68fffc67
PZ
2872 if (!net->ipv6.ip6_blk_hole_entry)
2873 goto out_ip6_prohibit_entry;
d8d1f30b 2874 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2875 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2876 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2877 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2878 ip6_template_metrics, true);
8ed67789
DL
2879#endif
2880
b339a47c
PZ
2881 net->ipv6.sysctl.flush_delay = 0;
2882 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2883 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2884 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2885 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2886 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2887 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2888 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2889
cdb18761
DL
2890#ifdef CONFIG_PROC_FS
2891 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2892 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2893#endif
6891a346
BT
2894 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2895
8ed67789
DL
2896 ret = 0;
2897out:
2898 return ret;
f2fc6a54 2899
68fffc67
PZ
2900#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2901out_ip6_prohibit_entry:
2902 kfree(net->ipv6.ip6_prohibit_entry);
2903out_ip6_null_entry:
2904 kfree(net->ipv6.ip6_null_entry);
2905#endif
fc66f95c
ED
2906out_ip6_dst_entries:
2907 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2908out_ip6_dst_ops:
f2fc6a54 2909 goto out;
cdb18761
DL
2910}
2911
2c8c1e72 2912static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2913{
2914#ifdef CONFIG_PROC_FS
2915 proc_net_remove(net, "ipv6_route");
2916 proc_net_remove(net, "rt6_stats");
2917#endif
8ed67789
DL
2918 kfree(net->ipv6.ip6_null_entry);
2919#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2920 kfree(net->ipv6.ip6_prohibit_entry);
2921 kfree(net->ipv6.ip6_blk_hole_entry);
2922#endif
41bb78b4 2923 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2924}
2925
2926static struct pernet_operations ip6_route_net_ops = {
2927 .init = ip6_route_net_init,
2928 .exit = ip6_route_net_exit,
2929};
2930
8ed67789
DL
2931static struct notifier_block ip6_route_dev_notifier = {
2932 .notifier_call = ip6_route_dev_notify,
2933 .priority = 0,
2934};
2935
433d49c3 2936int __init ip6_route_init(void)
1da177e4 2937{
433d49c3
DL
2938 int ret;
2939
9a7ec3a9
DL
2940 ret = -ENOMEM;
2941 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2942 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2943 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2944 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2945 goto out;
14e50e57 2946
fc66f95c 2947 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2948 if (ret)
bdb3289f 2949 goto out_kmem_cache;
bdb3289f 2950
fc66f95c
ED
2951 ret = register_pernet_subsys(&ip6_route_net_ops);
2952 if (ret)
2953 goto out_dst_entries;
2954
5dc121e9
AE
2955 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2956
8ed67789
DL
2957 /* Registering of the loopback is done before this portion of code,
2958 * the loopback reference in rt6_info will not be taken, do it
2959 * manually for init_net */
d8d1f30b 2960 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2961 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2962 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2963 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2964 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2965 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2966 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2967 #endif
433d49c3
DL
2968 ret = fib6_init();
2969 if (ret)
8ed67789 2970 goto out_register_subsys;
433d49c3 2971
433d49c3
DL
2972 ret = xfrm6_init();
2973 if (ret)
cdb18761 2974 goto out_fib6_init;
c35b7e72 2975
433d49c3
DL
2976 ret = fib6_rules_init();
2977 if (ret)
2978 goto xfrm6_init;
7e5449c2 2979
433d49c3 2980 ret = -ENOBUFS;
c7ac8679
GR
2981 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2982 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2983 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 2984 goto fib6_rules_init;
c127ea2c 2985
8ed67789 2986 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2987 if (ret)
2988 goto fib6_rules_init;
8ed67789 2989
433d49c3
DL
2990out:
2991 return ret;
2992
2993fib6_rules_init:
433d49c3
DL
2994 fib6_rules_cleanup();
2995xfrm6_init:
433d49c3 2996 xfrm6_fini();
433d49c3 2997out_fib6_init:
433d49c3 2998 fib6_gc_cleanup();
8ed67789
DL
2999out_register_subsys:
3000 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3001out_dst_entries:
3002 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3003out_kmem_cache:
f2fc6a54 3004 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3005 goto out;
1da177e4
LT
3006}
3007
3008void ip6_route_cleanup(void)
3009{
8ed67789 3010 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3011 fib6_rules_cleanup();
1da177e4 3012 xfrm6_fini();
1da177e4 3013 fib6_gc_cleanup();
8ed67789 3014 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3015 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3016 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3017}
This page took 0.924561 seconds and 5 git commands to generate.