net: remove ipv6_addr_copy()
[deliverable/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
65/* Set to 3 to get tracing. */
66#define RT6_DEBUG 2
67
68#if RT6_DEBUG >= 3
69#define RDBG(x) printk x
70#define RT6_TRACE(x...) printk(KERN_DEBUG x)
71#else
72#define RDBG(x)
73#define RT6_TRACE(x...) do { ; } while (0)
74#endif
75
21efcfa0
ED
76static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
77 const struct in6_addr *dest);
1da177e4 78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 79static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 80static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
81static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82static void ip6_dst_destroy(struct dst_entry *);
83static void ip6_dst_ifdown(struct dst_entry *,
84 struct net_device *dev, int how);
569d3645 85static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
86
87static int ip6_pkt_discard(struct sk_buff *skb);
88static int ip6_pkt_discard_out(struct sk_buff *skb);
89static void ip6_link_failure(struct sk_buff *skb);
90static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91
70ceb4f5 92#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 93static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
94 const struct in6_addr *prefix, int prefixlen,
95 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 96 unsigned pref);
efa2cea0 97static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
98 const struct in6_addr *prefix, int prefixlen,
99 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
100#endif
101
06582540
DM
102static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
103{
104 struct rt6_info *rt = (struct rt6_info *) dst;
105 struct inet_peer *peer;
106 u32 *p = NULL;
107
8e2ec639
YZ
108 if (!(rt->dst.flags & DST_HOST))
109 return NULL;
110
06582540
DM
111 if (!rt->rt6i_peer)
112 rt6_bind_peer(rt, 1);
113
114 peer = rt->rt6i_peer;
115 if (peer) {
116 u32 *old_p = __DST_METRICS_PTR(old);
117 unsigned long prev, new;
118
119 p = peer->metrics;
120 if (inet_metrics_new(peer))
121 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122
123 new = (unsigned long) p;
124 prev = cmpxchg(&dst->_metrics, old, new);
125
126 if (prev != old) {
127 p = __DST_METRICS_PTR(prev);
128 if (prev & DST_METRICS_READ_ONLY)
129 p = NULL;
130 }
131 }
132 return p;
133}
134
d3aaeb38
DM
135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136{
137 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
138}
139
9a7ec3a9 140static struct dst_ops ip6_dst_ops_template = {
1da177e4 141 .family = AF_INET6,
09640e63 142 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
143 .gc = ip6_dst_gc,
144 .gc_thresh = 1024,
145 .check = ip6_dst_check,
0dbaee3b 146 .default_advmss = ip6_default_advmss,
d33e4553 147 .default_mtu = ip6_default_mtu,
06582540 148 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
149 .destroy = ip6_dst_destroy,
150 .ifdown = ip6_dst_ifdown,
151 .negative_advice = ip6_negative_advice,
152 .link_failure = ip6_link_failure,
153 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 154 .local_out = __ip6_local_out,
d3aaeb38 155 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
156};
157
ec831ea7
RD
/* Blackhole dsts report a default MTU of zero. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int no_mtu = 0;

	return no_mtu;
}
162
14e50e57
DM
163static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
164{
165}
166
0972ddb2
HB
167static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
168 unsigned long old)
169{
170 return NULL;
171}
172
14e50e57
DM
173static struct dst_ops ip6_dst_blackhole_ops = {
174 .family = AF_INET6,
09640e63 175 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
176 .destroy = ip6_dst_destroy,
177 .check = ip6_dst_check,
ec831ea7 178 .default_mtu = ip6_blackhole_default_mtu,
214f45c9 179 .default_advmss = ip6_default_advmss,
14e50e57 180 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 181 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 182 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
183};
184
62fa8a84
DM
185static const u32 ip6_template_metrics[RTAX_MAX] = {
186 [RTAX_HOPLIMIT - 1] = 255,
187};
188
bdb3289f 189static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
190 .dst = {
191 .__refcnt = ATOMIC_INIT(1),
192 .__use = 1,
193 .obsolete = -1,
194 .error = -ENETUNREACH,
d8d1f30b
CG
195 .input = ip6_pkt_discard,
196 .output = ip6_pkt_discard_out,
1da177e4
LT
197 },
198 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 199 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
200 .rt6i_metric = ~(u32) 0,
201 .rt6i_ref = ATOMIC_INIT(1),
202};
203
101367c2
TG
204#ifdef CONFIG_IPV6_MULTIPLE_TABLES
205
6723ab54
DM
206static int ip6_pkt_prohibit(struct sk_buff *skb);
207static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 208
280a34c8 209static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
210 .dst = {
211 .__refcnt = ATOMIC_INIT(1),
212 .__use = 1,
213 .obsolete = -1,
214 .error = -EACCES,
d8d1f30b
CG
215 .input = ip6_pkt_prohibit,
216 .output = ip6_pkt_prohibit_out,
101367c2
TG
217 },
218 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 219 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
220 .rt6i_metric = ~(u32) 0,
221 .rt6i_ref = ATOMIC_INIT(1),
222};
223
bdb3289f 224static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
225 .dst = {
226 .__refcnt = ATOMIC_INIT(1),
227 .__use = 1,
228 .obsolete = -1,
229 .error = -EINVAL,
d8d1f30b
CG
230 .input = dst_discard,
231 .output = dst_discard,
101367c2
TG
232 },
233 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 234 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
235 .rt6i_metric = ~(u32) 0,
236 .rt6i_ref = ATOMIC_INIT(1),
237};
238
239#endif
240
1da177e4 241/* allocate dst with ip6_dst_ops */
5c1e6aa3 242static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
243 struct net_device *dev,
244 int flags)
1da177e4 245{
957c665f 246 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 247
fbe58186
MB
248 if (rt != NULL)
249 memset(&rt->rt6i_table, 0,
250 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
251
252 return rt;
1da177e4
LT
253}
254
255static void ip6_dst_destroy(struct dst_entry *dst)
256{
257 struct rt6_info *rt = (struct rt6_info *)dst;
258 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 259 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 260
8e2ec639
YZ
261 if (!(rt->dst.flags & DST_HOST))
262 dst_destroy_metrics_generic(dst);
263
1da177e4
LT
264 if (idev != NULL) {
265 rt->rt6i_idev = NULL;
266 in6_dev_put(idev);
1ab1457c 267 }
b3419363 268 if (peer) {
b3419363
DM
269 rt->rt6i_peer = NULL;
270 inet_putpeer(peer);
271 }
272}
273
6431cbc2
DM
274static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
275
276static u32 rt6_peer_genid(void)
277{
278 return atomic_read(&__rt6_peer_genid);
279}
280
b3419363
DM
281void rt6_bind_peer(struct rt6_info *rt, int create)
282{
283 struct inet_peer *peer;
284
b3419363
DM
285 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
286 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
287 inet_putpeer(peer);
6431cbc2
DM
288 else
289 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
290}
291
292static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
293 int how)
294{
295 struct rt6_info *rt = (struct rt6_info *)dst;
296 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 297 struct net_device *loopback_dev =
c346dca1 298 dev_net(dev)->loopback_dev;
1da177e4 299
5a3e55d6
DL
300 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
301 struct inet6_dev *loopback_idev =
302 in6_dev_get(loopback_dev);
1da177e4
LT
303 if (loopback_idev != NULL) {
304 rt->rt6i_idev = loopback_idev;
305 in6_dev_put(idev);
306 }
307 }
308}
309
310static __inline__ int rt6_check_expired(const struct rt6_info *rt)
311{
a02cec21
ED
312 return (rt->rt6i_flags & RTF_EXPIRES) &&
313 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
314}
315
b71d1d42 316static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 317{
a02cec21
ED
318 return ipv6_addr_type(daddr) &
319 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
320}
321
1da177e4 322/*
c71099ac 323 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
324 */
325
8ed67789
DL
326static inline struct rt6_info *rt6_device_match(struct net *net,
327 struct rt6_info *rt,
b71d1d42 328 const struct in6_addr *saddr,
1da177e4 329 int oif,
d420895e 330 int flags)
1da177e4
LT
331{
332 struct rt6_info *local = NULL;
333 struct rt6_info *sprt;
334
dd3abc4e
YH
335 if (!oif && ipv6_addr_any(saddr))
336 goto out;
337
d8d1f30b 338 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
339 struct net_device *dev = sprt->rt6i_dev;
340
341 if (oif) {
1da177e4
LT
342 if (dev->ifindex == oif)
343 return sprt;
344 if (dev->flags & IFF_LOOPBACK) {
345 if (sprt->rt6i_idev == NULL ||
346 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 347 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 348 continue;
1ab1457c 349 if (local && (!oif ||
1da177e4
LT
350 local->rt6i_idev->dev->ifindex == oif))
351 continue;
352 }
353 local = sprt;
354 }
dd3abc4e
YH
355 } else {
356 if (ipv6_chk_addr(net, saddr, dev,
357 flags & RT6_LOOKUP_F_IFACE))
358 return sprt;
1da177e4 359 }
dd3abc4e 360 }
1da177e4 361
dd3abc4e 362 if (oif) {
1da177e4
LT
363 if (local)
364 return local;
365
d420895e 366 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 367 return net->ipv6.ip6_null_entry;
1da177e4 368 }
dd3abc4e 369out:
1da177e4
LT
370 return rt;
371}
372
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the route's neighbour entry is not in a
 * NUD_VALID state and was last updated more than rtr_probe_interval ago,
 * send a neighbour solicitation to the neighbour's solicited-node multicast
 * address.  @rt may be NULL, in which case nothing happens.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.  The check and the update of neigh->updated therefore run under
 * the *write* side of neigh->lock: with only the read side held, two
 * concurrent callers could both pass the time check and both send a probe,
 * and the store to neigh->updated would race with other writers.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		/* Drop the lock before transmitting the solicitation. */
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
412
1da177e4 413/*
554cfb7e 414 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 415 */
b6f99a21 416static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
417{
418 struct net_device *dev = rt->rt6i_dev;
161980f4 419 if (!oif || dev->ifindex == oif)
554cfb7e 420 return 2;
161980f4
DM
421 if ((dev->flags & IFF_LOOPBACK) &&
422 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
423 return 1;
424 return 0;
554cfb7e 425}
1da177e4 426
b6f99a21 427static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 428{
f2c31e32 429 struct neighbour *neigh;
398bcbeb 430 int m;
f2c31e32
ED
431
432 rcu_read_lock();
433 neigh = dst_get_neighbour(&rt->dst);
4d0c5911
YH
434 if (rt->rt6i_flags & RTF_NONEXTHOP ||
435 !(rt->rt6i_flags & RTF_GATEWAY))
436 m = 1;
437 else if (neigh) {
554cfb7e
YH
438 read_lock_bh(&neigh->lock);
439 if (neigh->nud_state & NUD_VALID)
4d0c5911 440 m = 2;
398bcbeb
YH
441#ifdef CONFIG_IPV6_ROUTER_PREF
442 else if (neigh->nud_state & NUD_FAILED)
443 m = 0;
444#endif
445 else
ea73ee23 446 m = 1;
554cfb7e 447 read_unlock_bh(&neigh->lock);
398bcbeb
YH
448 } else
449 m = 0;
f2c31e32 450 rcu_read_unlock();
554cfb7e 451 return m;
1da177e4
LT
452}
453
554cfb7e
YH
454static int rt6_score_route(struct rt6_info *rt, int oif,
455 int strict)
1da177e4 456{
4d0c5911 457 int m, n;
1ab1457c 458
4d0c5911 459 m = rt6_check_dev(rt, oif);
77d16f45 460 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 461 return -1;
ebacaaa0
YH
462#ifdef CONFIG_IPV6_ROUTER_PREF
463 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
464#endif
4d0c5911 465 n = rt6_check_neigh(rt);
557e92ef 466 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
467 return -1;
468 return m;
469}
470
f11e6659
DM
471static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
472 int *mpri, struct rt6_info *match)
554cfb7e 473{
f11e6659
DM
474 int m;
475
476 if (rt6_check_expired(rt))
477 goto out;
478
479 m = rt6_score_route(rt, oif, strict);
480 if (m < 0)
481 goto out;
482
483 if (m > *mpri) {
484 if (strict & RT6_LOOKUP_F_REACHABLE)
485 rt6_probe(match);
486 *mpri = m;
487 match = rt;
488 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
489 rt6_probe(rt);
490 }
491
492out:
493 return match;
494}
495
496static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
497 struct rt6_info *rr_head,
498 u32 metric, int oif, int strict)
499{
500 struct rt6_info *rt, *match;
554cfb7e 501 int mpri = -1;
1da177e4 502
f11e6659
DM
503 match = NULL;
504 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 505 rt = rt->dst.rt6_next)
f11e6659
DM
506 match = find_match(rt, oif, strict, &mpri, match);
507 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 508 rt = rt->dst.rt6_next)
f11e6659 509 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 510
f11e6659
DM
511 return match;
512}
1da177e4 513
f11e6659
DM
514static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
515{
516 struct rt6_info *match, *rt0;
8ed67789 517 struct net *net;
1da177e4 518
f11e6659 519 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 520 __func__, fn->leaf, oif);
554cfb7e 521
f11e6659
DM
522 rt0 = fn->rr_ptr;
523 if (!rt0)
524 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 525
f11e6659 526 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 527
554cfb7e 528 if (!match &&
f11e6659 529 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 530 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 531
554cfb7e 532 /* no entries matched; do round-robin */
f11e6659
DM
533 if (!next || next->rt6i_metric != rt0->rt6i_metric)
534 next = fn->leaf;
535
536 if (next != rt0)
537 fn->rr_ptr = next;
1da177e4 538 }
1da177e4 539
f11e6659 540 RT6_TRACE("%s() => %p\n",
0dc47877 541 __func__, match);
1da177e4 542
c346dca1 543 net = dev_net(rt0->rt6i_dev);
a02cec21 544 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
545}
546
70ceb4f5
YH
547#ifdef CONFIG_IPV6_ROUTE_INFO
548int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 549 const struct in6_addr *gwaddr)
70ceb4f5 550{
c346dca1 551 struct net *net = dev_net(dev);
70ceb4f5
YH
552 struct route_info *rinfo = (struct route_info *) opt;
553 struct in6_addr prefix_buf, *prefix;
554 unsigned int pref;
4bed72e4 555 unsigned long lifetime;
70ceb4f5
YH
556 struct rt6_info *rt;
557
558 if (len < sizeof(struct route_info)) {
559 return -EINVAL;
560 }
561
562 /* Sanity check for prefix_len and length */
563 if (rinfo->length > 3) {
564 return -EINVAL;
565 } else if (rinfo->prefix_len > 128) {
566 return -EINVAL;
567 } else if (rinfo->prefix_len > 64) {
568 if (rinfo->length < 2) {
569 return -EINVAL;
570 }
571 } else if (rinfo->prefix_len > 0) {
572 if (rinfo->length < 1) {
573 return -EINVAL;
574 }
575 }
576
577 pref = rinfo->route_pref;
578 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 579 return -EINVAL;
70ceb4f5 580
4bed72e4 581 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
582
583 if (rinfo->length == 3)
584 prefix = (struct in6_addr *)rinfo->prefix;
585 else {
586 /* this function is safe */
587 ipv6_addr_prefix(&prefix_buf,
588 (struct in6_addr *)rinfo->prefix,
589 rinfo->prefix_len);
590 prefix = &prefix_buf;
591 }
592
efa2cea0
DL
593 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
594 dev->ifindex);
70ceb4f5
YH
595
596 if (rt && !lifetime) {
e0a1ad73 597 ip6_del_rt(rt);
70ceb4f5
YH
598 rt = NULL;
599 }
600
601 if (!rt && lifetime)
efa2cea0 602 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
603 pref);
604 else if (rt)
605 rt->rt6i_flags = RTF_ROUTEINFO |
606 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
607
608 if (rt) {
4bed72e4 609 if (!addrconf_finite_timeout(lifetime)) {
70ceb4f5
YH
610 rt->rt6i_flags &= ~RTF_EXPIRES;
611 } else {
612 rt->rt6i_expires = jiffies + HZ * lifetime;
613 rt->rt6i_flags |= RTF_EXPIRES;
614 }
d8d1f30b 615 dst_release(&rt->dst);
70ceb4f5
YH
616 }
617 return 0;
618}
619#endif
620
/*
 * If the lookup produced the null entry, climb the fib6 tree looking for a
 * node that carries route info and retry there.
 *
 * Relies on the expanding function providing the variables `rt` and `fn`
 * and the labels `out` (reached at a top-level root) and `restart`
 * (reached when a node with RTN_RTINFO is found).
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 638
8ed67789
DL
639static struct rt6_info *ip6_pol_route_lookup(struct net *net,
640 struct fib6_table *table,
4c9483b2 641 struct flowi6 *fl6, int flags)
1da177e4
LT
642{
643 struct fib6_node *fn;
644 struct rt6_info *rt;
645
c71099ac 646 read_lock_bh(&table->tb6_lock);
4c9483b2 647 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
648restart:
649 rt = fn->leaf;
4c9483b2
DM
650 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
651 BACKTRACK(net, &fl6->saddr);
c71099ac 652out:
d8d1f30b 653 dst_use(&rt->dst, jiffies);
c71099ac 654 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
655 return rt;
656
657}
658
9acd9f3a
YH
659struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
660 const struct in6_addr *saddr, int oif, int strict)
c71099ac 661{
4c9483b2
DM
662 struct flowi6 fl6 = {
663 .flowi6_oif = oif,
664 .daddr = *daddr,
c71099ac
TG
665 };
666 struct dst_entry *dst;
77d16f45 667 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 668
adaa70bb 669 if (saddr) {
4c9483b2 670 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
671 flags |= RT6_LOOKUP_F_HAS_SADDR;
672 }
673
4c9483b2 674 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
675 if (dst->error == 0)
676 return (struct rt6_info *) dst;
677
678 dst_release(dst);
679
1da177e4
LT
680 return NULL;
681}
682
7159039a
YH
683EXPORT_SYMBOL(rt6_lookup);
684
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold it, it may
   be destroyed.
 */
690
86872cb5 691static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
692{
693 int err;
c71099ac 694 struct fib6_table *table;
1da177e4 695
c71099ac
TG
696 table = rt->rt6i_table;
697 write_lock_bh(&table->tb6_lock);
86872cb5 698 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 699 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
700
701 return err;
702}
703
40e22e8f
TG
704int ip6_ins_rt(struct rt6_info *rt)
705{
4d1169c1 706 struct nl_info info = {
c346dca1 707 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 708 };
528c4ceb 709 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
710}
711
21efcfa0
ED
712static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
713 const struct in6_addr *daddr,
b71d1d42 714 const struct in6_addr *saddr)
1da177e4 715{
1da177e4
LT
716 struct rt6_info *rt;
717
718 /*
719 * Clone the route.
720 */
721
21efcfa0 722 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
723
724 if (rt) {
14deae41
DM
725 struct neighbour *neigh;
726 int attempts = !in_softirq();
727
58c4fb86
YH
728 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
729 if (rt->rt6i_dst.plen != 128 &&
21efcfa0 730 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 731 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 732 rt->rt6i_gateway = *daddr;
58c4fb86 733 }
1da177e4 734
1da177e4 735 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
736
737#ifdef CONFIG_IPV6_SUBTREES
738 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 739 rt->rt6i_src.addr = *saddr;
1da177e4
LT
740 rt->rt6i_src.plen = 128;
741 }
742#endif
743
14deae41
DM
744 retry:
745 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
746 if (IS_ERR(neigh)) {
747 struct net *net = dev_net(rt->rt6i_dev);
748 int saved_rt_min_interval =
749 net->ipv6.sysctl.ip6_rt_gc_min_interval;
750 int saved_rt_elasticity =
751 net->ipv6.sysctl.ip6_rt_gc_elasticity;
752
753 if (attempts-- > 0) {
754 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
755 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
756
86393e52 757 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
758
759 net->ipv6.sysctl.ip6_rt_gc_elasticity =
760 saved_rt_elasticity;
761 net->ipv6.sysctl.ip6_rt_gc_min_interval =
762 saved_rt_min_interval;
763 goto retry;
764 }
765
766 if (net_ratelimit())
767 printk(KERN_WARNING
7e1b33e5 768 "ipv6: Neighbour table overflow.\n");
d8d1f30b 769 dst_free(&rt->dst);
14deae41
DM
770 return NULL;
771 }
69cce1d1 772 dst_set_neighbour(&rt->dst, neigh);
1da177e4 773
95a9a5ba 774 }
1da177e4 775
95a9a5ba
YH
776 return rt;
777}
1da177e4 778
21efcfa0
ED
779static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
780 const struct in6_addr *daddr)
299d9939 781{
21efcfa0
ED
782 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
783
299d9939 784 if (rt) {
299d9939 785 rt->rt6i_flags |= RTF_CACHE;
f2c31e32 786 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
299d9939
YH
787 }
788 return rt;
789}
790
8ed67789 791static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 792 struct flowi6 *fl6, int flags)
1da177e4
LT
793{
794 struct fib6_node *fn;
519fbd87 795 struct rt6_info *rt, *nrt;
c71099ac 796 int strict = 0;
1da177e4 797 int attempts = 3;
519fbd87 798 int err;
53b7997f 799 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 800
77d16f45 801 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
802
803relookup:
c71099ac 804 read_lock_bh(&table->tb6_lock);
1da177e4 805
8238dd06 806restart_2:
4c9483b2 807 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
808
809restart:
4acad72d 810 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 811
4c9483b2 812 BACKTRACK(net, &fl6->saddr);
8ed67789 813 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 814 rt->rt6i_flags & RTF_CACHE)
1ddef044 815 goto out;
1da177e4 816
d8d1f30b 817 dst_hold(&rt->dst);
c71099ac 818 read_unlock_bh(&table->tb6_lock);
fb9de91e 819
f2c31e32 820 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 821 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 822 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 823 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
824 else
825 goto out2;
e40cf353 826
d8d1f30b 827 dst_release(&rt->dst);
8ed67789 828 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 829
d8d1f30b 830 dst_hold(&rt->dst);
519fbd87 831 if (nrt) {
40e22e8f 832 err = ip6_ins_rt(nrt);
519fbd87 833 if (!err)
1da177e4 834 goto out2;
1da177e4 835 }
1da177e4 836
519fbd87
YH
837 if (--attempts <= 0)
838 goto out2;
839
840 /*
c71099ac 841 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
842 * released someone could insert this route. Relookup.
843 */
d8d1f30b 844 dst_release(&rt->dst);
519fbd87
YH
845 goto relookup;
846
847out:
8238dd06
YH
848 if (reachable) {
849 reachable = 0;
850 goto restart_2;
851 }
d8d1f30b 852 dst_hold(&rt->dst);
c71099ac 853 read_unlock_bh(&table->tb6_lock);
1da177e4 854out2:
d8d1f30b
CG
855 rt->dst.lastuse = jiffies;
856 rt->dst.__use++;
c71099ac
TG
857
858 return rt;
1da177e4
LT
859}
860
8ed67789 861static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 862 struct flowi6 *fl6, int flags)
4acad72d 863{
4c9483b2 864 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
865}
866
c71099ac
TG
867void ip6_route_input(struct sk_buff *skb)
868{
b71d1d42 869 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 870 struct net *net = dev_net(skb->dev);
adaa70bb 871 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
872 struct flowi6 fl6 = {
873 .flowi6_iif = skb->dev->ifindex,
874 .daddr = iph->daddr,
875 .saddr = iph->saddr,
876 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
877 .flowi6_mark = skb->mark,
878 .flowi6_proto = iph->nexthdr,
c71099ac 879 };
adaa70bb 880
1d6e55f1 881 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 882 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 883
4c9483b2 884 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
885}
886
8ed67789 887static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 888 struct flowi6 *fl6, int flags)
1da177e4 889{
4c9483b2 890 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
891}
892
9c7a4f9c 893struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 894 struct flowi6 *fl6)
c71099ac
TG
895{
896 int flags = 0;
897
4c9483b2 898 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 899 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 900
4c9483b2 901 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 902 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
903 else if (sk)
904 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 905
4c9483b2 906 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
907}
908
7159039a 909EXPORT_SYMBOL(ip6_route_output);
1da177e4 910
2774c131 911struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 912{
5c1e6aa3 913 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
914 struct dst_entry *new = NULL;
915
5c1e6aa3 916 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 917 if (rt) {
cf911662
DM
918 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
919
d8d1f30b 920 new = &rt->dst;
14e50e57 921
14e50e57 922 new->__use = 1;
352e512c
HX
923 new->input = dst_discard;
924 new->output = dst_discard;
14e50e57 925
21efcfa0
ED
926 if (dst_metrics_read_only(&ort->dst))
927 new->_metrics = ort->dst._metrics;
928 else
929 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
930 rt->rt6i_idev = ort->rt6i_idev;
931 if (rt->rt6i_idev)
932 in6_dev_hold(rt->rt6i_idev);
933 rt->rt6i_expires = 0;
934
4e3fd7a0 935 rt->rt6i_gateway = ort->rt6i_gateway;
14e50e57
DM
936 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
937 rt->rt6i_metric = 0;
938
939 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
940#ifdef CONFIG_IPV6_SUBTREES
941 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
942#endif
943
944 dst_free(new);
945 }
946
69ead7af
DM
947 dst_release(dst_orig);
948 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 949}
14e50e57 950
1da177e4
LT
951/*
952 * Destination cache support functions
953 */
954
955static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
956{
957 struct rt6_info *rt;
958
959 rt = (struct rt6_info *) dst;
960
6431cbc2
DM
961 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
962 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
963 if (!rt->rt6i_peer)
964 rt6_bind_peer(rt, 0);
965 rt->rt6i_peer_genid = rt6_peer_genid();
966 }
1da177e4 967 return dst;
6431cbc2 968 }
1da177e4
LT
969 return NULL;
970}
971
972static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
973{
974 struct rt6_info *rt = (struct rt6_info *) dst;
975
976 if (rt) {
54c1a859
YH
977 if (rt->rt6i_flags & RTF_CACHE) {
978 if (rt6_check_expired(rt)) {
979 ip6_del_rt(rt);
980 dst = NULL;
981 }
982 } else {
1da177e4 983 dst_release(dst);
54c1a859
YH
984 dst = NULL;
985 }
1da177e4 986 }
54c1a859 987 return dst;
1da177e4
LT
988}
989
990static void ip6_link_failure(struct sk_buff *skb)
991{
992 struct rt6_info *rt;
993
3ffe533c 994 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 995
adf30907 996 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
997 if (rt) {
998 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 999 dst_set_expires(&rt->dst, 0);
1da177e4
LT
1000 rt->rt6i_flags |= RTF_EXPIRES;
1001 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1002 rt->rt6i_node->fn_sernum = -1;
1003 }
1004}
1005
1006static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1007{
1008 struct rt6_info *rt6 = (struct rt6_info*)dst;
1009
1010 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1011 rt6->rt6i_flags |= RTF_MODIFIED;
1012 if (mtu < IPV6_MIN_MTU) {
defb3519 1013 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1014 mtu = IPV6_MIN_MTU;
defb3519
DM
1015 features |= RTAX_FEATURE_ALLFRAG;
1016 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1017 }
defb3519 1018 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1019 }
1020}
1021
0dbaee3b 1022static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1023{
0dbaee3b
DM
1024 struct net_device *dev = dst->dev;
1025 unsigned int mtu = dst_mtu(dst);
1026 struct net *net = dev_net(dev);
1027
1da177e4
LT
1028 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1029
5578689a
DL
1030 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1031 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1032
1033 /*
1ab1457c
YH
1034 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1035 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1036 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1037 * rely only on pmtu discovery"
1038 */
1039 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1040 mtu = IPV6_MAXPLEN;
1041 return mtu;
1042}
1043
d33e4553
DM
1044static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1045{
1046 unsigned int mtu = IPV6_MIN_MTU;
1047 struct inet6_dev *idev;
1048
1049 rcu_read_lock();
1050 idev = __in6_dev_get(dst->dev);
1051 if (idev)
1052 mtu = idev->cnf.mtu6;
1053 rcu_read_unlock();
1054
1055 return mtu;
1056}
1057
3b00944c
YH
1058static struct dst_entry *icmp6_dst_gc_list;
1059static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1060
3b00944c 1061struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1062 struct neighbour *neigh,
9acd9f3a 1063 const struct in6_addr *addr)
1da177e4
LT
1064{
1065 struct rt6_info *rt;
1066 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1067 struct net *net = dev_net(dev);
1da177e4
LT
1068
1069 if (unlikely(idev == NULL))
1070 return NULL;
1071
957c665f 1072 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1da177e4
LT
1073 if (unlikely(rt == NULL)) {
1074 in6_dev_put(idev);
1075 goto out;
1076 }
1077
1da177e4
LT
1078 if (neigh)
1079 neigh_hold(neigh);
14deae41 1080 else {
1da177e4 1081 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
1082 if (IS_ERR(neigh))
1083 neigh = NULL;
1084 }
1da177e4 1085
8e2ec639
YZ
1086 rt->dst.flags |= DST_HOST;
1087 rt->dst.output = ip6_output;
69cce1d1 1088 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1089 atomic_set(&rt->dst.__refcnt, 1);
4e3fd7a0 1090 rt->rt6i_dst.addr = *addr;
8e2ec639
YZ
1091 rt->rt6i_dst.plen = 128;
1092 rt->rt6i_idev = idev;
7011687f 1093 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1094
3b00944c 1095 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1096 rt->dst.next = icmp6_dst_gc_list;
1097 icmp6_dst_gc_list = &rt->dst;
3b00944c 1098 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1099
5578689a 1100 fib6_force_start_gc(net);
1da177e4
LT
1101
1102out:
d8d1f30b 1103 return &rt->dst;
1da177e4
LT
1104}
1105
3d0f24a7 1106int icmp6_dst_gc(void)
1da177e4 1107{
e9476e95 1108 struct dst_entry *dst, **pprev;
3d0f24a7 1109 int more = 0;
1da177e4 1110
3b00944c
YH
1111 spin_lock_bh(&icmp6_dst_lock);
1112 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1113
1da177e4
LT
1114 while ((dst = *pprev) != NULL) {
1115 if (!atomic_read(&dst->__refcnt)) {
1116 *pprev = dst->next;
1117 dst_free(dst);
1da177e4
LT
1118 } else {
1119 pprev = &dst->next;
3d0f24a7 1120 ++more;
1da177e4
LT
1121 }
1122 }
1123
3b00944c 1124 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1125
3d0f24a7 1126 return more;
1da177e4
LT
1127}
1128
1e493d19
DM
1129static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1130 void *arg)
1131{
1132 struct dst_entry *dst, **pprev;
1133
1134 spin_lock_bh(&icmp6_dst_lock);
1135 pprev = &icmp6_dst_gc_list;
1136 while ((dst = *pprev) != NULL) {
1137 struct rt6_info *rt = (struct rt6_info *) dst;
1138 if (func(rt, arg)) {
1139 *pprev = dst->next;
1140 dst_free(dst);
1141 } else {
1142 pprev = &dst->next;
1143 }
1144 }
1145 spin_unlock_bh(&icmp6_dst_lock);
1146}
1147
569d3645 1148static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1149{
1da177e4 1150 unsigned long now = jiffies;
86393e52 1151 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1152 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1153 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1154 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1155 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1156 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1157 int entries;
7019b78e 1158
fc66f95c 1159 entries = dst_entries_get_fast(ops);
7019b78e 1160 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1161 entries <= rt_max_size)
1da177e4
LT
1162 goto out;
1163
6891a346
BT
1164 net->ipv6.ip6_rt_gc_expire++;
1165 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1166 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1167 entries = dst_entries_get_slow(ops);
1168 if (entries < ops->gc_thresh)
7019b78e 1169 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1170out:
7019b78e 1171 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1172 return entries > rt_max_size;
1da177e4
LT
1173}
1174
1175/* Clean host part of a prefix. Not necessary in radix tree,
1176 but results in cleaner routing tables.
1177
1178 Remove it only when all the things will work!
1179 */
1180
6b75d090 1181int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1182{
5170ae82 1183 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1184 if (hoplimit == 0) {
6b75d090 1185 struct net_device *dev = dst->dev;
c68f24cc
ED
1186 struct inet6_dev *idev;
1187
1188 rcu_read_lock();
1189 idev = __in6_dev_get(dev);
1190 if (idev)
6b75d090 1191 hoplimit = idev->cnf.hop_limit;
c68f24cc 1192 else
53b7997f 1193 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1194 rcu_read_unlock();
1da177e4
LT
1195 }
1196 return hoplimit;
1197}
abbf46ae 1198EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1199
1200/*
1201 *
1202 */
1203
/*
 * Build a route from the description in @cfg and insert it into the
 * fib table selected by cfg->fc_table.
 *
 * Returns the result of __ip6_ins_rt() once the route has been fully
 * set up, or a negative errno from one of the validation steps, in
 * which case the device/inet6_dev references and the half-built route
 * are released at the "out" label.
 *
 * Side effects on @cfg: fc_metric and fc_protocol are filled with
 * defaults when left at 0 / RTPROT_UNSPEC, and fc_nlinfo.nl_net is
 * overwritten with the namespace of the chosen device.
 */
86872cb5 1204int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1205{
1206 int err;
5578689a 1207 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1208 struct rt6_info *rt = NULL;
1209 struct net_device *dev = NULL;
1210 struct inet6_dev *idev = NULL;
c71099ac 1211 struct fib6_table *table;
1da177e4
LT
1212 int addr_type;
1213
86872cb5 1214 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1215 return -EINVAL;
1216#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1217 if (cfg->fc_src_len)
1da177e4
LT
1218 return -EINVAL;
1219#endif
/* An explicit interface takes a dev and an idev reference up front. */
86872cb5 1220 if (cfg->fc_ifindex) {
1da177e4 1221 err = -ENODEV;
5578689a 1222 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1223 if (!dev)
1224 goto out;
1225 idev = in6_dev_get(dev);
1226 if (!idev)
1227 goto out;
1228 }
1229
86872cb5
TG
1230 if (cfg->fc_metric == 0)
1231 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1232
/*
 * Netlink request without NLM_F_CREATE: look the table up first; a
 * missing table is still created, but only after a warning.  Every
 * other caller creates the table on demand.
 */
d71314b4
MV
1233 err = -ENOBUFS;
1234 if (NULL != cfg->fc_nlinfo.nlh &&
1235 !(cfg->fc_nlinfo.nlh->nlmsg_flags&NLM_F_CREATE)) {
1236 table = fib6_get_table(net, cfg->fc_table);
1237 if (table == NULL) {
1238 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1239 table = fib6_new_table(net, cfg->fc_table);
1240 }
1241 } else {
1242 table = fib6_new_table(net, cfg->fc_table);
1243 }
c71099ac 1244 if (table == NULL) {
c71099ac
TG
1245 goto out;
1246 }
1247
957c665f 1248 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4
LT
1249
1250 if (rt == NULL) {
1251 err = -ENOMEM;
1252 goto out;
1253 }
1254
d8d1f30b 1255 rt->dst.obsolete = -1;
6f704992
YH
1256 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1257 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1258 0;
1da177e4 1259
86872cb5
TG
1260 if (cfg->fc_protocol == RTPROT_UNSPEC)
1261 cfg->fc_protocol = RTPROT_BOOT;
1262 rt->rt6i_protocol = cfg->fc_protocol;
1263
1264 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1265
/* Input handler depends on the destination type and RTF_LOCAL. */
1266 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1267 rt->dst.input = ip6_mc_input;
ab79ad14
1268 else if (cfg->fc_flags & RTF_LOCAL)
1269 rt->dst.input = ip6_input;
1da177e4 1270 else
d8d1f30b 1271 rt->dst.input = ip6_forward;
1da177e4 1272
d8d1f30b 1273 rt->dst.output = ip6_output;
1da177e4 1274
86872cb5
TG
1275 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1276 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1277 if (rt->rt6i_dst.plen == 128)
11d53b49 1278 rt->dst.flags |= DST_HOST;
1da177e4 1279
/* Non-host routes that carry metric attributes get their own zeroed metrics array. */
8e2ec639
YZ
1280 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1281 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1282 if (!metrics) {
1283 err = -ENOMEM;
1284 goto out;
1285 }
1286 dst_init_metrics(&rt->dst, metrics, 0);
1287 }
1da177e4 1288#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1289 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1290 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1291#endif
1292
86872cb5 1293 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1294
1295 /* We cannot add true routes via loopback here,
1296 they would result in kernel looping; promote them to reject routes
1297 */
86872cb5 1298 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1299 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1300 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1301 /* hold loopback dev/idev if we haven't done so. */
5578689a 1302 if (dev != net->loopback_dev) {
1da177e4
LT
1303 if (dev) {
1304 dev_put(dev);
1305 in6_dev_put(idev);
1306 }
5578689a 1307 dev = net->loopback_dev;
1da177e4
LT
1308 dev_hold(dev);
1309 idev = in6_dev_get(dev);
1310 if (!idev) {
1311 err = -ENODEV;
1312 goto out;
1313 }
1314 }
d8d1f30b
CG
1315 rt->dst.output = ip6_pkt_discard_out;
1316 rt->dst.input = ip6_pkt_discard;
1317 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1318 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1319 goto install_route;
1320 }
1321
86872cb5 1322 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1323 const struct in6_addr *gw_addr;
1da177e4
LT
1324 int gwa_type;
1325
86872cb5 1326 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1327 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1328 gwa_type = ipv6_addr_type(gw_addr);
1329
1330 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1331 struct rt6_info *grt;
1332
1333 /* IPv6 strictly inhibits using not link-local
1334 addresses as nexthop address.
1335 Otherwise, router will not able to send redirects.
1336 It is very good, but in some (rare!) circumstances
1337 (SIT, PtP, NBMA NOARP links) it is handy to allow
1338 some exceptions. --ANK
1339 */
1340 err = -EINVAL;
1341 if (!(gwa_type&IPV6_ADDR_UNICAST))
1342 goto out;
1343
5578689a 1344 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1345
1346 err = -EHOSTUNREACH;
1347 if (grt == NULL)
1348 goto out;
1349 if (dev) {
1350 if (dev != grt->rt6i_dev) {
d8d1f30b 1351 dst_release(&grt->dst);
1da177e4
LT
1352 goto out;
1353 }
1354 } else {
/* No interface was given: adopt the one the gateway route uses, with our own references. */
1355 dev = grt->rt6i_dev;
1356 idev = grt->rt6i_idev;
1357 dev_hold(dev);
1358 in6_dev_hold(grt->rt6i_idev);
1359 }
/* The gateway is accepted only if the route to it is not itself gatewayed; otherwise err stays -EHOSTUNREACH. */
1360 if (!(grt->rt6i_flags&RTF_GATEWAY))
1361 err = 0;
d8d1f30b 1362 dst_release(&grt->dst);
1da177e4
LT
1363
1364 if (err)
1365 goto out;
1366 }
1367 err = -EINVAL;
1368 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1369 goto out;
1370 }
1371
1372 err = -ENODEV;
1373 if (dev == NULL)
1374 goto out;
1375
/* A preferred source address, when given, must pass ipv6_chk_addr() on the outgoing device. */
c3968a85
DW
1376 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1377 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1378 err = -EINVAL;
1379 goto out;
1380 }
4e3fd7a0 1381 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1382 rt->rt6i_prefsrc.plen = 128;
1383 } else
1384 rt->rt6i_prefsrc.plen = 0;
1385
86872cb5 1386 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
69cce1d1
DM
1387 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1388 if (IS_ERR(n)) {
1389 err = PTR_ERR(n);
1da177e4
LT
1390 goto out;
1391 }
69cce1d1 1392 dst_set_neighbour(&rt->dst, n);
1da177e4
LT
1393 }
1394
86872cb5 1395 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1396
1397install_route:
/* Copy the metric attributes: type 0 is skipped, types above RTAX_MAX are rejected. */
86872cb5
TG
1398 if (cfg->fc_mx) {
1399 struct nlattr *nla;
1400 int remaining;
1401
1402 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1403 int type = nla_type(nla);
86872cb5
TG
1404
1405 if (type) {
1406 if (type > RTAX_MAX) {
1da177e4
LT
1407 err = -EINVAL;
1408 goto out;
1409 }
86872cb5 1410
defb3519 1411 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1412 }
1da177e4
LT
1413 }
1414 }
1415
/* From here on the dev/idev references are stored in the route and not dropped by this function. */
d8d1f30b 1416 rt->dst.dev = dev;
1da177e4 1417 rt->rt6i_idev = idev;
c71099ac 1418 rt->rt6i_table = table;
63152fc0 1419
c346dca1 1420 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1421
86872cb5 1422 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1423
/* Error exit: drop whatever references were taken and free the unfinished route. */
1424out:
1425 if (dev)
1426 dev_put(dev);
1427 if (idev)
1428 in6_dev_put(idev);
1429 if (rt)
d8d1f30b 1430 dst_free(&rt->dst);
1da177e4
LT
1431 return err;
1432}
1433
86872cb5 1434static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1435{
1436 int err;
c71099ac 1437 struct fib6_table *table;
c346dca1 1438 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1439
8ed67789 1440 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1441 return -ENOENT;
1442
c71099ac
TG
1443 table = rt->rt6i_table;
1444 write_lock_bh(&table->tb6_lock);
1da177e4 1445
86872cb5 1446 err = fib6_del(rt, info);
d8d1f30b 1447 dst_release(&rt->dst);
1da177e4 1448
c71099ac 1449 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1450
1451 return err;
1452}
1453
e0a1ad73
TG
1454int ip6_del_rt(struct rt6_info *rt)
1455{
4d1169c1 1456 struct nl_info info = {
c346dca1 1457 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1458 };
528c4ceb 1459 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1460}
1461
86872cb5 1462static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1463{
c71099ac 1464 struct fib6_table *table;
1da177e4
LT
1465 struct fib6_node *fn;
1466 struct rt6_info *rt;
1467 int err = -ESRCH;
1468
5578689a 1469 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1470 if (table == NULL)
1471 return err;
1472
1473 read_lock_bh(&table->tb6_lock);
1da177e4 1474
c71099ac 1475 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1476 &cfg->fc_dst, cfg->fc_dst_len,
1477 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1478
1da177e4 1479 if (fn) {
d8d1f30b 1480 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1481 if (cfg->fc_ifindex &&
1da177e4 1482 (rt->rt6i_dev == NULL ||
86872cb5 1483 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1484 continue;
86872cb5
TG
1485 if (cfg->fc_flags & RTF_GATEWAY &&
1486 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1487 continue;
86872cb5 1488 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1489 continue;
d8d1f30b 1490 dst_hold(&rt->dst);
c71099ac 1491 read_unlock_bh(&table->tb6_lock);
1da177e4 1492
86872cb5 1493 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1494 }
1495 }
c71099ac 1496 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1497
1498 return err;
1499}
1500
1501/*
1502 * Handle redirects
1503 */
/*
 * Flow key used for redirect lookups: a flowi6 extended with the
 * gateway the current route must use.  __ip6_route_redirect() casts
 * the flowi6 pointer it receives back to this structure, so fl6 has to
 * remain the first member.
 */
a6279458 1504struct ip6rd_flowi {
4c9483b2 1505 struct flowi6 fl6;
a6279458
YH
1506 struct in6_addr gateway;
1507};
1508
8ed67789
DL
1509static struct rt6_info *__ip6_route_redirect(struct net *net,
1510 struct fib6_table *table,
4c9483b2 1511 struct flowi6 *fl6,
a6279458 1512 int flags)
1da177e4 1513{
4c9483b2 1514 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1515 struct rt6_info *rt;
e843b9e1 1516 struct fib6_node *fn;
c71099ac 1517
1da177e4 1518 /*
e843b9e1
YH
1519 * Get the "current" route for this destination and
1520 * check if the redirect has come from approriate router.
1521 *
1522 * RFC 2461 specifies that redirects should only be
1523 * accepted if they come from the nexthop to the target.
1524 * Due to the way the routes are chosen, this notion
1525 * is a bit fuzzy and one might need to check all possible
1526 * routes.
1da177e4 1527 */
1da177e4 1528
c71099ac 1529 read_lock_bh(&table->tb6_lock);
4c9483b2 1530 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1531restart:
d8d1f30b 1532 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1533 /*
1534 * Current route is on-link; redirect is always invalid.
1535 *
1536 * Seems, previous statement is not true. It could
1537 * be node, which looks for us as on-link (f.e. proxy ndisc)
1538 * But then router serving it might decide, that we should
1539 * know truth 8)8) --ANK (980726).
1540 */
1541 if (rt6_check_expired(rt))
1542 continue;
1543 if (!(rt->rt6i_flags & RTF_GATEWAY))
1544 continue;
4c9483b2 1545 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
e843b9e1 1546 continue;
a6279458 1547 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1548 continue;
1549 break;
1550 }
a6279458 1551
cb15d9c2 1552 if (!rt)
8ed67789 1553 rt = net->ipv6.ip6_null_entry;
4c9483b2 1554 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1555out:
d8d1f30b 1556 dst_hold(&rt->dst);
a6279458 1557
c71099ac 1558 read_unlock_bh(&table->tb6_lock);
e843b9e1 1559
a6279458
YH
1560 return rt;
1561};
1562
b71d1d42
ED
1563static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1564 const struct in6_addr *src,
1565 const struct in6_addr *gateway,
a6279458
YH
1566 struct net_device *dev)
1567{
adaa70bb 1568 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1569 struct net *net = dev_net(dev);
a6279458 1570 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1571 .fl6 = {
1572 .flowi6_oif = dev->ifindex,
1573 .daddr = *dest,
1574 .saddr = *src,
a6279458 1575 },
a6279458 1576 };
adaa70bb 1577
4e3fd7a0 1578 rdfl.gateway = *gateway;
86c36ce4 1579
adaa70bb
TG
1580 if (rt6_need_strict(dest))
1581 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1582
4c9483b2 1583 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1584 flags, __ip6_route_redirect);
a6279458
YH
1585}
1586
b71d1d42
ED
/*
 * Process a redirect for destination @dest.
 *
 * @saddr is handed to ip6_route_redirect() as the gateway that the
 * current route to @dest must be using (presumably the address the
 * redirect was received from - confirm against the caller).  @neigh is
 * the neighbour entry of the new next hop, @lladdr its link-layer
 * address, and @on_link tells whether the new route is created without
 * RTF_GATEWAY.
 *
 * On acceptance a cloned host route through @neigh is inserted, the
 * NETEVENT_REDIRECT notifiers are called, and the old route is deleted
 * if it was a cache entry.
 */
1587void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1588 const struct in6_addr *saddr,
a6279458
YH
1589 struct neighbour *neigh, u8 *lladdr, int on_link)
1590{
1591 struct rt6_info *rt, *nrt = NULL;
1592 struct netevent_redirect netevent;
c346dca1 1593 struct net *net = dev_net(neigh->dev);
a6279458
YH
1594
/* The lookup returns a held route; the reference is dropped at "out". */
1595 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1596
8ed67789 1597 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1598 if (net_ratelimit())
1599 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1600 "for redirect target\n");
a6279458 1601 goto out;
1da177e4
LT
1602 }
1603
1da177e4
LT
1604 /*
1605 * We have finally decided to accept it.
1606 */
1607
1ab1457c 1608 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1609 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1610 NEIGH_UPDATE_F_OVERRIDE|
1611 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1612 NEIGH_UPDATE_F_ISROUTER))
1613 );
1614
1615 /*
1616 * Redirect received -> path was valid.
1617 * Look, redirects are sent only in response to data packets,
1618 * so that this nexthop apparently is reachable. --ANK
1619 */
d8d1f30b 1620 dst_confirm(&rt->dst);
1da177e4
LT
1621
1622 /* Duplicate redirect: silently ignore. */
f2c31e32 1623 if (neigh == dst_get_neighbour_raw(&rt->dst))
1da177e4
LT
1624 goto out;
1625
21efcfa0 1626 nrt = ip6_rt_copy(rt, dest);
1da177e4
LT
1627 if (nrt == NULL)
1628 goto out;
1629
1630 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1631 if (on_link)
1632 nrt->rt6i_flags &= ~RTF_GATEWAY;
1633
/* The new route's gateway is the neighbour's own key, and it gets its own neighbour reference. */
4e3fd7a0 1634 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1635 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1636
40e22e8f 1637 if (ip6_ins_rt(nrt))
1da177e4
LT
1638 goto out;
1639
d8d1f30b
CG
1640 netevent.old = &rt->dst;
1641 netevent.new = &nrt->dst;
8d71740c
TT
1642 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1643
/*
 * An old cache entry is removed.  The function returns right after
 * ip6_del_rt() instead of falling through to the dst_release() at
 * "out".
 */
1da177e4 1644 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1645 ip6_del_rt(rt);
1da177e4
LT
1646 return;
1647 }
1648
1649out:
d8d1f30b 1650 dst_release(&rt->dst);
1da177e4
LT
1651}
1652
1653/*
1654 * Handle ICMP "packet too big" messages
1655 * i.e. Path MTU discovery
1656 */
1657
/*
 * Apply a reported path MTU @pmtu to the route towards @daddr (from
 * @saddr) as found by rt6_lookup() with @ifindex.
 *
 * Nothing is done when no route is found or when @pmtu is not smaller
 * than the route's current MTU.  A cache entry is updated in place;
 * for any other route a host route (COW or clone) carrying the new MTU
 * is created and inserted.  In both cases the entry is set to expire
 * after the ip6_rt_mtu_expires sysctl.
 */
b71d1d42 1658static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1659 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1660{
1661 struct rt6_info *rt, *nrt;
1662 int allfrag = 0;
d3052b55 1663again:
ae878ae2 1664 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1665 if (rt == NULL)
1666 return;
1667
/* An expired entry is deleted and the lookup is repeated. */
d3052b55
AV
1668 if (rt6_check_expired(rt)) {
1669 ip6_del_rt(rt);
1670 goto again;
1671 }
1672
/* The MTU is only ever lowered here. */
d8d1f30b 1673 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1674 goto out;
1675
1676 if (pmtu < IPV6_MIN_MTU) {
1677 /*
1ab1457c 1678 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1679 * MTU (1280) and a fragment header should always be included
1680 * after a node receiving Too Big message reporting PMTU is
1681 * less than the IPv6 Minimum Link MTU.
1682 */
1683 pmtu = IPV6_MIN_MTU;
1684 allfrag = 1;
1685 }
1686
1687 /* New mtu received -> path was valid.
1688 They are sent only in response to data packets,
1689 so that this nexthop apparently is reachable. --ANK
1690 */
d8d1f30b 1691 dst_confirm(&rt->dst);
1da177e4
LT
1692
1693 /* Host route. If it is static, it would be better
1694 not to override it, but add new one, so that
1695 when cache entry will expire old pmtu
1696 would return automatically.
1697 */
1698 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1699 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1700 if (allfrag) {
1701 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1702 features |= RTAX_FEATURE_ALLFRAG;
1703 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1704 }
d8d1f30b 1705 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1706 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1707 goto out;
1708 }
1709
1710 /* Network route.
1711 Two cases are possible:
1712 1. It is connected route. Action: COW
1713 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1714 */
f2c31e32 1715 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1716 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1717 else
1718 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1719
/* If the allocation failed, the update is silently dropped. */
d5315b50 1720 if (nrt) {
defb3519
DM
1721 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1722 if (allfrag) {
1723 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1724 features |= RTAX_FEATURE_ALLFRAG;
1725 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1726 }
a1e78363
YH
1727
1728 /* According to RFC 1981, detecting PMTU increase shouldn't be
1729 * happened within 5 mins, the recommended timer is 10 mins.
1730 * Here this route expiration time is set to ip6_rt_mtu_expires
1731 * which is 10 mins. After 10 mins the decreased pmtu is expired
1732 * and detecting PMTU increase will be automatically happened.
1733 */
d8d1f30b 1734 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1735 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1736
40e22e8f 1737 ip6_ins_rt(nrt);
1da177e4 1738 }
/* Drop the reference on the route that was looked up. */
1da177e4 1739out:
d8d1f30b 1740 dst_release(&rt->dst);
1da177e4
LT
1741}
1742
b71d1d42 1743void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1744 struct net_device *dev, u32 pmtu)
1745{
1746 struct net *net = dev_net(dev);
1747
1748 /*
1749 * RFC 1981 states that a node "MUST reduce the size of the packets it
1750 * is sending along the path" that caused the Packet Too Big message.
1751 * Since it's not possible in the general case to determine which
1752 * interface was used to send the original packet, we update the MTU
1753 * on the interface that will be used to send future packets. We also
1754 * update the MTU on the interface that received the Packet Too Big in
1755 * case the original packet was forced out that interface with
1756 * SO_BINDTODEVICE or similar. This is the next best thing to the
1757 * correct behaviour, which would be to update the MTU on all
1758 * interfaces.
1759 */
1760 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1761 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1762}
1763
1da177e4
LT
1764/*
1765 * Misc support functions
1766 */
1767
21efcfa0
ED
1768static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1769 const struct in6_addr *dest)
1da177e4 1770{
c346dca1 1771 struct net *net = dev_net(ort->rt6i_dev);
5c1e6aa3 1772 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1773 ort->dst.dev, 0);
1da177e4
LT
1774
1775 if (rt) {
d8d1f30b
CG
1776 rt->dst.input = ort->dst.input;
1777 rt->dst.output = ort->dst.output;
8e2ec639 1778 rt->dst.flags |= DST_HOST;
d8d1f30b 1779
4e3fd7a0 1780 rt->rt6i_dst.addr = *dest;
8e2ec639 1781 rt->rt6i_dst.plen = 128;
defb3519 1782 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1783 rt->dst.error = ort->dst.error;
1da177e4
LT
1784 rt->rt6i_idev = ort->rt6i_idev;
1785 if (rt->rt6i_idev)
1786 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1787 rt->dst.lastuse = jiffies;
1da177e4
LT
1788 rt->rt6i_expires = 0;
1789
4e3fd7a0 1790 rt->rt6i_gateway = ort->rt6i_gateway;
1da177e4
LT
1791 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1792 rt->rt6i_metric = 0;
1793
1da177e4
LT
1794#ifdef CONFIG_IPV6_SUBTREES
1795 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1796#endif
0f6c6392 1797 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1798 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1799 }
1800 return rt;
1801}
1802
70ceb4f5 1803#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1804static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1805 const struct in6_addr *prefix, int prefixlen,
1806 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1807{
1808 struct fib6_node *fn;
1809 struct rt6_info *rt = NULL;
c71099ac
TG
1810 struct fib6_table *table;
1811
efa2cea0 1812 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1813 if (table == NULL)
1814 return NULL;
70ceb4f5 1815
c71099ac
TG
1816 write_lock_bh(&table->tb6_lock);
1817 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1818 if (!fn)
1819 goto out;
1820
d8d1f30b 1821 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1822 if (rt->rt6i_dev->ifindex != ifindex)
1823 continue;
1824 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1825 continue;
1826 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1827 continue;
d8d1f30b 1828 dst_hold(&rt->dst);
70ceb4f5
YH
1829 break;
1830 }
1831out:
c71099ac 1832 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1833 return rt;
1834}
1835
efa2cea0 1836static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1837 const struct in6_addr *prefix, int prefixlen,
1838 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1839 unsigned pref)
1840{
86872cb5
TG
1841 struct fib6_config cfg = {
1842 .fc_table = RT6_TABLE_INFO,
238fc7ea 1843 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1844 .fc_ifindex = ifindex,
1845 .fc_dst_len = prefixlen,
1846 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1847 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1848 .fc_nlinfo.pid = 0,
1849 .fc_nlinfo.nlh = NULL,
1850 .fc_nlinfo.nl_net = net,
86872cb5
TG
1851 };
1852
4e3fd7a0
AD
1853 cfg.fc_dst = *prefix;
1854 cfg.fc_gateway = *gwaddr;
70ceb4f5 1855
e317da96
YH
1856 /* We should treat it as a default route if prefix length is 0. */
1857 if (!prefixlen)
86872cb5 1858 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1859
86872cb5 1860 ip6_route_add(&cfg);
70ceb4f5 1861
efa2cea0 1862 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1863}
1864#endif
1865
b71d1d42 1866struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1867{
1da177e4 1868 struct rt6_info *rt;
c71099ac 1869 struct fib6_table *table;
1da177e4 1870
c346dca1 1871 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1872 if (table == NULL)
1873 return NULL;
1da177e4 1874
c71099ac 1875 write_lock_bh(&table->tb6_lock);
d8d1f30b 1876 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1877 if (dev == rt->rt6i_dev &&
045927ff 1878 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1879 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1880 break;
1881 }
1882 if (rt)
d8d1f30b 1883 dst_hold(&rt->dst);
c71099ac 1884 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1885 return rt;
1886}
1887
b71d1d42 1888struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1889 struct net_device *dev,
1890 unsigned int pref)
1da177e4 1891{
86872cb5
TG
1892 struct fib6_config cfg = {
1893 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1894 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1895 .fc_ifindex = dev->ifindex,
1896 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1897 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1898 .fc_nlinfo.pid = 0,
1899 .fc_nlinfo.nlh = NULL,
c346dca1 1900 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1901 };
1da177e4 1902
4e3fd7a0 1903 cfg.fc_gateway = *gwaddr;
1da177e4 1904
86872cb5 1905 ip6_route_add(&cfg);
1da177e4 1906
1da177e4
LT
1907 return rt6_get_dflt_router(gwaddr, dev);
1908}
1909
7b4da532 1910void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1911{
1912 struct rt6_info *rt;
c71099ac
TG
1913 struct fib6_table *table;
1914
1915 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1916 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1917 if (table == NULL)
1918 return;
1da177e4
LT
1919
1920restart:
c71099ac 1921 read_lock_bh(&table->tb6_lock);
d8d1f30b 1922 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1923 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1924 dst_hold(&rt->dst);
c71099ac 1925 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1926 ip6_del_rt(rt);
1da177e4
LT
1927 goto restart;
1928 }
1929 }
c71099ac 1930 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1931}
1932
5578689a
DL
1933static void rtmsg_to_fib6_config(struct net *net,
1934 struct in6_rtmsg *rtmsg,
86872cb5
TG
1935 struct fib6_config *cfg)
1936{
1937 memset(cfg, 0, sizeof(*cfg));
1938
1939 cfg->fc_table = RT6_TABLE_MAIN;
1940 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1941 cfg->fc_metric = rtmsg->rtmsg_metric;
1942 cfg->fc_expires = rtmsg->rtmsg_info;
1943 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1944 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1945 cfg->fc_flags = rtmsg->rtmsg_flags;
1946
5578689a 1947 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1948
4e3fd7a0
AD
1949 cfg->fc_dst = rtmsg->rtmsg_dst;
1950 cfg->fc_src = rtmsg->rtmsg_src;
1951 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1952}
1953
5578689a 1954int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1955{
86872cb5 1956 struct fib6_config cfg;
1da177e4
LT
1957 struct in6_rtmsg rtmsg;
1958 int err;
1959
1960 switch(cmd) {
1961 case SIOCADDRT: /* Add a route */
1962 case SIOCDELRT: /* Delete a route */
1963 if (!capable(CAP_NET_ADMIN))
1964 return -EPERM;
1965 err = copy_from_user(&rtmsg, arg,
1966 sizeof(struct in6_rtmsg));
1967 if (err)
1968 return -EFAULT;
86872cb5 1969
5578689a 1970 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1971
1da177e4
LT
1972 rtnl_lock();
1973 switch (cmd) {
1974 case SIOCADDRT:
86872cb5 1975 err = ip6_route_add(&cfg);
1da177e4
LT
1976 break;
1977 case SIOCDELRT:
86872cb5 1978 err = ip6_route_del(&cfg);
1da177e4
LT
1979 break;
1980 default:
1981 err = -EINVAL;
1982 }
1983 rtnl_unlock();
1984
1985 return err;
3ff50b79 1986 }
1da177e4
LT
1987
1988 return -EINVAL;
1989}
1990
1991/*
1992 * Drop the packet on the floor
1993 */
1994
d5fdd6ba 1995static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1996{
612f09e8 1997 int type;
adf30907 1998 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1999 switch (ipstats_mib_noroutes) {
2000 case IPSTATS_MIB_INNOROUTES:
0660e03f 2001 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2002 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2003 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2004 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2005 break;
2006 }
2007 /* FALLTHROUGH */
2008 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2009 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2010 ipstats_mib_noroutes);
612f09e8
YH
2011 break;
2012 }
3ffe533c 2013 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2014 kfree_skb(skb);
2015 return 0;
2016}
2017
9ce8ade0
TG
2018static int ip6_pkt_discard(struct sk_buff *skb)
2019{
612f09e8 2020 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2021}
2022
20380731 2023static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2024{
adf30907 2025 skb->dev = skb_dst(skb)->dev;
612f09e8 2026 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2027}
2028
6723ab54
DM
2029#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2030
9ce8ade0
TG
2031static int ip6_pkt_prohibit(struct sk_buff *skb)
2032{
612f09e8 2033 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2034}
2035
2036static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2037{
adf30907 2038 skb->dev = skb_dst(skb)->dev;
612f09e8 2039 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2040}
2041
6723ab54
DM
2042#endif
2043
1da177e4
LT
2044/*
2045 * Allocate a dst for local (unicast / anycast) address.
2046 */
2047
2048struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2049 const struct in6_addr *addr,
2050 int anycast)
2051{
c346dca1 2052 struct net *net = dev_net(idev->dev);
5c1e6aa3 2053 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2054 net->loopback_dev, 0);
14deae41 2055 struct neighbour *neigh;
1da177e4 2056
40385653
BG
2057 if (rt == NULL) {
2058 if (net_ratelimit())
2059 pr_warning("IPv6: Maximum number of routes reached,"
2060 " consider increasing route/max_size.\n");
1da177e4 2061 return ERR_PTR(-ENOMEM);
40385653 2062 }
1da177e4 2063
1da177e4
LT
2064 in6_dev_hold(idev);
2065
11d53b49 2066 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2067 rt->dst.input = ip6_input;
2068 rt->dst.output = ip6_output;
1da177e4 2069 rt->rt6i_idev = idev;
d8d1f30b 2070 rt->dst.obsolete = -1;
1da177e4
LT
2071
2072 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2073 if (anycast)
2074 rt->rt6i_flags |= RTF_ANYCAST;
2075 else
1da177e4 2076 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
2077 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2078 if (IS_ERR(neigh)) {
d8d1f30b 2079 dst_free(&rt->dst);
14deae41 2080
29546a64 2081 return ERR_CAST(neigh);
1da177e4 2082 }
69cce1d1 2083 dst_set_neighbour(&rt->dst, neigh);
1da177e4 2084
4e3fd7a0 2085 rt->rt6i_dst.addr = *addr;
1da177e4 2086 rt->rt6i_dst.plen = 128;
5578689a 2087 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2088
d8d1f30b 2089 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2090
2091 return rt;
2092}
2093
c3968a85
DW
2094int ip6_route_get_saddr(struct net *net,
2095 struct rt6_info *rt,
b71d1d42 2096 const struct in6_addr *daddr,
c3968a85
DW
2097 unsigned int prefs,
2098 struct in6_addr *saddr)
2099{
2100 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2101 int err = 0;
2102 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2103 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2104 else
2105 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2106 daddr, prefs, saddr);
2107 return err;
2108}
2109
2110/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2116
2117static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2118{
2119 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2120 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2121 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2122
2123 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2124 rt != net->ipv6.ip6_null_entry &&
2125 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2126 /* remove prefsrc entry */
2127 rt->rt6i_prefsrc.plen = 0;
2128 }
2129 return 0;
2130}
2131
2132void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2133{
2134 struct net *net = dev_net(ifp->idev->dev);
2135 struct arg_dev_net_ip adni = {
2136 .dev = ifp->idev->dev,
2137 .net = net,
2138 .addr = &ifp->addr,
2139 };
2140 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2141}
2142
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is kept */
};
2147
1da177e4
LT
2148static int fib6_ifdown(struct rt6_info *rt, void *arg)
2149{
bc3ef660 2150 const struct arg_dev_net *adn = arg;
2151 const struct net_device *dev = adn->dev;
8ed67789 2152
bc3ef660 2153 if ((rt->rt6i_dev == dev || dev == NULL) &&
2154 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2155 RT6_TRACE("deleted by ifdown %p\n", rt);
2156 return -1;
2157 }
2158 return 0;
2159}
2160
f3db4851 2161void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2162{
8ed67789
DL
2163 struct arg_dev_net adn = {
2164 .dev = dev,
2165 .net = net,
2166 };
2167
2168 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2169 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2170}
2171
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU */
};
2177
2178static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2179{
2180 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2181 struct inet6_dev *idev;
2182
2183 /* In IPv6 pmtu discovery is not optional,
2184 so that RTAX_MTU lock cannot disable it.
2185 We still use this lock to block changes
2186 caused by addrconf/ndisc.
2187 */
2188
2189 idev = __in6_dev_get(arg->dev);
2190 if (idev == NULL)
2191 return 0;
2192
2193 /* For administrative MTU increase, there is no way to discover
2194 IPv6 PMTU increase, so PMTU increase should be updated here.
2195 Since RFC 1981 doesn't include administrative MTU increase
2196 update PMTU increase is a MUST. (i.e. jumbo frame)
2197 */
2198 /*
2199 If new MTU is less than route PMTU, this new MTU will be the
2200 lowest MTU in the path, update the route PMTU to reflect PMTU
2201 decreases; if new MTU is greater than route PMTU, and the
2202 old MTU is the lowest MTU in the path, update the route PMTU
2203 to reflect the increase. In this case if the other nodes' MTU
2204 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2205 PMTU discouvery.
2206 */
2207 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2208 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2209 (dst_mtu(&rt->dst) >= arg->mtu ||
2210 (dst_mtu(&rt->dst) < arg->mtu &&
2211 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2212 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2213 }
1da177e4
LT
2214 return 0;
2215}
2216
2217void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2218{
c71099ac
TG
2219 struct rt6_mtu_change_arg arg = {
2220 .dev = dev,
2221 .mtu = mtu,
2222 };
1da177e4 2223
c346dca1 2224 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2225}
2226
ef7c79ed 2227static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2228 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2229 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2230 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2231 [RTA_PRIORITY] = { .type = NLA_U32 },
2232 [RTA_METRICS] = { .type = NLA_NESTED },
2233};
2234
2235static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2236 struct fib6_config *cfg)
1da177e4 2237{
86872cb5
TG
2238 struct rtmsg *rtm;
2239 struct nlattr *tb[RTA_MAX+1];
2240 int err;
1da177e4 2241
86872cb5
TG
2242 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2243 if (err < 0)
2244 goto errout;
1da177e4 2245
86872cb5
TG
2246 err = -EINVAL;
2247 rtm = nlmsg_data(nlh);
2248 memset(cfg, 0, sizeof(*cfg));
2249
2250 cfg->fc_table = rtm->rtm_table;
2251 cfg->fc_dst_len = rtm->rtm_dst_len;
2252 cfg->fc_src_len = rtm->rtm_src_len;
2253 cfg->fc_flags = RTF_UP;
2254 cfg->fc_protocol = rtm->rtm_protocol;
2255
2256 if (rtm->rtm_type == RTN_UNREACHABLE)
2257 cfg->fc_flags |= RTF_REJECT;
2258
ab79ad14
2259 if (rtm->rtm_type == RTN_LOCAL)
2260 cfg->fc_flags |= RTF_LOCAL;
2261
86872cb5
TG
2262 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2263 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2264 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2265
2266 if (tb[RTA_GATEWAY]) {
2267 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2268 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2269 }
86872cb5
TG
2270
2271 if (tb[RTA_DST]) {
2272 int plen = (rtm->rtm_dst_len + 7) >> 3;
2273
2274 if (nla_len(tb[RTA_DST]) < plen)
2275 goto errout;
2276
2277 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2278 }
86872cb5
TG
2279
2280 if (tb[RTA_SRC]) {
2281 int plen = (rtm->rtm_src_len + 7) >> 3;
2282
2283 if (nla_len(tb[RTA_SRC]) < plen)
2284 goto errout;
2285
2286 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2287 }
86872cb5 2288
c3968a85
DW
2289 if (tb[RTA_PREFSRC])
2290 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2291
86872cb5
TG
2292 if (tb[RTA_OIF])
2293 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2294
2295 if (tb[RTA_PRIORITY])
2296 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2297
2298 if (tb[RTA_METRICS]) {
2299 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2300 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2301 }
86872cb5
TG
2302
2303 if (tb[RTA_TABLE])
2304 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2305
2306 err = 0;
2307errout:
2308 return err;
1da177e4
LT
2309}
2310
c127ea2c 2311static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2312{
86872cb5
TG
2313 struct fib6_config cfg;
2314 int err;
1da177e4 2315
86872cb5
TG
2316 err = rtm_to_fib6_config(skb, nlh, &cfg);
2317 if (err < 0)
2318 return err;
2319
2320 return ip6_route_del(&cfg);
1da177e4
LT
2321}
2322
c127ea2c 2323static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2324{
86872cb5
TG
2325 struct fib6_config cfg;
2326 int err;
1da177e4 2327
86872cb5
TG
2328 err = rtm_to_fib6_config(skb, nlh, &cfg);
2329 if (err < 0)
2330 return err;
2331
2332 return ip6_route_add(&cfg);
1da177e4
LT
2333}
2334
339bf98f
TG
2335static inline size_t rt6_nlmsg_size(void)
2336{
2337 return NLMSG_ALIGN(sizeof(struct rtmsg))
2338 + nla_total_size(16) /* RTA_SRC */
2339 + nla_total_size(16) /* RTA_DST */
2340 + nla_total_size(16) /* RTA_GATEWAY */
2341 + nla_total_size(16) /* RTA_PREFSRC */
2342 + nla_total_size(4) /* RTA_TABLE */
2343 + nla_total_size(4) /* RTA_IIF */
2344 + nla_total_size(4) /* RTA_OIF */
2345 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2346 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2347 + nla_total_size(sizeof(struct rta_cacheinfo));
2348}
2349
191cd582
BH
2350static int rt6_fill_node(struct net *net,
2351 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2352 struct in6_addr *dst, struct in6_addr *src,
2353 int iif, int type, u32 pid, u32 seq,
7bc570c8 2354 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2355{
2356 struct rtmsg *rtm;
2d7202bf 2357 struct nlmsghdr *nlh;
e3703b3d 2358 long expires;
9e762a4a 2359 u32 table;
f2c31e32 2360 struct neighbour *n;
1da177e4
LT
2361
2362 if (prefix) { /* user wants prefix routes only */
2363 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2364 /* success since this is not a prefix route */
2365 return 1;
2366 }
2367 }
2368
2d7202bf
TG
2369 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2370 if (nlh == NULL)
26932566 2371 return -EMSGSIZE;
2d7202bf
TG
2372
2373 rtm = nlmsg_data(nlh);
1da177e4
LT
2374 rtm->rtm_family = AF_INET6;
2375 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2376 rtm->rtm_src_len = rt->rt6i_src.plen;
2377 rtm->rtm_tos = 0;
c71099ac 2378 if (rt->rt6i_table)
9e762a4a 2379 table = rt->rt6i_table->tb6_id;
c71099ac 2380 else
9e762a4a
PM
2381 table = RT6_TABLE_UNSPEC;
2382 rtm->rtm_table = table;
2d7202bf 2383 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2384 if (rt->rt6i_flags&RTF_REJECT)
2385 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2386 else if (rt->rt6i_flags&RTF_LOCAL)
2387 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2388 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2389 rtm->rtm_type = RTN_LOCAL;
2390 else
2391 rtm->rtm_type = RTN_UNICAST;
2392 rtm->rtm_flags = 0;
2393 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2394 rtm->rtm_protocol = rt->rt6i_protocol;
2395 if (rt->rt6i_flags&RTF_DYNAMIC)
2396 rtm->rtm_protocol = RTPROT_REDIRECT;
2397 else if (rt->rt6i_flags & RTF_ADDRCONF)
2398 rtm->rtm_protocol = RTPROT_KERNEL;
2399 else if (rt->rt6i_flags&RTF_DEFAULT)
2400 rtm->rtm_protocol = RTPROT_RA;
2401
2402 if (rt->rt6i_flags&RTF_CACHE)
2403 rtm->rtm_flags |= RTM_F_CLONED;
2404
2405 if (dst) {
2d7202bf 2406 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2407 rtm->rtm_dst_len = 128;
1da177e4 2408 } else if (rtm->rtm_dst_len)
2d7202bf 2409 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2410#ifdef CONFIG_IPV6_SUBTREES
2411 if (src) {
2d7202bf 2412 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2413 rtm->rtm_src_len = 128;
1da177e4 2414 } else if (rtm->rtm_src_len)
2d7202bf 2415 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2416#endif
7bc570c8
YH
2417 if (iif) {
2418#ifdef CONFIG_IPV6_MROUTE
2419 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2420 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2421 if (err <= 0) {
2422 if (!nowait) {
2423 if (err == 0)
2424 return 0;
2425 goto nla_put_failure;
2426 } else {
2427 if (err == -EMSGSIZE)
2428 goto nla_put_failure;
2429 }
2430 }
2431 } else
2432#endif
2433 NLA_PUT_U32(skb, RTA_IIF, iif);
2434 } else if (dst) {
1da177e4 2435 struct in6_addr saddr_buf;
c3968a85 2436 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2437 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2438 }
2d7202bf 2439
c3968a85
DW
2440 if (rt->rt6i_prefsrc.plen) {
2441 struct in6_addr saddr_buf;
4e3fd7a0 2442 saddr_buf = rt->rt6i_prefsrc.addr;
c3968a85
DW
2443 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2444 }
2445
defb3519 2446 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2447 goto nla_put_failure;
2448
f2c31e32
ED
2449 rcu_read_lock();
2450 n = dst_get_neighbour(&rt->dst);
2451 if (n)
2452 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2453 rcu_read_unlock();
2d7202bf 2454
d8d1f30b 2455 if (rt->dst.dev)
2d7202bf
TG
2456 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2457
2458 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2459
36e3deae
YH
2460 if (!(rt->rt6i_flags & RTF_EXPIRES))
2461 expires = 0;
2462 else if (rt->rt6i_expires - jiffies < INT_MAX)
2463 expires = rt->rt6i_expires - jiffies;
2464 else
2465 expires = INT_MAX;
69cdf8f9 2466
d8d1f30b
CG
2467 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2468 expires, rt->dst.error) < 0)
e3703b3d 2469 goto nla_put_failure;
2d7202bf
TG
2470
2471 return nlmsg_end(skb, nlh);
2472
2473nla_put_failure:
26932566
PM
2474 nlmsg_cancel(skb, nlh);
2475 return -EMSGSIZE;
1da177e4
LT
2476}
2477
1b43af54 2478int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2479{
2480 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2481 int prefix;
2482
2d7202bf
TG
2483 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2484 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2485 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2486 } else
2487 prefix = 0;
2488
191cd582
BH
2489 return rt6_fill_node(arg->net,
2490 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2491 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2492 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2493}
2494
c127ea2c 2495static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2496{
3b1e0a65 2497 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2498 struct nlattr *tb[RTA_MAX+1];
2499 struct rt6_info *rt;
1da177e4 2500 struct sk_buff *skb;
ab364a6f 2501 struct rtmsg *rtm;
4c9483b2 2502 struct flowi6 fl6;
ab364a6f 2503 int err, iif = 0;
1da177e4 2504
ab364a6f
TG
2505 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2506 if (err < 0)
2507 goto errout;
1da177e4 2508
ab364a6f 2509 err = -EINVAL;
4c9483b2 2510 memset(&fl6, 0, sizeof(fl6));
1da177e4 2511
ab364a6f
TG
2512 if (tb[RTA_SRC]) {
2513 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2514 goto errout;
2515
4e3fd7a0 2516 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2517 }
2518
2519 if (tb[RTA_DST]) {
2520 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2521 goto errout;
2522
4e3fd7a0 2523 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2524 }
2525
2526 if (tb[RTA_IIF])
2527 iif = nla_get_u32(tb[RTA_IIF]);
2528
2529 if (tb[RTA_OIF])
4c9483b2 2530 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2531
2532 if (iif) {
2533 struct net_device *dev;
5578689a 2534 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2535 if (!dev) {
2536 err = -ENODEV;
ab364a6f 2537 goto errout;
1da177e4
LT
2538 }
2539 }
2540
ab364a6f
TG
2541 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2542 if (skb == NULL) {
2543 err = -ENOBUFS;
2544 goto errout;
2545 }
1da177e4 2546
ab364a6f
TG
2547 /* Reserve room for dummy headers, this skb can pass
2548 through good chunk of routing engine.
2549 */
459a98ed 2550 skb_reset_mac_header(skb);
ab364a6f 2551 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2552
4c9483b2 2553 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2554 skb_dst_set(skb, &rt->dst);
1da177e4 2555
4c9483b2 2556 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2557 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2558 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2559 if (err < 0) {
ab364a6f
TG
2560 kfree_skb(skb);
2561 goto errout;
1da177e4
LT
2562 }
2563
5578689a 2564 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2565errout:
1da177e4 2566 return err;
1da177e4
LT
2567}
2568
86872cb5 2569void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2570{
2571 struct sk_buff *skb;
5578689a 2572 struct net *net = info->nl_net;
528c4ceb
DL
2573 u32 seq;
2574 int err;
2575
2576 err = -ENOBUFS;
2577 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2578
339bf98f 2579 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2580 if (skb == NULL)
2581 goto errout;
2582
191cd582 2583 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2584 event, info->pid, seq, 0, 0, 0);
26932566
PM
2585 if (err < 0) {
2586 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2587 WARN_ON(err == -EMSGSIZE);
2588 kfree_skb(skb);
2589 goto errout;
2590 }
1ce85fe4
PNA
2591 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2592 info->nlh, gfp_any());
2593 return;
21713ebc
TG
2594errout:
2595 if (err < 0)
5578689a 2596 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2597}
2598
8ed67789
DL
2599static int ip6_route_dev_notify(struct notifier_block *this,
2600 unsigned long event, void *data)
2601{
2602 struct net_device *dev = (struct net_device *)data;
c346dca1 2603 struct net *net = dev_net(dev);
8ed67789
DL
2604
2605 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2606 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2607 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2608#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2609 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2610 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2611 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2612 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2613#endif
2614 }
2615
2616 return NOTIFY_OK;
2617}
2618
1da177e4
LT
2619/*
2620 * /proc
2621 */
2622
2623#ifdef CONFIG_PROC_FS
2624
1da177e4
LT
/*
 * Buffer-cursor state for a /proc reader.
 * NOTE(review): nothing in the visible part of this file uses this
 * structure; confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2633
2634static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2635{
33120b30 2636 struct seq_file *m = p_arg;
69cce1d1 2637 struct neighbour *n;
1da177e4 2638
4b7a4274 2639 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2640
2641#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2642 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2643#else
33120b30 2644 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2645#endif
f2c31e32 2646 rcu_read_lock();
69cce1d1
DM
2647 n = dst_get_neighbour(&rt->dst);
2648 if (n) {
2649 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2650 } else {
33120b30 2651 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2652 }
f2c31e32 2653 rcu_read_unlock();
33120b30 2654 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2655 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2656 rt->dst.__use, rt->rt6i_flags,
33120b30 2657 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2658 return 0;
2659}
2660
33120b30 2661static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2662{
f3db4851
DL
2663 struct net *net = (struct net *)m->private;
2664 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2665 return 0;
2666}
1da177e4 2667
33120b30
AD
2668static int ipv6_route_open(struct inode *inode, struct file *file)
2669{
de05c557 2670 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2671}
2672
33120b30
AD
2673static const struct file_operations ipv6_route_proc_fops = {
2674 .owner = THIS_MODULE,
2675 .open = ipv6_route_open,
2676 .read = seq_read,
2677 .llseek = seq_lseek,
b6fcbdb4 2678 .release = single_release_net,
33120b30
AD
2679};
2680
1da177e4
LT
2681static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2682{
69ddb805 2683 struct net *net = (struct net *)seq->private;
1da177e4 2684 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2685 net->ipv6.rt6_stats->fib_nodes,
2686 net->ipv6.rt6_stats->fib_route_nodes,
2687 net->ipv6.rt6_stats->fib_rt_alloc,
2688 net->ipv6.rt6_stats->fib_rt_entries,
2689 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2690 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2691 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2692
2693 return 0;
2694}
2695
2696static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2697{
de05c557 2698 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2699}
2700
9a32144e 2701static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2702 .owner = THIS_MODULE,
2703 .open = rt6_stats_seq_open,
2704 .read = seq_read,
2705 .llseek = seq_lseek,
b6fcbdb4 2706 .release = single_release_net,
1da177e4
LT
2707};
2708#endif /* CONFIG_PROC_FS */
2709
2710#ifdef CONFIG_SYSCTL
2711
1da177e4 2712static
8d65af78 2713int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2714 void __user *buffer, size_t *lenp, loff_t *ppos)
2715{
c486da34
LAG
2716 struct net *net;
2717 int delay;
2718 if (!write)
1da177e4 2719 return -EINVAL;
c486da34
LAG
2720
2721 net = (struct net *)ctl->extra1;
2722 delay = net->ipv6.sysctl.flush_delay;
2723 proc_dointvec(ctl, write, buffer, lenp, ppos);
2724 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2725 return 0;
1da177e4
LT
2726}
2727
760f2d01 2728ctl_table ipv6_route_table_template[] = {
1ab1457c 2729 {
1da177e4 2730 .procname = "flush",
4990509f 2731 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2732 .maxlen = sizeof(int),
89c8b3a1 2733 .mode = 0200,
6d9f239a 2734 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2735 },
2736 {
1da177e4 2737 .procname = "gc_thresh",
9a7ec3a9 2738 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2739 .maxlen = sizeof(int),
2740 .mode = 0644,
6d9f239a 2741 .proc_handler = proc_dointvec,
1da177e4
LT
2742 },
2743 {
1da177e4 2744 .procname = "max_size",
4990509f 2745 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2746 .maxlen = sizeof(int),
2747 .mode = 0644,
6d9f239a 2748 .proc_handler = proc_dointvec,
1da177e4
LT
2749 },
2750 {
1da177e4 2751 .procname = "gc_min_interval",
4990509f 2752 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2753 .maxlen = sizeof(int),
2754 .mode = 0644,
6d9f239a 2755 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2756 },
2757 {
1da177e4 2758 .procname = "gc_timeout",
4990509f 2759 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2760 .maxlen = sizeof(int),
2761 .mode = 0644,
6d9f239a 2762 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2763 },
2764 {
1da177e4 2765 .procname = "gc_interval",
4990509f 2766 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2767 .maxlen = sizeof(int),
2768 .mode = 0644,
6d9f239a 2769 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2770 },
2771 {
1da177e4 2772 .procname = "gc_elasticity",
4990509f 2773 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2774 .maxlen = sizeof(int),
2775 .mode = 0644,
f3d3f616 2776 .proc_handler = proc_dointvec,
1da177e4
LT
2777 },
2778 {
1da177e4 2779 .procname = "mtu_expires",
4990509f 2780 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2781 .maxlen = sizeof(int),
2782 .mode = 0644,
6d9f239a 2783 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2784 },
2785 {
1da177e4 2786 .procname = "min_adv_mss",
4990509f 2787 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2788 .maxlen = sizeof(int),
2789 .mode = 0644,
f3d3f616 2790 .proc_handler = proc_dointvec,
1da177e4
LT
2791 },
2792 {
1da177e4 2793 .procname = "gc_min_interval_ms",
4990509f 2794 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2795 .maxlen = sizeof(int),
2796 .mode = 0644,
6d9f239a 2797 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2798 },
f8572d8f 2799 { }
1da177e4
LT
2800};
2801
2c8c1e72 2802struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2803{
2804 struct ctl_table *table;
2805
2806 table = kmemdup(ipv6_route_table_template,
2807 sizeof(ipv6_route_table_template),
2808 GFP_KERNEL);
5ee09105
YH
2809
2810 if (table) {
2811 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2812 table[0].extra1 = net;
86393e52 2813 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2814 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2815 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2816 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2817 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2818 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2819 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2820 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2821 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2822 }
2823
760f2d01
DL
2824 return table;
2825}
1da177e4
LT
2826#endif
2827
2c8c1e72 2828static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2829{
633d424b 2830 int ret = -ENOMEM;
8ed67789 2831
86393e52
AD
2832 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2833 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2834
fc66f95c
ED
2835 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2836 goto out_ip6_dst_ops;
2837
8ed67789
DL
2838 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2839 sizeof(*net->ipv6.ip6_null_entry),
2840 GFP_KERNEL);
2841 if (!net->ipv6.ip6_null_entry)
fc66f95c 2842 goto out_ip6_dst_entries;
d8d1f30b 2843 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2844 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2845 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2846 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2847 ip6_template_metrics, true);
8ed67789
DL
2848
2849#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2850 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2851 sizeof(*net->ipv6.ip6_prohibit_entry),
2852 GFP_KERNEL);
68fffc67
PZ
2853 if (!net->ipv6.ip6_prohibit_entry)
2854 goto out_ip6_null_entry;
d8d1f30b 2855 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2856 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2857 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2858 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2859 ip6_template_metrics, true);
8ed67789
DL
2860
2861 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2862 sizeof(*net->ipv6.ip6_blk_hole_entry),
2863 GFP_KERNEL);
68fffc67
PZ
2864 if (!net->ipv6.ip6_blk_hole_entry)
2865 goto out_ip6_prohibit_entry;
d8d1f30b 2866 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2867 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2868 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2869 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2870 ip6_template_metrics, true);
8ed67789
DL
2871#endif
2872
b339a47c
PZ
2873 net->ipv6.sysctl.flush_delay = 0;
2874 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2875 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2876 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2877 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2878 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2879 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2880 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2881
cdb18761
DL
2882#ifdef CONFIG_PROC_FS
2883 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2884 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2885#endif
6891a346
BT
2886 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2887
8ed67789
DL
2888 ret = 0;
2889out:
2890 return ret;
f2fc6a54 2891
68fffc67
PZ
2892#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2893out_ip6_prohibit_entry:
2894 kfree(net->ipv6.ip6_prohibit_entry);
2895out_ip6_null_entry:
2896 kfree(net->ipv6.ip6_null_entry);
2897#endif
fc66f95c
ED
2898out_ip6_dst_entries:
2899 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2900out_ip6_dst_ops:
f2fc6a54 2901 goto out;
cdb18761
DL
2902}
2903
2c8c1e72 2904static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2905{
2906#ifdef CONFIG_PROC_FS
2907 proc_net_remove(net, "ipv6_route");
2908 proc_net_remove(net, "rt6_stats");
2909#endif
8ed67789
DL
2910 kfree(net->ipv6.ip6_null_entry);
2911#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2912 kfree(net->ipv6.ip6_prohibit_entry);
2913 kfree(net->ipv6.ip6_blk_hole_entry);
2914#endif
41bb78b4 2915 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2916}
2917
2918static struct pernet_operations ip6_route_net_ops = {
2919 .init = ip6_route_net_init,
2920 .exit = ip6_route_net_exit,
2921};
2922
8ed67789
DL
2923static struct notifier_block ip6_route_dev_notifier = {
2924 .notifier_call = ip6_route_dev_notify,
2925 .priority = 0,
2926};
2927
433d49c3 2928int __init ip6_route_init(void)
1da177e4 2929{
433d49c3
DL
2930 int ret;
2931
9a7ec3a9
DL
2932 ret = -ENOMEM;
2933 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2934 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2935 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2936 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2937 goto out;
14e50e57 2938
fc66f95c 2939 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2940 if (ret)
bdb3289f 2941 goto out_kmem_cache;
bdb3289f 2942
fc66f95c
ED
2943 ret = register_pernet_subsys(&ip6_route_net_ops);
2944 if (ret)
2945 goto out_dst_entries;
2946
5dc121e9
AE
2947 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2948
8ed67789
DL
2949 /* Registering of the loopback is done before this portion of code,
2950 * the loopback reference in rt6_info will not be taken, do it
2951 * manually for init_net */
d8d1f30b 2952 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2953 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2954 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2955 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2956 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2957 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2958 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2959 #endif
433d49c3
DL
2960 ret = fib6_init();
2961 if (ret)
8ed67789 2962 goto out_register_subsys;
433d49c3 2963
433d49c3
DL
2964 ret = xfrm6_init();
2965 if (ret)
cdb18761 2966 goto out_fib6_init;
c35b7e72 2967
433d49c3
DL
2968 ret = fib6_rules_init();
2969 if (ret)
2970 goto xfrm6_init;
7e5449c2 2971
433d49c3 2972 ret = -ENOBUFS;
c7ac8679
GR
2973 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2974 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2975 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 2976 goto fib6_rules_init;
c127ea2c 2977
8ed67789 2978 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2979 if (ret)
2980 goto fib6_rules_init;
8ed67789 2981
433d49c3
DL
2982out:
2983 return ret;
2984
2985fib6_rules_init:
433d49c3
DL
2986 fib6_rules_cleanup();
2987xfrm6_init:
433d49c3 2988 xfrm6_fini();
433d49c3 2989out_fib6_init:
433d49c3 2990 fib6_gc_cleanup();
8ed67789
DL
2991out_register_subsys:
2992 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2993out_dst_entries:
2994 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2995out_kmem_cache:
f2fc6a54 2996 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2997 goto out;
1da177e4
LT
2998}
2999
3000void ip6_route_cleanup(void)
3001{
8ed67789 3002 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3003 fib6_rules_cleanup();
1da177e4 3004 xfrm6_fini();
1da177e4 3005 fib6_gc_cleanup();
8ed67789 3006 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3007 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3008 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3009}
This page took 0.960342 seconds and 5 git commands to generate.