ipv6: Use ip6_dst_hoplimit() instead of direct dst_metric() calls.
[deliverable/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
519fbd87 75#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
1da177e4
LT
77static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
9a7ec3a9 100static struct dst_ops ip6_dst_ops_template = {
1da177e4 101 .family = AF_INET6,
09640e63 102 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 111 .local_out = __ip6_local_out,
1da177e4
LT
112};
113
14e50e57
DM
114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
09640e63 120 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
14e50e57
DM
124};
125
bdb3289f 126static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
127 .dst = {
128 .__refcnt = ATOMIC_INIT(1),
129 .__use = 1,
130 .obsolete = -1,
131 .error = -ENETUNREACH,
d8d1f30b
CG
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
1da177e4
LT
134 },
135 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 136 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
137 .rt6i_metric = ~(u32) 0,
138 .rt6i_ref = ATOMIC_INIT(1),
139};
140
101367c2
TG
141#ifdef CONFIG_IPV6_MULTIPLE_TABLES
142
6723ab54
DM
143static int ip6_pkt_prohibit(struct sk_buff *skb);
144static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 145
280a34c8 146static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
147 .dst = {
148 .__refcnt = ATOMIC_INIT(1),
149 .__use = 1,
150 .obsolete = -1,
151 .error = -EACCES,
d8d1f30b
CG
152 .input = ip6_pkt_prohibit,
153 .output = ip6_pkt_prohibit_out,
101367c2
TG
154 },
155 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 156 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
157 .rt6i_metric = ~(u32) 0,
158 .rt6i_ref = ATOMIC_INIT(1),
159};
160
bdb3289f 161static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
162 .dst = {
163 .__refcnt = ATOMIC_INIT(1),
164 .__use = 1,
165 .obsolete = -1,
166 .error = -EINVAL,
d8d1f30b
CG
167 .input = dst_discard,
168 .output = dst_discard,
101367c2
TG
169 },
170 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 171 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
172 .rt6i_metric = ~(u32) 0,
173 .rt6i_ref = ATOMIC_INIT(1),
174};
175
176#endif
177
1da177e4 178/* allocate dst with ip6_dst_ops */
/*
 * Allocate a dst entry from @ops via dst_alloc() and hand it back typed as
 * a struct rt6_info.  Returns whatever dst_alloc() returned, cast.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *)dst_alloc(ops);
	return rt;
}
183
184static void ip6_dst_destroy(struct dst_entry *dst)
185{
186 struct rt6_info *rt = (struct rt6_info *)dst;
187 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 188 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
189
190 if (idev != NULL) {
191 rt->rt6i_idev = NULL;
192 in6_dev_put(idev);
1ab1457c 193 }
b3419363
DM
194 if (peer) {
195 BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
196 rt->rt6i_peer = NULL;
197 inet_putpeer(peer);
198 }
199}
200
201void rt6_bind_peer(struct rt6_info *rt, int create)
202{
203 struct inet_peer *peer;
204
205 if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
206 return;
207
208 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
209 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
210 inet_putpeer(peer);
1da177e4
LT
211}
212
213static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
214 int how)
215{
216 struct rt6_info *rt = (struct rt6_info *)dst;
217 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 218 struct net_device *loopback_dev =
c346dca1 219 dev_net(dev)->loopback_dev;
1da177e4 220
5a3e55d6
DL
221 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
222 struct inet6_dev *loopback_idev =
223 in6_dev_get(loopback_dev);
1da177e4
LT
224 if (loopback_idev != NULL) {
225 rt->rt6i_idev = loopback_idev;
226 in6_dev_put(idev);
227 }
228 }
229}
230
231static __inline__ int rt6_check_expired(const struct rt6_info *rt)
232{
a02cec21
ED
233 return (rt->rt6i_flags & RTF_EXPIRES) &&
234 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
235}
236
c71099ac
TG
237static inline int rt6_need_strict(struct in6_addr *daddr)
238{
a02cec21
ED
239 return ipv6_addr_type(daddr) &
240 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
241}
242
1da177e4 243/*
c71099ac 244 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
245 */
246
8ed67789
DL
247static inline struct rt6_info *rt6_device_match(struct net *net,
248 struct rt6_info *rt,
dd3abc4e 249 struct in6_addr *saddr,
1da177e4 250 int oif,
d420895e 251 int flags)
1da177e4
LT
252{
253 struct rt6_info *local = NULL;
254 struct rt6_info *sprt;
255
dd3abc4e
YH
256 if (!oif && ipv6_addr_any(saddr))
257 goto out;
258
d8d1f30b 259 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
260 struct net_device *dev = sprt->rt6i_dev;
261
262 if (oif) {
1da177e4
LT
263 if (dev->ifindex == oif)
264 return sprt;
265 if (dev->flags & IFF_LOOPBACK) {
266 if (sprt->rt6i_idev == NULL ||
267 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 268 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 269 continue;
1ab1457c 270 if (local && (!oif ||
1da177e4
LT
271 local->rt6i_idev->dev->ifindex == oif))
272 continue;
273 }
274 local = sprt;
275 }
dd3abc4e
YH
276 } else {
277 if (ipv6_chk_addr(net, saddr, dev,
278 flags & RT6_LOOKUP_F_IFACE))
279 return sprt;
1da177e4 280 }
dd3abc4e 281 }
1da177e4 282
dd3abc4e 283 if (oif) {
1da177e4
LT
284 if (local)
285 return local;
286
d420895e 287 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 288 return net->ipv6.ip6_null_entry;
1da177e4 289 }
dd3abc4e 290out:
1da177e4
LT
291 return rt;
292}
293
27097255
YH
294#ifdef CONFIG_IPV6_ROUTER_PREF
295static void rt6_probe(struct rt6_info *rt)
296{
297 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
298 /*
299 * Okay, this does not seem to be appropriate
300 * for now, however, we need to check if it
301 * is really so; aka Router Reachability Probing.
302 *
303 * Router Reachability Probe MUST be rate-limited
304 * to no more than one per minute.
305 */
306 if (!neigh || (neigh->nud_state & NUD_VALID))
307 return;
308 read_lock_bh(&neigh->lock);
309 if (!(neigh->nud_state & NUD_VALID) &&
52e16356 310 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
27097255
YH
311 struct in6_addr mcaddr;
312 struct in6_addr *target;
313
314 neigh->updated = jiffies;
315 read_unlock_bh(&neigh->lock);
316
317 target = (struct in6_addr *)&neigh->primary_key;
318 addrconf_addr_solict_mult(target, &mcaddr);
319 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
320 } else
321 read_unlock_bh(&neigh->lock);
322}
323#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
327#endif
328
1da177e4 329/*
554cfb7e 330 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 331 */
b6f99a21 332static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
333{
334 struct net_device *dev = rt->rt6i_dev;
161980f4 335 if (!oif || dev->ifindex == oif)
554cfb7e 336 return 2;
161980f4
DM
337 if ((dev->flags & IFF_LOOPBACK) &&
338 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
339 return 1;
340 return 0;
554cfb7e 341}
1da177e4 342
b6f99a21 343static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 344{
554cfb7e 345 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 346 int m;
4d0c5911
YH
347 if (rt->rt6i_flags & RTF_NONEXTHOP ||
348 !(rt->rt6i_flags & RTF_GATEWAY))
349 m = 1;
350 else if (neigh) {
554cfb7e
YH
351 read_lock_bh(&neigh->lock);
352 if (neigh->nud_state & NUD_VALID)
4d0c5911 353 m = 2;
398bcbeb
YH
354#ifdef CONFIG_IPV6_ROUTER_PREF
355 else if (neigh->nud_state & NUD_FAILED)
356 m = 0;
357#endif
358 else
ea73ee23 359 m = 1;
554cfb7e 360 read_unlock_bh(&neigh->lock);
398bcbeb
YH
361 } else
362 m = 0;
554cfb7e 363 return m;
1da177e4
LT
364}
365
554cfb7e
YH
366static int rt6_score_route(struct rt6_info *rt, int oif,
367 int strict)
1da177e4 368{
4d0c5911 369 int m, n;
1ab1457c 370
4d0c5911 371 m = rt6_check_dev(rt, oif);
77d16f45 372 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 373 return -1;
ebacaaa0
YH
374#ifdef CONFIG_IPV6_ROUTER_PREF
375 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
376#endif
4d0c5911 377 n = rt6_check_neigh(rt);
557e92ef 378 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
379 return -1;
380 return m;
381}
382
f11e6659
DM
383static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
384 int *mpri, struct rt6_info *match)
554cfb7e 385{
f11e6659
DM
386 int m;
387
388 if (rt6_check_expired(rt))
389 goto out;
390
391 m = rt6_score_route(rt, oif, strict);
392 if (m < 0)
393 goto out;
394
395 if (m > *mpri) {
396 if (strict & RT6_LOOKUP_F_REACHABLE)
397 rt6_probe(match);
398 *mpri = m;
399 match = rt;
400 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
401 rt6_probe(rt);
402 }
403
404out:
405 return match;
406}
407
408static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
409 struct rt6_info *rr_head,
410 u32 metric, int oif, int strict)
411{
412 struct rt6_info *rt, *match;
554cfb7e 413 int mpri = -1;
1da177e4 414
f11e6659
DM
415 match = NULL;
416 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 417 rt = rt->dst.rt6_next)
f11e6659
DM
418 match = find_match(rt, oif, strict, &mpri, match);
419 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 420 rt = rt->dst.rt6_next)
f11e6659 421 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 422
f11e6659
DM
423 return match;
424}
1da177e4 425
f11e6659
DM
426static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
427{
428 struct rt6_info *match, *rt0;
8ed67789 429 struct net *net;
1da177e4 430
f11e6659 431 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 432 __func__, fn->leaf, oif);
554cfb7e 433
f11e6659
DM
434 rt0 = fn->rr_ptr;
435 if (!rt0)
436 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 437
f11e6659 438 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 439
554cfb7e 440 if (!match &&
f11e6659 441 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 442 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 443
554cfb7e 444 /* no entries matched; do round-robin */
f11e6659
DM
445 if (!next || next->rt6i_metric != rt0->rt6i_metric)
446 next = fn->leaf;
447
448 if (next != rt0)
449 fn->rr_ptr = next;
1da177e4 450 }
1da177e4 451
f11e6659 452 RT6_TRACE("%s() => %p\n",
0dc47877 453 __func__, match);
1da177e4 454
c346dca1 455 net = dev_net(rt0->rt6i_dev);
a02cec21 456 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
457}
458
70ceb4f5
YH
459#ifdef CONFIG_IPV6_ROUTE_INFO
460int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
461 struct in6_addr *gwaddr)
462{
c346dca1 463 struct net *net = dev_net(dev);
70ceb4f5
YH
464 struct route_info *rinfo = (struct route_info *) opt;
465 struct in6_addr prefix_buf, *prefix;
466 unsigned int pref;
4bed72e4 467 unsigned long lifetime;
70ceb4f5
YH
468 struct rt6_info *rt;
469
470 if (len < sizeof(struct route_info)) {
471 return -EINVAL;
472 }
473
474 /* Sanity check for prefix_len and length */
475 if (rinfo->length > 3) {
476 return -EINVAL;
477 } else if (rinfo->prefix_len > 128) {
478 return -EINVAL;
479 } else if (rinfo->prefix_len > 64) {
480 if (rinfo->length < 2) {
481 return -EINVAL;
482 }
483 } else if (rinfo->prefix_len > 0) {
484 if (rinfo->length < 1) {
485 return -EINVAL;
486 }
487 }
488
489 pref = rinfo->route_pref;
490 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 491 return -EINVAL;
70ceb4f5 492
4bed72e4 493 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
494
495 if (rinfo->length == 3)
496 prefix = (struct in6_addr *)rinfo->prefix;
497 else {
498 /* this function is safe */
499 ipv6_addr_prefix(&prefix_buf,
500 (struct in6_addr *)rinfo->prefix,
501 rinfo->prefix_len);
502 prefix = &prefix_buf;
503 }
504
efa2cea0
DL
505 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
506 dev->ifindex);
70ceb4f5
YH
507
508 if (rt && !lifetime) {
e0a1ad73 509 ip6_del_rt(rt);
70ceb4f5
YH
510 rt = NULL;
511 }
512
513 if (!rt && lifetime)
efa2cea0 514 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
515 pref);
516 else if (rt)
517 rt->rt6i_flags = RTF_ROUTEINFO |
518 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
519
520 if (rt) {
4bed72e4 521 if (!addrconf_finite_timeout(lifetime)) {
70ceb4f5
YH
522 rt->rt6i_flags &= ~RTF_EXPIRES;
523 } else {
524 rt->rt6i_expires = jiffies + HZ * lifetime;
525 rt->rt6i_flags |= RTF_EXPIRES;
526 }
d8d1f30b 527 dst_release(&rt->dst);
70ceb4f5
YH
528 }
529 return 0;
530}
531#endif
532
/*
 * BACKTRACK(__net, saddr) - climb the FIB tree after a failed match.
 *
 * Must be expanded inside a function that provides:
 *   - locals   `rt` (struct rt6_info *) and `fn` (struct fib6_node *)
 *   - labels   `out` and `restart`
 *
 * If `rt` is the namespace's null entry, walk from `fn` towards the root.
 * At each step move to the parent, or, when the parent has a subtree other
 * than `fn`, look @saddr up in that subtree.  Jumps to `restart` as soon
 * as a node flagged RTN_RTINFO is reached, or to `out` when `fn` is
 * flagged RTN_TL_ROOT.  Does nothing when `rt` is a real route.
 *
 * Both macro arguments are parenthesized so that arbitrary expressions
 * can be passed safely.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac 550
8ed67789
DL
551static struct rt6_info *ip6_pol_route_lookup(struct net *net,
552 struct fib6_table *table,
c71099ac 553 struct flowi *fl, int flags)
1da177e4
LT
554{
555 struct fib6_node *fn;
556 struct rt6_info *rt;
557
c71099ac
TG
558 read_lock_bh(&table->tb6_lock);
559 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
560restart:
561 rt = fn->leaf;
dd3abc4e 562 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 563 BACKTRACK(net, &fl->fl6_src);
c71099ac 564out:
d8d1f30b 565 dst_use(&rt->dst, jiffies);
c71099ac 566 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
567 return rt;
568
569}
570
9acd9f3a
YH
571struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
572 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
573{
574 struct flowi fl = {
575 .oif = oif,
5811662b 576 .fl6_dst = *daddr,
c71099ac
TG
577 };
578 struct dst_entry *dst;
77d16f45 579 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 580
adaa70bb
TG
581 if (saddr) {
582 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
583 flags |= RT6_LOOKUP_F_HAS_SADDR;
584 }
585
606a2b48 586 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
587 if (dst->error == 0)
588 return (struct rt6_info *) dst;
589
590 dst_release(dst);
591
1da177e4
LT
592 return NULL;
593}
594
7159039a
YH
595EXPORT_SYMBOL(rt6_lookup);
596
c71099ac 597/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
598 It takes a new route entry; if the addition fails for any reason, the
599 route is freed. In any case, if the caller does not hold it, it may
600 be destroyed.
601 */
602
86872cb5 603static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
604{
605 int err;
c71099ac 606 struct fib6_table *table;
1da177e4 607
c71099ac
TG
608 table = rt->rt6i_table;
609 write_lock_bh(&table->tb6_lock);
86872cb5 610 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 611 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
612
613 return err;
614}
615
40e22e8f
TG
616int ip6_ins_rt(struct rt6_info *rt)
617{
4d1169c1 618 struct nl_info info = {
c346dca1 619 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 620 };
528c4ceb 621 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
622}
623
95a9a5ba
YH
624static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
625 struct in6_addr *saddr)
1da177e4 626{
1da177e4
LT
627 struct rt6_info *rt;
628
629 /*
630 * Clone the route.
631 */
632
633 rt = ip6_rt_copy(ort);
634
635 if (rt) {
14deae41
DM
636 struct neighbour *neigh;
637 int attempts = !in_softirq();
638
58c4fb86
YH
639 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
640 if (rt->rt6i_dst.plen != 128 &&
641 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
642 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 643 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 644 }
1da177e4 645
58c4fb86 646 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
647 rt->rt6i_dst.plen = 128;
648 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 649 rt->dst.flags |= DST_HOST;
1da177e4
LT
650
651#ifdef CONFIG_IPV6_SUBTREES
652 if (rt->rt6i_src.plen && saddr) {
653 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
654 rt->rt6i_src.plen = 128;
655 }
656#endif
657
14deae41
DM
658 retry:
659 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
660 if (IS_ERR(neigh)) {
661 struct net *net = dev_net(rt->rt6i_dev);
662 int saved_rt_min_interval =
663 net->ipv6.sysctl.ip6_rt_gc_min_interval;
664 int saved_rt_elasticity =
665 net->ipv6.sysctl.ip6_rt_gc_elasticity;
666
667 if (attempts-- > 0) {
668 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
669 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
670
86393e52 671 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
672
673 net->ipv6.sysctl.ip6_rt_gc_elasticity =
674 saved_rt_elasticity;
675 net->ipv6.sysctl.ip6_rt_gc_min_interval =
676 saved_rt_min_interval;
677 goto retry;
678 }
679
680 if (net_ratelimit())
681 printk(KERN_WARNING
7e1b33e5 682 "ipv6: Neighbour table overflow.\n");
d8d1f30b 683 dst_free(&rt->dst);
14deae41
DM
684 return NULL;
685 }
686 rt->rt6i_nexthop = neigh;
1da177e4 687
95a9a5ba 688 }
1da177e4 689
95a9a5ba
YH
690 return rt;
691}
1da177e4 692
299d9939
YH
693static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
694{
695 struct rt6_info *rt = ip6_rt_copy(ort);
696 if (rt) {
697 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
698 rt->rt6i_dst.plen = 128;
699 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 700 rt->dst.flags |= DST_HOST;
299d9939
YH
701 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
702 }
703 return rt;
704}
705
8ed67789
DL
706static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
707 struct flowi *fl, int flags)
1da177e4
LT
708{
709 struct fib6_node *fn;
519fbd87 710 struct rt6_info *rt, *nrt;
c71099ac 711 int strict = 0;
1da177e4 712 int attempts = 3;
519fbd87 713 int err;
53b7997f 714 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 715
77d16f45 716 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
717
718relookup:
c71099ac 719 read_lock_bh(&table->tb6_lock);
1da177e4 720
8238dd06 721restart_2:
c71099ac 722 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
723
724restart:
4acad72d 725 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
726
727 BACKTRACK(net, &fl->fl6_src);
728 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 729 rt->rt6i_flags & RTF_CACHE)
1ddef044 730 goto out;
1da177e4 731
d8d1f30b 732 dst_hold(&rt->dst);
c71099ac 733 read_unlock_bh(&table->tb6_lock);
fb9de91e 734
519fbd87 735 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 736 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
737 else {
738#if CLONE_OFFLINK_ROUTE
c71099ac 739 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
740#else
741 goto out2;
742#endif
743 }
e40cf353 744
d8d1f30b 745 dst_release(&rt->dst);
8ed67789 746 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 747
d8d1f30b 748 dst_hold(&rt->dst);
519fbd87 749 if (nrt) {
40e22e8f 750 err = ip6_ins_rt(nrt);
519fbd87 751 if (!err)
1da177e4 752 goto out2;
1da177e4 753 }
1da177e4 754
519fbd87
YH
755 if (--attempts <= 0)
756 goto out2;
757
758 /*
c71099ac 759 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
760 * released someone could insert this route. Relookup.
761 */
d8d1f30b 762 dst_release(&rt->dst);
519fbd87
YH
763 goto relookup;
764
765out:
8238dd06
YH
766 if (reachable) {
767 reachable = 0;
768 goto restart_2;
769 }
d8d1f30b 770 dst_hold(&rt->dst);
c71099ac 771 read_unlock_bh(&table->tb6_lock);
1da177e4 772out2:
d8d1f30b
CG
773 rt->dst.lastuse = jiffies;
774 rt->dst.__use++;
c71099ac
TG
775
776 return rt;
1da177e4
LT
777}
778
8ed67789 779static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
780 struct flowi *fl, int flags)
781{
8ed67789 782 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
783}
784
c71099ac
TG
785void ip6_route_input(struct sk_buff *skb)
786{
0660e03f 787 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 788 struct net *net = dev_net(skb->dev);
adaa70bb 789 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
790 struct flowi fl = {
791 .iif = skb->dev->ifindex,
5811662b
CG
792 .fl6_dst = iph->daddr,
793 .fl6_src = iph->saddr,
794 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
1ab1457c 795 .mark = skb->mark,
c71099ac
TG
796 .proto = iph->nexthdr,
797 };
adaa70bb 798
1d6e55f1 799 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 800 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 801
adf30907 802 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
803}
804
8ed67789 805static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 806 struct flowi *fl, int flags)
1da177e4 807{
8ed67789 808 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
809}
810
4591db4f
DL
811struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
812 struct flowi *fl)
c71099ac
TG
813{
814 int flags = 0;
815
6057fd78 816 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
77d16f45 817 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 818
adaa70bb
TG
819 if (!ipv6_addr_any(&fl->fl6_src))
820 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
821 else if (sk)
822 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 823
4591db4f 824 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
825}
826
7159039a 827EXPORT_SYMBOL(ip6_route_output);
1da177e4 828
14e50e57
DM
829int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
830{
831 struct rt6_info *ort = (struct rt6_info *) *dstp;
832 struct rt6_info *rt = (struct rt6_info *)
833 dst_alloc(&ip6_dst_blackhole_ops);
834 struct dst_entry *new = NULL;
835
836 if (rt) {
d8d1f30b 837 new = &rt->dst;
14e50e57
DM
838
839 atomic_set(&new->__refcnt, 1);
840 new->__use = 1;
352e512c
HX
841 new->input = dst_discard;
842 new->output = dst_discard;
14e50e57 843
defb3519 844 dst_copy_metrics(new, &ort->dst);
d8d1f30b 845 new->dev = ort->dst.dev;
14e50e57
DM
846 if (new->dev)
847 dev_hold(new->dev);
848 rt->rt6i_idev = ort->rt6i_idev;
849 if (rt->rt6i_idev)
850 in6_dev_hold(rt->rt6i_idev);
851 rt->rt6i_expires = 0;
852
853 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
854 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
855 rt->rt6i_metric = 0;
856
857 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
858#ifdef CONFIG_IPV6_SUBTREES
859 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
860#endif
861
862 dst_free(new);
863 }
864
865 dst_release(*dstp);
866 *dstp = new;
a02cec21 867 return new ? 0 : -ENOMEM;
14e50e57
DM
868}
869EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
870
1da177e4
LT
871/*
872 * Destination cache support functions
873 */
874
875static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
876{
877 struct rt6_info *rt;
878
879 rt = (struct rt6_info *) dst;
880
10414444 881 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1da177e4
LT
882 return dst;
883
884 return NULL;
885}
886
887static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
888{
889 struct rt6_info *rt = (struct rt6_info *) dst;
890
891 if (rt) {
54c1a859
YH
892 if (rt->rt6i_flags & RTF_CACHE) {
893 if (rt6_check_expired(rt)) {
894 ip6_del_rt(rt);
895 dst = NULL;
896 }
897 } else {
1da177e4 898 dst_release(dst);
54c1a859
YH
899 dst = NULL;
900 }
1da177e4 901 }
54c1a859 902 return dst;
1da177e4
LT
903}
904
905static void ip6_link_failure(struct sk_buff *skb)
906{
907 struct rt6_info *rt;
908
3ffe533c 909 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 910
adf30907 911 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
912 if (rt) {
913 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 914 dst_set_expires(&rt->dst, 0);
1da177e4
LT
915 rt->rt6i_flags |= RTF_EXPIRES;
916 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
917 rt->rt6i_node->fn_sernum = -1;
918 }
919}
920
921static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
922{
923 struct rt6_info *rt6 = (struct rt6_info*)dst;
924
925 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
926 rt6->rt6i_flags |= RTF_MODIFIED;
927 if (mtu < IPV6_MIN_MTU) {
defb3519 928 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 929 mtu = IPV6_MIN_MTU;
defb3519
DM
930 features |= RTAX_FEATURE_ALLFRAG;
931 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 932 }
defb3519 933 dst_metric_set(dst, RTAX_MTU, mtu);
8d71740c 934 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
935 }
936}
937
1da177e4
LT
938static int ipv6_get_mtu(struct net_device *dev);
939
5578689a 940static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
1da177e4
LT
941{
942 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
943
5578689a
DL
944 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
945 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
946
947 /*
1ab1457c
YH
948 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
949 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
950 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
951 * rely only on pmtu discovery"
952 */
953 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
954 mtu = IPV6_MAXPLEN;
955 return mtu;
956}
957
3b00944c
YH
958static struct dst_entry *icmp6_dst_gc_list;
959static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 960
3b00944c 961struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 962 struct neighbour *neigh,
9acd9f3a 963 const struct in6_addr *addr)
1da177e4
LT
964{
965 struct rt6_info *rt;
966 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 967 struct net *net = dev_net(dev);
1da177e4
LT
968
969 if (unlikely(idev == NULL))
970 return NULL;
971
86393e52 972 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
973 if (unlikely(rt == NULL)) {
974 in6_dev_put(idev);
975 goto out;
976 }
977
978 dev_hold(dev);
979 if (neigh)
980 neigh_hold(neigh);
14deae41 981 else {
1da177e4 982 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
983 if (IS_ERR(neigh))
984 neigh = NULL;
985 }
1da177e4
LT
986
987 rt->rt6i_dev = dev;
988 rt->rt6i_idev = idev;
989 rt->rt6i_nexthop = neigh;
d8d1f30b 990 atomic_set(&rt->dst.__refcnt, 1);
defb3519
DM
991 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
992 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
993 dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
d8d1f30b 994 rt->dst.output = ip6_output;
1da177e4
LT
995
996#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 997 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 998 ? DST_HOST
1da177e4
LT
999 : 0;
1000 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1001 rt->rt6i_dst.plen = 128;
1002#endif
1003
3b00944c 1004 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1005 rt->dst.next = icmp6_dst_gc_list;
1006 icmp6_dst_gc_list = &rt->dst;
3b00944c 1007 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1008
5578689a 1009 fib6_force_start_gc(net);
1da177e4
LT
1010
1011out:
d8d1f30b 1012 return &rt->dst;
1da177e4
LT
1013}
1014
3d0f24a7 1015int icmp6_dst_gc(void)
1da177e4
LT
1016{
1017 struct dst_entry *dst, *next, **pprev;
3d0f24a7 1018 int more = 0;
1da177e4
LT
1019
1020 next = NULL;
5d0bbeeb 1021
3b00944c
YH
1022 spin_lock_bh(&icmp6_dst_lock);
1023 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1024
1da177e4
LT
1025 while ((dst = *pprev) != NULL) {
1026 if (!atomic_read(&dst->__refcnt)) {
1027 *pprev = dst->next;
1028 dst_free(dst);
1da177e4
LT
1029 } else {
1030 pprev = &dst->next;
3d0f24a7 1031 ++more;
1da177e4
LT
1032 }
1033 }
1034
3b00944c 1035 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1036
3d0f24a7 1037 return more;
1da177e4
LT
1038}
1039
1e493d19
DM
1040static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1041 void *arg)
1042{
1043 struct dst_entry *dst, **pprev;
1044
1045 spin_lock_bh(&icmp6_dst_lock);
1046 pprev = &icmp6_dst_gc_list;
1047 while ((dst = *pprev) != NULL) {
1048 struct rt6_info *rt = (struct rt6_info *) dst;
1049 if (func(rt, arg)) {
1050 *pprev = dst->next;
1051 dst_free(dst);
1052 } else {
1053 pprev = &dst->next;
1054 }
1055 }
1056 spin_unlock_bh(&icmp6_dst_lock);
1057}
1058
569d3645 1059static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1060{
1da177e4 1061 unsigned long now = jiffies;
86393e52 1062 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1063 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1064 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1065 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1066 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1067 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1068 int entries;
7019b78e 1069
fc66f95c 1070 entries = dst_entries_get_fast(ops);
7019b78e 1071 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1072 entries <= rt_max_size)
1da177e4
LT
1073 goto out;
1074
6891a346
BT
1075 net->ipv6.ip6_rt_gc_expire++;
1076 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1077 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1078 entries = dst_entries_get_slow(ops);
1079 if (entries < ops->gc_thresh)
7019b78e 1080 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1081out:
7019b78e 1082 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1083 return entries > rt_max_size;
1da177e4
LT
1084}
1085
1086/* Clean host part of a prefix. Not necessary in radix tree,
1087 but results in cleaner routing tables.
1088
1089 Remove it only when all the things will work!
1090 */
1091
1092static int ipv6_get_mtu(struct net_device *dev)
1093{
1094 int mtu = IPV6_MIN_MTU;
1095 struct inet6_dev *idev;
1096
c68f24cc
ED
1097 rcu_read_lock();
1098 idev = __in6_dev_get(dev);
1099 if (idev)
1da177e4 1100 mtu = idev->cnf.mtu6;
c68f24cc 1101 rcu_read_unlock();
1da177e4
LT
1102 return mtu;
1103}
1104
6b75d090 1105int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1106{
6b75d090
YH
1107 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1108 if (hoplimit < 0) {
1109 struct net_device *dev = dst->dev;
c68f24cc
ED
1110 struct inet6_dev *idev;
1111
1112 rcu_read_lock();
1113 idev = __in6_dev_get(dev);
1114 if (idev)
6b75d090 1115 hoplimit = idev->cnf.hop_limit;
c68f24cc 1116 else
53b7997f 1117 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1118 rcu_read_unlock();
1da177e4
LT
1119 }
1120 return hoplimit;
1121}
abbf46ae 1122EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1123
1124/*
1125 *
1126 */
1127
86872cb5 1128int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1129{
1130 int err;
5578689a 1131 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1132 struct rt6_info *rt = NULL;
1133 struct net_device *dev = NULL;
1134 struct inet6_dev *idev = NULL;
c71099ac 1135 struct fib6_table *table;
1da177e4
LT
1136 int addr_type;
1137
86872cb5 1138 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1139 return -EINVAL;
1140#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1141 if (cfg->fc_src_len)
1da177e4
LT
1142 return -EINVAL;
1143#endif
86872cb5 1144 if (cfg->fc_ifindex) {
1da177e4 1145 err = -ENODEV;
5578689a 1146 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1147 if (!dev)
1148 goto out;
1149 idev = in6_dev_get(dev);
1150 if (!idev)
1151 goto out;
1152 }
1153
86872cb5
TG
1154 if (cfg->fc_metric == 0)
1155 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1156
5578689a 1157 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1158 if (table == NULL) {
1159 err = -ENOBUFS;
1160 goto out;
1161 }
1162
86393e52 1163 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1164
1165 if (rt == NULL) {
1166 err = -ENOMEM;
1167 goto out;
1168 }
1169
d8d1f30b 1170 rt->dst.obsolete = -1;
6f704992
YH
1171 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1172 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1173 0;
1da177e4 1174
86872cb5
TG
1175 if (cfg->fc_protocol == RTPROT_UNSPEC)
1176 cfg->fc_protocol = RTPROT_BOOT;
1177 rt->rt6i_protocol = cfg->fc_protocol;
1178
1179 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1180
1181 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1182 rt->dst.input = ip6_mc_input;
ab79ad14
1183 else if (cfg->fc_flags & RTF_LOCAL)
1184 rt->dst.input = ip6_input;
1da177e4 1185 else
d8d1f30b 1186 rt->dst.input = ip6_forward;
1da177e4 1187
d8d1f30b 1188 rt->dst.output = ip6_output;
1da177e4 1189
86872cb5
TG
1190 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1191 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1192 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1193 rt->dst.flags = DST_HOST;
1da177e4
LT
1194
1195#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1196 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1197 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1198#endif
1199
86872cb5 1200 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1201
1202 /* We cannot add true routes via loopback here,
1203 they would result in kernel looping; promote them to reject routes
1204 */
86872cb5 1205 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1206 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1207 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1208 /* hold loopback dev/idev if we haven't done so. */
5578689a 1209 if (dev != net->loopback_dev) {
1da177e4
LT
1210 if (dev) {
1211 dev_put(dev);
1212 in6_dev_put(idev);
1213 }
5578689a 1214 dev = net->loopback_dev;
1da177e4
LT
1215 dev_hold(dev);
1216 idev = in6_dev_get(dev);
1217 if (!idev) {
1218 err = -ENODEV;
1219 goto out;
1220 }
1221 }
d8d1f30b
CG
1222 rt->dst.output = ip6_pkt_discard_out;
1223 rt->dst.input = ip6_pkt_discard;
1224 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1225 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1226 goto install_route;
1227 }
1228
86872cb5 1229 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1230 struct in6_addr *gw_addr;
1231 int gwa_type;
1232
86872cb5
TG
1233 gw_addr = &cfg->fc_gateway;
1234 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1235 gwa_type = ipv6_addr_type(gw_addr);
1236
1237 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1238 struct rt6_info *grt;
1239
1240 /* IPv6 strictly inhibits using not link-local
1241 addresses as nexthop address.
1242 Otherwise, router will not able to send redirects.
1243 It is very good, but in some (rare!) circumstances
1244 (SIT, PtP, NBMA NOARP links) it is handy to allow
1245 some exceptions. --ANK
1246 */
1247 err = -EINVAL;
1248 if (!(gwa_type&IPV6_ADDR_UNICAST))
1249 goto out;
1250
5578689a 1251 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1252
1253 err = -EHOSTUNREACH;
1254 if (grt == NULL)
1255 goto out;
1256 if (dev) {
1257 if (dev != grt->rt6i_dev) {
d8d1f30b 1258 dst_release(&grt->dst);
1da177e4
LT
1259 goto out;
1260 }
1261 } else {
1262 dev = grt->rt6i_dev;
1263 idev = grt->rt6i_idev;
1264 dev_hold(dev);
1265 in6_dev_hold(grt->rt6i_idev);
1266 }
1267 if (!(grt->rt6i_flags&RTF_GATEWAY))
1268 err = 0;
d8d1f30b 1269 dst_release(&grt->dst);
1da177e4
LT
1270
1271 if (err)
1272 goto out;
1273 }
1274 err = -EINVAL;
1275 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1276 goto out;
1277 }
1278
1279 err = -ENODEV;
1280 if (dev == NULL)
1281 goto out;
1282
86872cb5 1283 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1284 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1285 if (IS_ERR(rt->rt6i_nexthop)) {
1286 err = PTR_ERR(rt->rt6i_nexthop);
1287 rt->rt6i_nexthop = NULL;
1288 goto out;
1289 }
1290 }
1291
86872cb5 1292 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1293
1294install_route:
86872cb5
TG
1295 if (cfg->fc_mx) {
1296 struct nlattr *nla;
1297 int remaining;
1298
1299 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1300 int type = nla_type(nla);
86872cb5
TG
1301
1302 if (type) {
1303 if (type > RTAX_MAX) {
1da177e4
LT
1304 err = -EINVAL;
1305 goto out;
1306 }
86872cb5 1307
defb3519 1308 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1309 }
1da177e4
LT
1310 }
1311 }
1312
d8d1f30b 1313 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
defb3519 1314 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
d8d1f30b 1315 if (!dst_mtu(&rt->dst))
defb3519 1316 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev));
d8d1f30b 1317 if (!dst_metric(&rt->dst, RTAX_ADVMSS))
defb3519 1318 dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
d8d1f30b 1319 rt->dst.dev = dev;
1da177e4 1320 rt->rt6i_idev = idev;
c71099ac 1321 rt->rt6i_table = table;
63152fc0 1322
c346dca1 1323 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1324
86872cb5 1325 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1326
1327out:
1328 if (dev)
1329 dev_put(dev);
1330 if (idev)
1331 in6_dev_put(idev);
1332 if (rt)
d8d1f30b 1333 dst_free(&rt->dst);
1da177e4
LT
1334 return err;
1335}
1336
86872cb5 1337static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1338{
1339 int err;
c71099ac 1340 struct fib6_table *table;
c346dca1 1341 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1342
8ed67789 1343 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1344 return -ENOENT;
1345
c71099ac
TG
1346 table = rt->rt6i_table;
1347 write_lock_bh(&table->tb6_lock);
1da177e4 1348
86872cb5 1349 err = fib6_del(rt, info);
d8d1f30b 1350 dst_release(&rt->dst);
1da177e4 1351
c71099ac 1352 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1353
1354 return err;
1355}
1356
e0a1ad73
TG
1357int ip6_del_rt(struct rt6_info *rt)
1358{
4d1169c1 1359 struct nl_info info = {
c346dca1 1360 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1361 };
528c4ceb 1362 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1363}
1364
86872cb5 1365static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1366{
c71099ac 1367 struct fib6_table *table;
1da177e4
LT
1368 struct fib6_node *fn;
1369 struct rt6_info *rt;
1370 int err = -ESRCH;
1371
5578689a 1372 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1373 if (table == NULL)
1374 return err;
1375
1376 read_lock_bh(&table->tb6_lock);
1da177e4 1377
c71099ac 1378 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1379 &cfg->fc_dst, cfg->fc_dst_len,
1380 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1381
1da177e4 1382 if (fn) {
d8d1f30b 1383 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1384 if (cfg->fc_ifindex &&
1da177e4 1385 (rt->rt6i_dev == NULL ||
86872cb5 1386 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1387 continue;
86872cb5
TG
1388 if (cfg->fc_flags & RTF_GATEWAY &&
1389 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1390 continue;
86872cb5 1391 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1392 continue;
d8d1f30b 1393 dst_hold(&rt->dst);
c71099ac 1394 read_unlock_bh(&table->tb6_lock);
1da177e4 1395
86872cb5 1396 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1397 }
1398 }
c71099ac 1399 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1400
1401 return err;
1402}
1403
1404/*
1405 * Handle redirects
1406 */
a6279458
YH
1407struct ip6rd_flowi {
1408 struct flowi fl;
1409 struct in6_addr gateway;
1410};
1411
8ed67789
DL
1412static struct rt6_info *__ip6_route_redirect(struct net *net,
1413 struct fib6_table *table,
a6279458
YH
1414 struct flowi *fl,
1415 int flags)
1da177e4 1416{
a6279458
YH
1417 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1418 struct rt6_info *rt;
e843b9e1 1419 struct fib6_node *fn;
c71099ac 1420
1da177e4 1421 /*
e843b9e1
YH
1422 * Get the "current" route for this destination and
1423 * check if the redirect has come from approriate router.
1424 *
1425 * RFC 2461 specifies that redirects should only be
1426 * accepted if they come from the nexthop to the target.
1427 * Due to the way the routes are chosen, this notion
1428 * is a bit fuzzy and one might need to check all possible
1429 * routes.
1da177e4 1430 */
1da177e4 1431
c71099ac 1432 read_lock_bh(&table->tb6_lock);
a6279458 1433 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1434restart:
d8d1f30b 1435 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1436 /*
1437 * Current route is on-link; redirect is always invalid.
1438 *
1439 * Seems, previous statement is not true. It could
1440 * be node, which looks for us as on-link (f.e. proxy ndisc)
1441 * But then router serving it might decide, that we should
1442 * know truth 8)8) --ANK (980726).
1443 */
1444 if (rt6_check_expired(rt))
1445 continue;
1446 if (!(rt->rt6i_flags & RTF_GATEWAY))
1447 continue;
a6279458 1448 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1449 continue;
a6279458 1450 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1451 continue;
1452 break;
1453 }
a6279458 1454
cb15d9c2 1455 if (!rt)
8ed67789
DL
1456 rt = net->ipv6.ip6_null_entry;
1457 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1458out:
d8d1f30b 1459 dst_hold(&rt->dst);
a6279458 1460
c71099ac 1461 read_unlock_bh(&table->tb6_lock);
e843b9e1 1462
a6279458
YH
1463 return rt;
1464};
1465
1466static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1467 struct in6_addr *src,
1468 struct in6_addr *gateway,
1469 struct net_device *dev)
1470{
adaa70bb 1471 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1472 struct net *net = dev_net(dev);
a6279458
YH
1473 struct ip6rd_flowi rdfl = {
1474 .fl = {
1475 .oif = dev->ifindex,
5811662b
CG
1476 .fl6_dst = *dest,
1477 .fl6_src = *src,
a6279458 1478 },
a6279458 1479 };
adaa70bb 1480
86c36ce4
BH
1481 ipv6_addr_copy(&rdfl.gateway, gateway);
1482
adaa70bb
TG
1483 if (rt6_need_strict(dest))
1484 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1485
5578689a 1486 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1487 flags, __ip6_route_redirect);
a6279458
YH
1488}
1489
1490void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1491 struct in6_addr *saddr,
1492 struct neighbour *neigh, u8 *lladdr, int on_link)
1493{
1494 struct rt6_info *rt, *nrt = NULL;
1495 struct netevent_redirect netevent;
c346dca1 1496 struct net *net = dev_net(neigh->dev);
a6279458
YH
1497
1498 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1499
8ed67789 1500 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1501 if (net_ratelimit())
1502 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1503 "for redirect target\n");
a6279458 1504 goto out;
1da177e4
LT
1505 }
1506
1da177e4
LT
1507 /*
1508 * We have finally decided to accept it.
1509 */
1510
1ab1457c 1511 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1512 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1513 NEIGH_UPDATE_F_OVERRIDE|
1514 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1515 NEIGH_UPDATE_F_ISROUTER))
1516 );
1517
1518 /*
1519 * Redirect received -> path was valid.
1520 * Look, redirects are sent only in response to data packets,
1521 * so that this nexthop apparently is reachable. --ANK
1522 */
d8d1f30b 1523 dst_confirm(&rt->dst);
1da177e4
LT
1524
1525 /* Duplicate redirect: silently ignore. */
d8d1f30b 1526 if (neigh == rt->dst.neighbour)
1da177e4
LT
1527 goto out;
1528
1529 nrt = ip6_rt_copy(rt);
1530 if (nrt == NULL)
1531 goto out;
1532
1533 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1534 if (on_link)
1535 nrt->rt6i_flags &= ~RTF_GATEWAY;
1536
1537 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1538 nrt->rt6i_dst.plen = 128;
d8d1f30b 1539 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1540
1541 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1542 nrt->rt6i_nexthop = neigh_clone(neigh);
1543 /* Reset pmtu, it may be better */
defb3519
DM
1544 dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev));
1545 dst_metric_set(&nrt->dst, RTAX_ADVMSS, ipv6_advmss(dev_net(neigh->dev),
1546 dst_mtu(&nrt->dst)));
1da177e4 1547
40e22e8f 1548 if (ip6_ins_rt(nrt))
1da177e4
LT
1549 goto out;
1550
d8d1f30b
CG
1551 netevent.old = &rt->dst;
1552 netevent.new = &nrt->dst;
8d71740c
TT
1553 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1554
1da177e4 1555 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1556 ip6_del_rt(rt);
1da177e4
LT
1557 return;
1558 }
1559
1560out:
d8d1f30b 1561 dst_release(&rt->dst);
1da177e4
LT
1562}
1563
1564/*
1565 * Handle ICMP "packet too big" messages
1566 * i.e. Path MTU discovery
1567 */
1568
ae878ae2
1569static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1570 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1571{
1572 struct rt6_info *rt, *nrt;
1573 int allfrag = 0;
1574
ae878ae2 1575 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1576 if (rt == NULL)
1577 return;
1578
d8d1f30b 1579 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1580 goto out;
1581
1582 if (pmtu < IPV6_MIN_MTU) {
1583 /*
1ab1457c 1584 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1585 * MTU (1280) and a fragment header should always be included
1586 * after a node receiving Too Big message reporting PMTU is
1587 * less than the IPv6 Minimum Link MTU.
1588 */
1589 pmtu = IPV6_MIN_MTU;
1590 allfrag = 1;
1591 }
1592
1593 /* New mtu received -> path was valid.
1594 They are sent only in response to data packets,
1595 so that this nexthop apparently is reachable. --ANK
1596 */
d8d1f30b 1597 dst_confirm(&rt->dst);
1da177e4
LT
1598
1599 /* Host route. If it is static, it would be better
1600 not to override it, but add new one, so that
1601 when cache entry will expire old pmtu
1602 would return automatically.
1603 */
1604 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1605 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1606 if (allfrag) {
1607 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1608 features |= RTAX_FEATURE_ALLFRAG;
1609 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1610 }
d8d1f30b 1611 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1612 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1613 goto out;
1614 }
1615
1616 /* Network route.
1617 Two cases are possible:
1618 1. It is connected route. Action: COW
1619 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1620 */
d5315b50 1621 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1622 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1623 else
1624 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1625
d5315b50 1626 if (nrt) {
defb3519
DM
1627 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1628 if (allfrag) {
1629 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1630 features |= RTAX_FEATURE_ALLFRAG;
1631 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1632 }
a1e78363
YH
1633
1634 /* According to RFC 1981, detecting PMTU increase shouldn't be
1635 * happened within 5 mins, the recommended timer is 10 mins.
1636 * Here this route expiration time is set to ip6_rt_mtu_expires
1637 * which is 10 mins. After 10 mins the decreased pmtu is expired
1638 * and detecting PMTU increase will be automatically happened.
1639 */
d8d1f30b 1640 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1641 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1642
40e22e8f 1643 ip6_ins_rt(nrt);
1da177e4 1644 }
1da177e4 1645out:
d8d1f30b 1646 dst_release(&rt->dst);
1da177e4
LT
1647}
1648
ae878ae2
1649void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1650 struct net_device *dev, u32 pmtu)
1651{
1652 struct net *net = dev_net(dev);
1653
1654 /*
1655 * RFC 1981 states that a node "MUST reduce the size of the packets it
1656 * is sending along the path" that caused the Packet Too Big message.
1657 * Since it's not possible in the general case to determine which
1658 * interface was used to send the original packet, we update the MTU
1659 * on the interface that will be used to send future packets. We also
1660 * update the MTU on the interface that received the Packet Too Big in
1661 * case the original packet was forced out that interface with
1662 * SO_BINDTODEVICE or similar. This is the next best thing to the
1663 * correct behaviour, which would be to update the MTU on all
1664 * interfaces.
1665 */
1666 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1667 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1668}
1669
1da177e4
LT
1670/*
1671 * Misc support functions
1672 */
1673
1674static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1675{
c346dca1 1676 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1677 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1678
1679 if (rt) {
d8d1f30b
CG
1680 rt->dst.input = ort->dst.input;
1681 rt->dst.output = ort->dst.output;
1682
defb3519 1683 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b
CG
1684 rt->dst.error = ort->dst.error;
1685 rt->dst.dev = ort->dst.dev;
1686 if (rt->dst.dev)
1687 dev_hold(rt->dst.dev);
1da177e4
LT
1688 rt->rt6i_idev = ort->rt6i_idev;
1689 if (rt->rt6i_idev)
1690 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1691 rt->dst.lastuse = jiffies;
1da177e4
LT
1692 rt->rt6i_expires = 0;
1693
1694 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1695 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1696 rt->rt6i_metric = 0;
1697
1698 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1699#ifdef CONFIG_IPV6_SUBTREES
1700 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1701#endif
c71099ac 1702 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1703 }
1704 return rt;
1705}
1706
70ceb4f5 1707#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1708static struct rt6_info *rt6_get_route_info(struct net *net,
1709 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1710 struct in6_addr *gwaddr, int ifindex)
1711{
1712 struct fib6_node *fn;
1713 struct rt6_info *rt = NULL;
c71099ac
TG
1714 struct fib6_table *table;
1715
efa2cea0 1716 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1717 if (table == NULL)
1718 return NULL;
70ceb4f5 1719
c71099ac
TG
1720 write_lock_bh(&table->tb6_lock);
1721 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1722 if (!fn)
1723 goto out;
1724
d8d1f30b 1725 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1726 if (rt->rt6i_dev->ifindex != ifindex)
1727 continue;
1728 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1729 continue;
1730 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1731 continue;
d8d1f30b 1732 dst_hold(&rt->dst);
70ceb4f5
YH
1733 break;
1734 }
1735out:
c71099ac 1736 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1737 return rt;
1738}
1739
efa2cea0
DL
1740static struct rt6_info *rt6_add_route_info(struct net *net,
1741 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1742 struct in6_addr *gwaddr, int ifindex,
1743 unsigned pref)
1744{
86872cb5
TG
1745 struct fib6_config cfg = {
1746 .fc_table = RT6_TABLE_INFO,
238fc7ea 1747 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1748 .fc_ifindex = ifindex,
1749 .fc_dst_len = prefixlen,
1750 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1751 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1752 .fc_nlinfo.pid = 0,
1753 .fc_nlinfo.nlh = NULL,
1754 .fc_nlinfo.nl_net = net,
86872cb5
TG
1755 };
1756
1757 ipv6_addr_copy(&cfg.fc_dst, prefix);
1758 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1759
e317da96
YH
1760 /* We should treat it as a default route if prefix length is 0. */
1761 if (!prefixlen)
86872cb5 1762 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1763
86872cb5 1764 ip6_route_add(&cfg);
70ceb4f5 1765
efa2cea0 1766 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1767}
1768#endif
1769
1da177e4 1770struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1771{
1da177e4 1772 struct rt6_info *rt;
c71099ac 1773 struct fib6_table *table;
1da177e4 1774
c346dca1 1775 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1776 if (table == NULL)
1777 return NULL;
1da177e4 1778
c71099ac 1779 write_lock_bh(&table->tb6_lock);
d8d1f30b 1780 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1781 if (dev == rt->rt6i_dev &&
045927ff 1782 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1783 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1784 break;
1785 }
1786 if (rt)
d8d1f30b 1787 dst_hold(&rt->dst);
c71099ac 1788 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1789 return rt;
1790}
1791
1792struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1793 struct net_device *dev,
1794 unsigned int pref)
1da177e4 1795{
86872cb5
TG
1796 struct fib6_config cfg = {
1797 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1798 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1799 .fc_ifindex = dev->ifindex,
1800 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1801 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1802 .fc_nlinfo.pid = 0,
1803 .fc_nlinfo.nlh = NULL,
c346dca1 1804 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1805 };
1da177e4 1806
86872cb5 1807 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1808
86872cb5 1809 ip6_route_add(&cfg);
1da177e4 1810
1da177e4
LT
1811 return rt6_get_dflt_router(gwaddr, dev);
1812}
1813
7b4da532 1814void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1815{
1816 struct rt6_info *rt;
c71099ac
TG
1817 struct fib6_table *table;
1818
1819 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1820 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1821 if (table == NULL)
1822 return;
1da177e4
LT
1823
1824restart:
c71099ac 1825 read_lock_bh(&table->tb6_lock);
d8d1f30b 1826 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1827 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1828 dst_hold(&rt->dst);
c71099ac 1829 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1830 ip6_del_rt(rt);
1da177e4
LT
1831 goto restart;
1832 }
1833 }
c71099ac 1834 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1835}
1836
5578689a
DL
1837static void rtmsg_to_fib6_config(struct net *net,
1838 struct in6_rtmsg *rtmsg,
86872cb5
TG
1839 struct fib6_config *cfg)
1840{
1841 memset(cfg, 0, sizeof(*cfg));
1842
1843 cfg->fc_table = RT6_TABLE_MAIN;
1844 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1845 cfg->fc_metric = rtmsg->rtmsg_metric;
1846 cfg->fc_expires = rtmsg->rtmsg_info;
1847 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1848 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1849 cfg->fc_flags = rtmsg->rtmsg_flags;
1850
5578689a 1851 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1852
86872cb5
TG
1853 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1854 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1855 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1856}
1857
5578689a 1858int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1859{
86872cb5 1860 struct fib6_config cfg;
1da177e4
LT
1861 struct in6_rtmsg rtmsg;
1862 int err;
1863
1864 switch(cmd) {
1865 case SIOCADDRT: /* Add a route */
1866 case SIOCDELRT: /* Delete a route */
1867 if (!capable(CAP_NET_ADMIN))
1868 return -EPERM;
1869 err = copy_from_user(&rtmsg, arg,
1870 sizeof(struct in6_rtmsg));
1871 if (err)
1872 return -EFAULT;
86872cb5 1873
5578689a 1874 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1875
1da177e4
LT
1876 rtnl_lock();
1877 switch (cmd) {
1878 case SIOCADDRT:
86872cb5 1879 err = ip6_route_add(&cfg);
1da177e4
LT
1880 break;
1881 case SIOCDELRT:
86872cb5 1882 err = ip6_route_del(&cfg);
1da177e4
LT
1883 break;
1884 default:
1885 err = -EINVAL;
1886 }
1887 rtnl_unlock();
1888
1889 return err;
3ff50b79 1890 }
1da177e4
LT
1891
1892 return -EINVAL;
1893}
1894
1895/*
1896 * Drop the packet on the floor
1897 */
1898
d5fdd6ba 1899static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1900{
612f09e8 1901 int type;
adf30907 1902 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1903 switch (ipstats_mib_noroutes) {
1904 case IPSTATS_MIB_INNOROUTES:
0660e03f 1905 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1906 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1907 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1908 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1909 break;
1910 }
1911 /* FALLTHROUGH */
1912 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1913 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1914 ipstats_mib_noroutes);
612f09e8
YH
1915 break;
1916 }
3ffe533c 1917 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1918 kfree_skb(skb);
1919 return 0;
1920}
1921
9ce8ade0
TG
1922static int ip6_pkt_discard(struct sk_buff *skb)
1923{
612f09e8 1924 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1925}
1926
20380731 1927static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1928{
adf30907 1929 skb->dev = skb_dst(skb)->dev;
612f09e8 1930 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1931}
1932
6723ab54
DM
1933#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1934
9ce8ade0
TG
1935static int ip6_pkt_prohibit(struct sk_buff *skb)
1936{
612f09e8 1937 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1938}
1939
1940static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1941{
adf30907 1942 skb->dev = skb_dst(skb)->dev;
612f09e8 1943 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1944}
1945
6723ab54
DM
1946#endif
1947
1da177e4
LT
1948/*
1949 * Allocate a dst for local (unicast / anycast) address.
1950 */
1951
1952struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1953 const struct in6_addr *addr,
1954 int anycast)
1955{
c346dca1 1956 struct net *net = dev_net(idev->dev);
86393e52 1957 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1958 struct neighbour *neigh;
1da177e4 1959
40385653
BG
1960 if (rt == NULL) {
1961 if (net_ratelimit())
1962 pr_warning("IPv6: Maximum number of routes reached,"
1963 " consider increasing route/max_size.\n");
1da177e4 1964 return ERR_PTR(-ENOMEM);
40385653 1965 }
1da177e4 1966
5578689a 1967 dev_hold(net->loopback_dev);
1da177e4
LT
1968 in6_dev_hold(idev);
1969
d8d1f30b
CG
1970 rt->dst.flags = DST_HOST;
1971 rt->dst.input = ip6_input;
1972 rt->dst.output = ip6_output;
5578689a 1973 rt->rt6i_dev = net->loopback_dev;
1da177e4 1974 rt->rt6i_idev = idev;
defb3519
DM
1975 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
1976 dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
1977 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
d8d1f30b 1978 rt->dst.obsolete = -1;
1da177e4
LT
1979
1980 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1981 if (anycast)
1982 rt->rt6i_flags |= RTF_ANYCAST;
1983 else
1da177e4 1984 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
1985 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1986 if (IS_ERR(neigh)) {
d8d1f30b 1987 dst_free(&rt->dst);
14deae41
DM
1988
1989 /* We are casting this because that is the return
1990 * value type. But an errno encoded pointer is the
1991 * same regardless of the underlying pointer type,
1992 * and that's what we are returning. So this is OK.
1993 */
1994 return (struct rt6_info *) neigh;
1da177e4 1995 }
14deae41 1996 rt->rt6i_nexthop = neigh;
1da177e4
LT
1997
1998 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1999 rt->rt6i_dst.plen = 128;
5578689a 2000 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2001
d8d1f30b 2002 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2003
2004 return rt;
2005}
2006
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() through the void *arg of the cleaners. */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches every device */
	struct net *net;	/* namespace whose null entry must be spared */
};
2011
1da177e4
LT
2012static int fib6_ifdown(struct rt6_info *rt, void *arg)
2013{
8ed67789
DL
2014 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
2015 struct net *net = ((struct arg_dev_net *)arg)->net;
2016
2017 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2018 rt != net->ipv6.ip6_null_entry) {
1da177e4
LT
2019 RT6_TRACE("deleted by ifdown %p\n", rt);
2020 return -1;
2021 }
2022 return 0;
2023}
2024
f3db4851 2025void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2026{
8ed67789
DL
2027 struct arg_dev_net adn = {
2028 .dev = dev,
2029 .net = net,
2030 };
2031
2032 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2033 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2034}
2035
/* Argument bundle handed to rt6_mtu_change_route() through its void *p_arg. */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2041
2042static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2043{
2044 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2045 struct inet6_dev *idev;
c346dca1 2046 struct net *net = dev_net(arg->dev);
1da177e4
LT
2047
2048 /* In IPv6 pmtu discovery is not optional,
2049 so that RTAX_MTU lock cannot disable it.
2050 We still use this lock to block changes
2051 caused by addrconf/ndisc.
2052 */
2053
2054 idev = __in6_dev_get(arg->dev);
2055 if (idev == NULL)
2056 return 0;
2057
2058 /* For administrative MTU increase, there is no way to discover
2059 IPv6 PMTU increase, so PMTU increase should be updated here.
2060 Since RFC 1981 doesn't include administrative MTU increase
2061 update PMTU increase is a MUST. (i.e. jumbo frame)
2062 */
2063 /*
2064 If new MTU is less than route PMTU, this new MTU will be the
2065 lowest MTU in the path, update the route PMTU to reflect PMTU
2066 decreases; if new MTU is greater than route PMTU, and the
2067 old MTU is the lowest MTU in the path, update the route PMTU
2068 to reflect the increase. In this case if the other nodes' MTU
2069 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2070 PMTU discouvery.
2071 */
2072 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2073 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2074 (dst_mtu(&rt->dst) >= arg->mtu ||
2075 (dst_mtu(&rt->dst) < arg->mtu &&
2076 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519
DM
2077 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2078 dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, arg->mtu));
566cfd8f 2079 }
1da177e4
LT
2080 return 0;
2081}
2082
2083void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2084{
c71099ac
TG
2085 struct rt6_mtu_change_arg arg = {
2086 .dev = dev,
2087 .mtu = mtu,
2088 };
1da177e4 2089
c346dca1 2090 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2091}
2092
ef7c79ed 2093static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2094 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2095 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2096 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2097 [RTA_PRIORITY] = { .type = NLA_U32 },
2098 [RTA_METRICS] = { .type = NLA_NESTED },
2099};
2100
2101static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2102 struct fib6_config *cfg)
1da177e4 2103{
86872cb5
TG
2104 struct rtmsg *rtm;
2105 struct nlattr *tb[RTA_MAX+1];
2106 int err;
1da177e4 2107
86872cb5
TG
2108 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2109 if (err < 0)
2110 goto errout;
1da177e4 2111
86872cb5
TG
2112 err = -EINVAL;
2113 rtm = nlmsg_data(nlh);
2114 memset(cfg, 0, sizeof(*cfg));
2115
2116 cfg->fc_table = rtm->rtm_table;
2117 cfg->fc_dst_len = rtm->rtm_dst_len;
2118 cfg->fc_src_len = rtm->rtm_src_len;
2119 cfg->fc_flags = RTF_UP;
2120 cfg->fc_protocol = rtm->rtm_protocol;
2121
2122 if (rtm->rtm_type == RTN_UNREACHABLE)
2123 cfg->fc_flags |= RTF_REJECT;
2124
ab79ad14
2125 if (rtm->rtm_type == RTN_LOCAL)
2126 cfg->fc_flags |= RTF_LOCAL;
2127
86872cb5
TG
2128 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2129 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2130 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2131
2132 if (tb[RTA_GATEWAY]) {
2133 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2134 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2135 }
86872cb5
TG
2136
2137 if (tb[RTA_DST]) {
2138 int plen = (rtm->rtm_dst_len + 7) >> 3;
2139
2140 if (nla_len(tb[RTA_DST]) < plen)
2141 goto errout;
2142
2143 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2144 }
86872cb5
TG
2145
2146 if (tb[RTA_SRC]) {
2147 int plen = (rtm->rtm_src_len + 7) >> 3;
2148
2149 if (nla_len(tb[RTA_SRC]) < plen)
2150 goto errout;
2151
2152 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2153 }
86872cb5
TG
2154
2155 if (tb[RTA_OIF])
2156 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2157
2158 if (tb[RTA_PRIORITY])
2159 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2160
2161 if (tb[RTA_METRICS]) {
2162 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2163 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2164 }
86872cb5
TG
2165
2166 if (tb[RTA_TABLE])
2167 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2168
2169 err = 0;
2170errout:
2171 return err;
1da177e4
LT
2172}
2173
c127ea2c 2174static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2175{
86872cb5
TG
2176 struct fib6_config cfg;
2177 int err;
1da177e4 2178
86872cb5
TG
2179 err = rtm_to_fib6_config(skb, nlh, &cfg);
2180 if (err < 0)
2181 return err;
2182
2183 return ip6_route_del(&cfg);
1da177e4
LT
2184}
2185
c127ea2c 2186static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2187{
86872cb5
TG
2188 struct fib6_config cfg;
2189 int err;
1da177e4 2190
86872cb5
TG
2191 err = rtm_to_fib6_config(skb, nlh, &cfg);
2192 if (err < 0)
2193 return err;
2194
2195 return ip6_route_add(&cfg);
1da177e4
LT
2196}
2197
339bf98f
TG
2198static inline size_t rt6_nlmsg_size(void)
2199{
2200 return NLMSG_ALIGN(sizeof(struct rtmsg))
2201 + nla_total_size(16) /* RTA_SRC */
2202 + nla_total_size(16) /* RTA_DST */
2203 + nla_total_size(16) /* RTA_GATEWAY */
2204 + nla_total_size(16) /* RTA_PREFSRC */
2205 + nla_total_size(4) /* RTA_TABLE */
2206 + nla_total_size(4) /* RTA_IIF */
2207 + nla_total_size(4) /* RTA_OIF */
2208 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2209 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2210 + nla_total_size(sizeof(struct rta_cacheinfo));
2211}
2212
191cd582
BH
2213static int rt6_fill_node(struct net *net,
2214 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2215 struct in6_addr *dst, struct in6_addr *src,
2216 int iif, int type, u32 pid, u32 seq,
7bc570c8 2217 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2218{
2219 struct rtmsg *rtm;
2d7202bf 2220 struct nlmsghdr *nlh;
e3703b3d 2221 long expires;
9e762a4a 2222 u32 table;
1da177e4
LT
2223
2224 if (prefix) { /* user wants prefix routes only */
2225 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2226 /* success since this is not a prefix route */
2227 return 1;
2228 }
2229 }
2230
2d7202bf
TG
2231 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2232 if (nlh == NULL)
26932566 2233 return -EMSGSIZE;
2d7202bf
TG
2234
2235 rtm = nlmsg_data(nlh);
1da177e4
LT
2236 rtm->rtm_family = AF_INET6;
2237 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2238 rtm->rtm_src_len = rt->rt6i_src.plen;
2239 rtm->rtm_tos = 0;
c71099ac 2240 if (rt->rt6i_table)
9e762a4a 2241 table = rt->rt6i_table->tb6_id;
c71099ac 2242 else
9e762a4a
PM
2243 table = RT6_TABLE_UNSPEC;
2244 rtm->rtm_table = table;
2d7202bf 2245 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2246 if (rt->rt6i_flags&RTF_REJECT)
2247 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2248 else if (rt->rt6i_flags&RTF_LOCAL)
2249 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2250 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2251 rtm->rtm_type = RTN_LOCAL;
2252 else
2253 rtm->rtm_type = RTN_UNICAST;
2254 rtm->rtm_flags = 0;
2255 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2256 rtm->rtm_protocol = rt->rt6i_protocol;
2257 if (rt->rt6i_flags&RTF_DYNAMIC)
2258 rtm->rtm_protocol = RTPROT_REDIRECT;
2259 else if (rt->rt6i_flags & RTF_ADDRCONF)
2260 rtm->rtm_protocol = RTPROT_KERNEL;
2261 else if (rt->rt6i_flags&RTF_DEFAULT)
2262 rtm->rtm_protocol = RTPROT_RA;
2263
2264 if (rt->rt6i_flags&RTF_CACHE)
2265 rtm->rtm_flags |= RTM_F_CLONED;
2266
2267 if (dst) {
2d7202bf 2268 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2269 rtm->rtm_dst_len = 128;
1da177e4 2270 } else if (rtm->rtm_dst_len)
2d7202bf 2271 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2272#ifdef CONFIG_IPV6_SUBTREES
2273 if (src) {
2d7202bf 2274 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2275 rtm->rtm_src_len = 128;
1da177e4 2276 } else if (rtm->rtm_src_len)
2d7202bf 2277 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2278#endif
7bc570c8
YH
2279 if (iif) {
2280#ifdef CONFIG_IPV6_MROUTE
2281 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2282 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2283 if (err <= 0) {
2284 if (!nowait) {
2285 if (err == 0)
2286 return 0;
2287 goto nla_put_failure;
2288 } else {
2289 if (err == -EMSGSIZE)
2290 goto nla_put_failure;
2291 }
2292 }
2293 } else
2294#endif
2295 NLA_PUT_U32(skb, RTA_IIF, iif);
2296 } else if (dst) {
d8d1f30b 2297 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
1da177e4 2298 struct in6_addr saddr_buf;
191cd582 2299 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2300 dst, 0, &saddr_buf) == 0)
2d7202bf 2301 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2302 }
2d7202bf 2303
defb3519 2304 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2305 goto nla_put_failure;
2306
d8d1f30b
CG
2307 if (rt->dst.neighbour)
2308 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2309
d8d1f30b 2310 if (rt->dst.dev)
2d7202bf
TG
2311 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2312
2313 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2314
36e3deae
YH
2315 if (!(rt->rt6i_flags & RTF_EXPIRES))
2316 expires = 0;
2317 else if (rt->rt6i_expires - jiffies < INT_MAX)
2318 expires = rt->rt6i_expires - jiffies;
2319 else
2320 expires = INT_MAX;
69cdf8f9 2321
d8d1f30b
CG
2322 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2323 expires, rt->dst.error) < 0)
e3703b3d 2324 goto nla_put_failure;
2d7202bf
TG
2325
2326 return nlmsg_end(skb, nlh);
2327
2328nla_put_failure:
26932566
PM
2329 nlmsg_cancel(skb, nlh);
2330 return -EMSGSIZE;
1da177e4
LT
2331}
2332
1b43af54 2333int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2334{
2335 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2336 int prefix;
2337
2d7202bf
TG
2338 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2339 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2340 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2341 } else
2342 prefix = 0;
2343
191cd582
BH
2344 return rt6_fill_node(arg->net,
2345 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2346 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2347 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2348}
2349
c127ea2c 2350static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2351{
3b1e0a65 2352 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2353 struct nlattr *tb[RTA_MAX+1];
2354 struct rt6_info *rt;
1da177e4 2355 struct sk_buff *skb;
ab364a6f 2356 struct rtmsg *rtm;
1da177e4 2357 struct flowi fl;
ab364a6f 2358 int err, iif = 0;
1da177e4 2359
ab364a6f
TG
2360 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2361 if (err < 0)
2362 goto errout;
1da177e4 2363
ab364a6f 2364 err = -EINVAL;
1da177e4 2365 memset(&fl, 0, sizeof(fl));
1da177e4 2366
ab364a6f
TG
2367 if (tb[RTA_SRC]) {
2368 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2369 goto errout;
2370
2371 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2372 }
2373
2374 if (tb[RTA_DST]) {
2375 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2376 goto errout;
2377
2378 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2379 }
2380
2381 if (tb[RTA_IIF])
2382 iif = nla_get_u32(tb[RTA_IIF]);
2383
2384 if (tb[RTA_OIF])
2385 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2386
2387 if (iif) {
2388 struct net_device *dev;
5578689a 2389 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2390 if (!dev) {
2391 err = -ENODEV;
ab364a6f 2392 goto errout;
1da177e4
LT
2393 }
2394 }
2395
ab364a6f
TG
2396 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2397 if (skb == NULL) {
2398 err = -ENOBUFS;
2399 goto errout;
2400 }
1da177e4 2401
ab364a6f
TG
2402 /* Reserve room for dummy headers, this skb can pass
2403 through good chunk of routing engine.
2404 */
459a98ed 2405 skb_reset_mac_header(skb);
ab364a6f 2406 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2407
8a3edd80 2408 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
d8d1f30b 2409 skb_dst_set(skb, &rt->dst);
1da177e4 2410
191cd582 2411 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2412 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2413 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2414 if (err < 0) {
ab364a6f
TG
2415 kfree_skb(skb);
2416 goto errout;
1da177e4
LT
2417 }
2418
5578689a 2419 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2420errout:
1da177e4 2421 return err;
1da177e4
LT
2422}
2423
86872cb5 2424void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2425{
2426 struct sk_buff *skb;
5578689a 2427 struct net *net = info->nl_net;
528c4ceb
DL
2428 u32 seq;
2429 int err;
2430
2431 err = -ENOBUFS;
2432 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2433
339bf98f 2434 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2435 if (skb == NULL)
2436 goto errout;
2437
191cd582 2438 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2439 event, info->pid, seq, 0, 0, 0);
26932566
PM
2440 if (err < 0) {
2441 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2442 WARN_ON(err == -EMSGSIZE);
2443 kfree_skb(skb);
2444 goto errout;
2445 }
1ce85fe4
PNA
2446 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2447 info->nlh, gfp_any());
2448 return;
21713ebc
TG
2449errout:
2450 if (err < 0)
5578689a 2451 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2452}
2453
8ed67789
DL
2454static int ip6_route_dev_notify(struct notifier_block *this,
2455 unsigned long event, void *data)
2456{
2457 struct net_device *dev = (struct net_device *)data;
c346dca1 2458 struct net *net = dev_net(dev);
8ed67789
DL
2459
2460 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2461 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2462 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2463#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2464 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2465 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2466 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2467 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2468#endif
2469 }
2470
2471 return NOTIFY_OK;
2472}
2473
1da177e4
LT
2474/*
2475 * /proc
2476 */
2477
2478#ifdef CONFIG_PROC_FS
2479
1da177e4
LT
/*
 * Buffer-tracking state for /proc output.
 * NOTE(review): not referenced by any function visible in this part of the
 * file - possibly a leftover; confirm before removing.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2488
2489static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2490{
33120b30 2491 struct seq_file *m = p_arg;
1da177e4 2492
4b7a4274 2493 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2494
2495#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2496 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2497#else
33120b30 2498 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2499#endif
2500
2501 if (rt->rt6i_nexthop) {
4b7a4274 2502 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2503 } else {
33120b30 2504 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2505 }
33120b30 2506 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2507 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2508 rt->dst.__use, rt->rt6i_flags,
33120b30 2509 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2510 return 0;
2511}
2512
33120b30 2513static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2514{
f3db4851
DL
2515 struct net *net = (struct net *)m->private;
2516 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2517 return 0;
2518}
1da177e4 2519
33120b30
AD
2520static int ipv6_route_open(struct inode *inode, struct file *file)
2521{
de05c557 2522 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2523}
2524
33120b30
AD
2525static const struct file_operations ipv6_route_proc_fops = {
2526 .owner = THIS_MODULE,
2527 .open = ipv6_route_open,
2528 .read = seq_read,
2529 .llseek = seq_lseek,
b6fcbdb4 2530 .release = single_release_net,
33120b30
AD
2531};
2532
1da177e4
LT
2533static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2534{
69ddb805 2535 struct net *net = (struct net *)seq->private;
1da177e4 2536 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2537 net->ipv6.rt6_stats->fib_nodes,
2538 net->ipv6.rt6_stats->fib_route_nodes,
2539 net->ipv6.rt6_stats->fib_rt_alloc,
2540 net->ipv6.rt6_stats->fib_rt_entries,
2541 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2542 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2543 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2544
2545 return 0;
2546}
2547
2548static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2549{
de05c557 2550 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2551}
2552
9a32144e 2553static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2554 .owner = THIS_MODULE,
2555 .open = rt6_stats_seq_open,
2556 .read = seq_read,
2557 .llseek = seq_lseek,
b6fcbdb4 2558 .release = single_release_net,
1da177e4
LT
2559};
2560#endif /* CONFIG_PROC_FS */
2561
2562#ifdef CONFIG_SYSCTL
2563
1da177e4 2564static
8d65af78 2565int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2566 void __user *buffer, size_t *lenp, loff_t *ppos)
2567{
5b7c931d
DL
2568 struct net *net = current->nsproxy->net_ns;
2569 int delay = net->ipv6.sysctl.flush_delay;
1da177e4 2570 if (write) {
8d65af78 2571 proc_dointvec(ctl, write, buffer, lenp, ppos);
5b7c931d 2572 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2573 return 0;
2574 } else
2575 return -EINVAL;
2576}
2577
760f2d01 2578ctl_table ipv6_route_table_template[] = {
1ab1457c 2579 {
1da177e4 2580 .procname = "flush",
4990509f 2581 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2582 .maxlen = sizeof(int),
89c8b3a1 2583 .mode = 0200,
6d9f239a 2584 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2585 },
2586 {
1da177e4 2587 .procname = "gc_thresh",
9a7ec3a9 2588 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2589 .maxlen = sizeof(int),
2590 .mode = 0644,
6d9f239a 2591 .proc_handler = proc_dointvec,
1da177e4
LT
2592 },
2593 {
1da177e4 2594 .procname = "max_size",
4990509f 2595 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2596 .maxlen = sizeof(int),
2597 .mode = 0644,
6d9f239a 2598 .proc_handler = proc_dointvec,
1da177e4
LT
2599 },
2600 {
1da177e4 2601 .procname = "gc_min_interval",
4990509f 2602 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2603 .maxlen = sizeof(int),
2604 .mode = 0644,
6d9f239a 2605 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2606 },
2607 {
1da177e4 2608 .procname = "gc_timeout",
4990509f 2609 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2610 .maxlen = sizeof(int),
2611 .mode = 0644,
6d9f239a 2612 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2613 },
2614 {
1da177e4 2615 .procname = "gc_interval",
4990509f 2616 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2617 .maxlen = sizeof(int),
2618 .mode = 0644,
6d9f239a 2619 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2620 },
2621 {
1da177e4 2622 .procname = "gc_elasticity",
4990509f 2623 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2624 .maxlen = sizeof(int),
2625 .mode = 0644,
f3d3f616 2626 .proc_handler = proc_dointvec,
1da177e4
LT
2627 },
2628 {
1da177e4 2629 .procname = "mtu_expires",
4990509f 2630 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2631 .maxlen = sizeof(int),
2632 .mode = 0644,
6d9f239a 2633 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2634 },
2635 {
1da177e4 2636 .procname = "min_adv_mss",
4990509f 2637 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2638 .maxlen = sizeof(int),
2639 .mode = 0644,
f3d3f616 2640 .proc_handler = proc_dointvec,
1da177e4
LT
2641 },
2642 {
1da177e4 2643 .procname = "gc_min_interval_ms",
4990509f 2644 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2645 .maxlen = sizeof(int),
2646 .mode = 0644,
6d9f239a 2647 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2648 },
f8572d8f 2649 { }
1da177e4
LT
2650};
2651
2c8c1e72 2652struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2653{
2654 struct ctl_table *table;
2655
2656 table = kmemdup(ipv6_route_table_template,
2657 sizeof(ipv6_route_table_template),
2658 GFP_KERNEL);
5ee09105
YH
2659
2660 if (table) {
2661 table[0].data = &net->ipv6.sysctl.flush_delay;
86393e52 2662 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2663 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2664 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2665 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2666 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2667 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2668 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2669 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2670 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2671 }
2672
760f2d01
DL
2673 return table;
2674}
1da177e4
LT
2675#endif
2676
2c8c1e72 2677static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2678{
633d424b 2679 int ret = -ENOMEM;
8ed67789 2680
86393e52
AD
2681 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2682 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2683
fc66f95c
ED
2684 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2685 goto out_ip6_dst_ops;
2686
8ed67789
DL
2687 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2688 sizeof(*net->ipv6.ip6_null_entry),
2689 GFP_KERNEL);
2690 if (!net->ipv6.ip6_null_entry)
fc66f95c 2691 goto out_ip6_dst_entries;
d8d1f30b 2692 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2693 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2694 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
defb3519 2695 dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
8ed67789
DL
2696
2697#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2698 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2699 sizeof(*net->ipv6.ip6_prohibit_entry),
2700 GFP_KERNEL);
68fffc67
PZ
2701 if (!net->ipv6.ip6_prohibit_entry)
2702 goto out_ip6_null_entry;
d8d1f30b 2703 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2704 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2705 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
defb3519 2706 dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
8ed67789
DL
2707
2708 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2709 sizeof(*net->ipv6.ip6_blk_hole_entry),
2710 GFP_KERNEL);
68fffc67
PZ
2711 if (!net->ipv6.ip6_blk_hole_entry)
2712 goto out_ip6_prohibit_entry;
d8d1f30b 2713 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2714 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2715 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
defb3519 2716 dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
8ed67789
DL
2717#endif
2718
b339a47c
PZ
2719 net->ipv6.sysctl.flush_delay = 0;
2720 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2721 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2722 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2723 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2724 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2725 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2726 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2727
cdb18761
DL
2728#ifdef CONFIG_PROC_FS
2729 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2730 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2731#endif
6891a346
BT
2732 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2733
8ed67789
DL
2734 ret = 0;
2735out:
2736 return ret;
f2fc6a54 2737
68fffc67
PZ
2738#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2739out_ip6_prohibit_entry:
2740 kfree(net->ipv6.ip6_prohibit_entry);
2741out_ip6_null_entry:
2742 kfree(net->ipv6.ip6_null_entry);
2743#endif
fc66f95c
ED
2744out_ip6_dst_entries:
2745 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2746out_ip6_dst_ops:
f2fc6a54 2747 goto out;
cdb18761
DL
2748}
2749
2c8c1e72 2750static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2751{
2752#ifdef CONFIG_PROC_FS
2753 proc_net_remove(net, "ipv6_route");
2754 proc_net_remove(net, "rt6_stats");
2755#endif
8ed67789
DL
2756 kfree(net->ipv6.ip6_null_entry);
2757#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2758 kfree(net->ipv6.ip6_prohibit_entry);
2759 kfree(net->ipv6.ip6_blk_hole_entry);
2760#endif
41bb78b4 2761 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2762}
2763
2764static struct pernet_operations ip6_route_net_ops = {
2765 .init = ip6_route_net_init,
2766 .exit = ip6_route_net_exit,
2767};
2768
8ed67789
DL
2769static struct notifier_block ip6_route_dev_notifier = {
2770 .notifier_call = ip6_route_dev_notify,
2771 .priority = 0,
2772};
2773
433d49c3 2774int __init ip6_route_init(void)
1da177e4 2775{
433d49c3
DL
2776 int ret;
2777
9a7ec3a9
DL
2778 ret = -ENOMEM;
2779 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2780 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2781 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2782 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2783 goto out;
14e50e57 2784
fc66f95c 2785 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2786 if (ret)
bdb3289f 2787 goto out_kmem_cache;
bdb3289f 2788
fc66f95c
ED
2789 ret = register_pernet_subsys(&ip6_route_net_ops);
2790 if (ret)
2791 goto out_dst_entries;
2792
5dc121e9
AE
2793 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2794
8ed67789
DL
2795 /* Registering of the loopback is done before this portion of code,
2796 * the loopback reference in rt6_info will not be taken, do it
2797 * manually for init_net */
d8d1f30b 2798 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2799 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2800 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2801 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2802 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2803 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2804 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2805 #endif
433d49c3
DL
2806 ret = fib6_init();
2807 if (ret)
8ed67789 2808 goto out_register_subsys;
433d49c3 2809
433d49c3
DL
2810 ret = xfrm6_init();
2811 if (ret)
cdb18761 2812 goto out_fib6_init;
c35b7e72 2813
433d49c3
DL
2814 ret = fib6_rules_init();
2815 if (ret)
2816 goto xfrm6_init;
7e5449c2 2817
433d49c3
DL
2818 ret = -ENOBUFS;
2819 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2820 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2821 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2822 goto fib6_rules_init;
c127ea2c 2823
8ed67789 2824 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2825 if (ret)
2826 goto fib6_rules_init;
8ed67789 2827
433d49c3
DL
2828out:
2829 return ret;
2830
2831fib6_rules_init:
433d49c3
DL
2832 fib6_rules_cleanup();
2833xfrm6_init:
433d49c3 2834 xfrm6_fini();
433d49c3 2835out_fib6_init:
433d49c3 2836 fib6_gc_cleanup();
8ed67789
DL
2837out_register_subsys:
2838 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2839out_dst_entries:
2840 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2841out_kmem_cache:
f2fc6a54 2842 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2843 goto out;
1da177e4
LT
2844}
2845
2846void ip6_route_cleanup(void)
2847{
8ed67789 2848 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2849 fib6_rules_cleanup();
1da177e4 2850 xfrm6_fini();
1da177e4 2851 fib6_gc_cleanup();
8ed67789 2852 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2853 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2854 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2855}
This page took 0.898569 seconds and 5 git commands to generate.