ipv6: Stop rt6_info from using inet_peer's metrics
[deliverable/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
51ebd318 60#include <net/nexthop.h>
1da177e4
LT
61
62#include <asm/uaccess.h>
63
64#ifdef CONFIG_SYSCTL
65#include <linux/sysctl.h>
66#endif
67
/*
 * Result of the next-hop neighbour check performed while scoring a route.
 * All failure codes are negative so that callers can test "< 0".
 */
enum rt6_nud_state {
	/* route must not be used; find_match() skips it */
	RT6_NUD_FAIL_HARD = -3,
	/* neighbour is in NUD_FAILED; kept as a (negative) score */
	RT6_NUD_FAIL_PROBE = -2,
	/* no neighbour entry; find_match() scores it 0 and asks for round-robin */
	RT6_NUD_FAIL_DO_RR = -1,
	/* next hop is usable (or no neighbour check applies) */
	RT6_NUD_SUCCEED = 1,
};
74
1716a961 75static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 76 const struct in6_addr *dest);
1da177e4 77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 78static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 79static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81static void ip6_dst_destroy(struct dst_entry *);
82static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
569d3645 84static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
85
86static int ip6_pkt_discard(struct sk_buff *skb);
aad88724 87static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
7150aede 88static int ip6_pkt_prohibit(struct sk_buff *skb);
aad88724 89static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
1da177e4 90static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
91static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 struct sk_buff *skb, u32 mtu);
93static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 struct sk_buff *skb);
4b32b5ad 95static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 96static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 97
70ceb4f5 98#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 99static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
100 const struct in6_addr *prefix, int prefixlen,
101 const struct in6_addr *gwaddr, int ifindex,
95c96174 102 unsigned int pref);
efa2cea0 103static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
104 const struct in6_addr *prefix, int prefixlen,
105 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
106#endif
107
e8243534 108static void rt6_bind_peer(struct rt6_info *rt, int create)
109{
110 struct inet_peer_base *base;
111 struct inet_peer *peer;
112
113 base = inetpeer_base_ptr(rt->_rt6i_peer);
114 if (!base)
115 return;
116
117 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
118 if (peer) {
119 if (!rt6_set_peer(rt, peer))
120 inet_putpeer(peer);
121 }
122}
123
124static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
125{
126 if (rt6_has_peer(rt))
127 return rt6_peer_ptr(rt);
128
129 rt6_bind_peer(rt, create);
130 return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
131}
132
/* Convenience wrapper: __rt6_get_peer() with the create argument set to 1. */
static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
{
	const int create = 1;

	return __rt6_get_peer(rt, create);
}
137
06582540
DM
138static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
139{
4b32b5ad 140 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 141
4b32b5ad
MKL
142 if (rt->rt6i_flags & RTF_CACHE)
143 return NULL;
144 else
3b471175 145 return dst_cow_metrics_generic(dst, old);
06582540
DM
146}
147
f894cbf8
DM
148static inline const void *choose_neigh_daddr(struct rt6_info *rt,
149 struct sk_buff *skb,
150 const void *daddr)
39232973
DM
151{
152 struct in6_addr *p = &rt->rt6i_gateway;
153
a7563f34 154 if (!ipv6_addr_any(p))
39232973 155 return (const void *) p;
f894cbf8
DM
156 else if (skb)
157 return &ipv6_hdr(skb)->daddr;
39232973
DM
158 return daddr;
159}
160
f894cbf8
DM
161static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
162 struct sk_buff *skb,
163 const void *daddr)
d3aaeb38 164{
39232973
DM
165 struct rt6_info *rt = (struct rt6_info *) dst;
166 struct neighbour *n;
167
f894cbf8 168 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 169 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
170 if (n)
171 return n;
172 return neigh_create(&nd_tbl, daddr, dst->dev);
173}
174
9a7ec3a9 175static struct dst_ops ip6_dst_ops_template = {
1da177e4 176 .family = AF_INET6,
1da177e4
LT
177 .gc = ip6_dst_gc,
178 .gc_thresh = 1024,
179 .check = ip6_dst_check,
0dbaee3b 180 .default_advmss = ip6_default_advmss,
ebb762f2 181 .mtu = ip6_mtu,
06582540 182 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
183 .destroy = ip6_dst_destroy,
184 .ifdown = ip6_dst_ifdown,
185 .negative_advice = ip6_negative_advice,
186 .link_failure = ip6_link_failure,
187 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 188 .redirect = rt6_do_redirect,
1ac06e03 189 .local_out = __ip6_local_out,
d3aaeb38 190 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
191};
192
ebb762f2 193static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 194{
618f9bc7
SK
195 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
196
197 return mtu ? : dst->dev->mtu;
ec831ea7
RD
198}
199
6700c270
DM
200static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
201 struct sk_buff *skb, u32 mtu)
14e50e57
DM
202{
203}
204
6700c270
DM
/* dst_ops->redirect hook for blackhole routes: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* redirects are ignored */
}
209
0972ddb2
HB
210static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
211 unsigned long old)
212{
213 return NULL;
214}
215
14e50e57
DM
216static struct dst_ops ip6_dst_blackhole_ops = {
217 .family = AF_INET6,
14e50e57
DM
218 .destroy = ip6_dst_destroy,
219 .check = ip6_dst_check,
ebb762f2 220 .mtu = ip6_blackhole_mtu,
214f45c9 221 .default_advmss = ip6_default_advmss,
14e50e57 222 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 223 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 224 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 225 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
226};
227
62fa8a84 228static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 229 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
230};
231
fb0af4c7 232static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
233 .dst = {
234 .__refcnt = ATOMIC_INIT(1),
235 .__use = 1,
2c20cbd7 236 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 237 .error = -ENETUNREACH,
d8d1f30b
CG
238 .input = ip6_pkt_discard,
239 .output = ip6_pkt_discard_out,
1da177e4
LT
240 },
241 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 242 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
243 .rt6i_metric = ~(u32) 0,
244 .rt6i_ref = ATOMIC_INIT(1),
245};
246
101367c2
TG
247#ifdef CONFIG_IPV6_MULTIPLE_TABLES
248
fb0af4c7 249static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
250 .dst = {
251 .__refcnt = ATOMIC_INIT(1),
252 .__use = 1,
2c20cbd7 253 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 254 .error = -EACCES,
d8d1f30b
CG
255 .input = ip6_pkt_prohibit,
256 .output = ip6_pkt_prohibit_out,
101367c2
TG
257 },
258 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 259 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
260 .rt6i_metric = ~(u32) 0,
261 .rt6i_ref = ATOMIC_INIT(1),
262};
263
fb0af4c7 264static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
265 .dst = {
266 .__refcnt = ATOMIC_INIT(1),
267 .__use = 1,
2c20cbd7 268 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 269 .error = -EINVAL,
d8d1f30b 270 .input = dst_discard,
aad88724 271 .output = dst_discard_sk,
101367c2
TG
272 },
273 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 274 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
275 .rt6i_metric = ~(u32) 0,
276 .rt6i_ref = ATOMIC_INIT(1),
277};
278
279#endif
280
1da177e4 281/* allocate dst with ip6_dst_ops */
97bab73f 282static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 283 struct net_device *dev,
8b96d22d
DM
284 int flags,
285 struct fib6_table *table)
1da177e4 286{
97bab73f 287 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 288 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 289
97bab73f 290 if (rt) {
8104891b
SK
291 struct dst_entry *dst = &rt->dst;
292
293 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
8b96d22d 294 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
51ebd318 295 INIT_LIST_HEAD(&rt->rt6i_siblings);
97bab73f 296 }
cf911662 297 return rt;
1da177e4
LT
298}
299
300static void ip6_dst_destroy(struct dst_entry *dst)
301{
302 struct rt6_info *rt = (struct rt6_info *)dst;
303 struct inet6_dev *idev = rt->rt6i_idev;
ecd98837 304 struct dst_entry *from = dst->from;
1da177e4 305
4b32b5ad 306 dst_destroy_metrics_generic(dst);
8e2ec639 307
38308473 308 if (idev) {
1da177e4
LT
309 rt->rt6i_idev = NULL;
310 in6_dev_put(idev);
1ab1457c 311 }
1716a961 312
ecd98837
YH
313 dst->from = NULL;
314 dst_release(from);
b3419363
DM
315}
316
1da177e4
LT
317static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
318 int how)
319{
320 struct rt6_info *rt = (struct rt6_info *)dst;
321 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 322 struct net_device *loopback_dev =
c346dca1 323 dev_net(dev)->loopback_dev;
1da177e4 324
97cac082
DM
325 if (dev != loopback_dev) {
326 if (idev && idev->dev == dev) {
327 struct inet6_dev *loopback_idev =
328 in6_dev_get(loopback_dev);
329 if (loopback_idev) {
330 rt->rt6i_idev = loopback_idev;
331 in6_dev_put(idev);
332 }
333 }
1da177e4
LT
334 }
335}
336
a50feda5 337static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 338{
1716a961
G
339 if (rt->rt6i_flags & RTF_EXPIRES) {
340 if (time_after(jiffies, rt->dst.expires))
a50feda5 341 return true;
1716a961 342 } else if (rt->dst.from) {
3fd91fb3 343 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 344 }
a50feda5 345 return false;
1da177e4
LT
346}
347
51ebd318
ND
348/* Multipath route selection:
349 * Hash based function using packet header and flowlabel.
350 * Adapted from fib_info_hashfn()
351 */
352static int rt6_info_hash_nhsfn(unsigned int candidate_count,
353 const struct flowi6 *fl6)
354{
355 unsigned int val = fl6->flowi6_proto;
356
c08977bb
YH
357 val ^= ipv6_addr_hash(&fl6->daddr);
358 val ^= ipv6_addr_hash(&fl6->saddr);
51ebd318
ND
359
360 /* Work only if this not encapsulated */
361 switch (fl6->flowi6_proto) {
362 case IPPROTO_UDP:
363 case IPPROTO_TCP:
364 case IPPROTO_SCTP:
b3ce5ae1
ND
365 val ^= (__force u16)fl6->fl6_sport;
366 val ^= (__force u16)fl6->fl6_dport;
51ebd318
ND
367 break;
368
369 case IPPROTO_ICMPV6:
b3ce5ae1
ND
370 val ^= (__force u16)fl6->fl6_icmp_type;
371 val ^= (__force u16)fl6->fl6_icmp_code;
51ebd318
ND
372 break;
373 }
374 /* RFC6438 recommands to use flowlabel */
b3ce5ae1 375 val ^= (__force u32)fl6->flowlabel;
51ebd318
ND
376
377 /* Perhaps, we need to tune, this function? */
378 val = val ^ (val >> 7) ^ (val >> 12);
379 return val % candidate_count;
380}
381
382static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
383 struct flowi6 *fl6, int oif,
384 int strict)
51ebd318
ND
385{
386 struct rt6_info *sibling, *next_sibling;
387 int route_choosen;
388
389 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
390 /* Don't change the route, if route_choosen == 0
391 * (siblings does not include ourself)
392 */
393 if (route_choosen)
394 list_for_each_entry_safe(sibling, next_sibling,
395 &match->rt6i_siblings, rt6i_siblings) {
396 route_choosen--;
397 if (route_choosen == 0) {
52bd4c0c
ND
398 if (rt6_score_route(sibling, oif, strict) < 0)
399 break;
51ebd318
ND
400 match = sibling;
401 break;
402 }
403 }
404 return match;
405}
406
1da177e4 407/*
c71099ac 408 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
409 */
410
8ed67789
DL
411static inline struct rt6_info *rt6_device_match(struct net *net,
412 struct rt6_info *rt,
b71d1d42 413 const struct in6_addr *saddr,
1da177e4 414 int oif,
d420895e 415 int flags)
1da177e4
LT
416{
417 struct rt6_info *local = NULL;
418 struct rt6_info *sprt;
419
dd3abc4e
YH
420 if (!oif && ipv6_addr_any(saddr))
421 goto out;
422
d8d1f30b 423 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 424 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
425
426 if (oif) {
1da177e4
LT
427 if (dev->ifindex == oif)
428 return sprt;
429 if (dev->flags & IFF_LOOPBACK) {
38308473 430 if (!sprt->rt6i_idev ||
1da177e4 431 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 432 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 433 continue;
1ab1457c 434 if (local && (!oif ||
1da177e4
LT
435 local->rt6i_idev->dev->ifindex == oif))
436 continue;
437 }
438 local = sprt;
439 }
dd3abc4e
YH
440 } else {
441 if (ipv6_chk_addr(net, saddr, dev,
442 flags & RT6_LOOKUP_F_IFACE))
443 return sprt;
1da177e4 444 }
dd3abc4e 445 }
1da177e4 446
dd3abc4e 447 if (oif) {
1da177e4
LT
448 if (local)
449 return local;
450
d420895e 451 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 452 return net->ipv6.ip6_null_entry;
1da177e4 453 }
dd3abc4e 454out:
1da177e4
LT
455 return rt;
456}
457
27097255 458#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
459struct __rt6_probe_work {
460 struct work_struct work;
461 struct in6_addr target;
462 struct net_device *dev;
463};
464
465static void rt6_probe_deferred(struct work_struct *w)
466{
467 struct in6_addr mcaddr;
468 struct __rt6_probe_work *work =
469 container_of(w, struct __rt6_probe_work, work);
470
471 addrconf_addr_solict_mult(&work->target, &mcaddr);
472 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
473 dev_put(work->dev);
662f5533 474 kfree(work);
c2f17e82
HFS
475}
476
27097255
YH
477static void rt6_probe(struct rt6_info *rt)
478{
f2c31e32 479 struct neighbour *neigh;
27097255
YH
480 /*
481 * Okay, this does not seem to be appropriate
482 * for now, however, we need to check if it
483 * is really so; aka Router Reachability Probing.
484 *
485 * Router Reachability Probe MUST be rate-limited
486 * to no more than one per minute.
487 */
2152caea 488 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 489 return;
2152caea
YH
490 rcu_read_lock_bh();
491 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
492 if (neigh) {
493 write_lock(&neigh->lock);
494 if (neigh->nud_state & NUD_VALID)
495 goto out;
7ff74a59 496 }
2152caea
YH
497
498 if (!neigh ||
52e16356 499 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
c2f17e82 500 struct __rt6_probe_work *work;
27097255 501
c2f17e82
HFS
502 work = kmalloc(sizeof(*work), GFP_ATOMIC);
503
504 if (neigh && work)
7e980569 505 __neigh_set_probe_once(neigh);
c2f17e82
HFS
506
507 if (neigh)
2152caea
YH
508 write_unlock(&neigh->lock);
509
c2f17e82
HFS
510 if (work) {
511 INIT_WORK(&work->work, rt6_probe_deferred);
512 work->target = rt->rt6i_gateway;
513 dev_hold(rt->dst.dev);
514 work->dev = rt->dst.dev;
515 schedule_work(&work->work);
516 }
f2c31e32 517 } else {
2152caea
YH
518out:
519 write_unlock(&neigh->lock);
f2c31e32 520 }
2152caea 521 rcu_read_unlock_bh();
27097255
YH
522}
523#else
/* Without CONFIG_IPV6_ROUTER_PREF no reachability probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
527#endif
528
1da177e4 529/*
554cfb7e 530 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 531 */
b6f99a21 532static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 533{
d1918542 534 struct net_device *dev = rt->dst.dev;
161980f4 535 if (!oif || dev->ifindex == oif)
554cfb7e 536 return 2;
161980f4
DM
537 if ((dev->flags & IFF_LOOPBACK) &&
538 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
539 return 1;
540 return 0;
554cfb7e 541}
1da177e4 542
afc154e9 543static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 544{
f2c31e32 545 struct neighbour *neigh;
afc154e9 546 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 547
4d0c5911
YH
548 if (rt->rt6i_flags & RTF_NONEXTHOP ||
549 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 550 return RT6_NUD_SUCCEED;
145a3621
YH
551
552 rcu_read_lock_bh();
553 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
554 if (neigh) {
555 read_lock(&neigh->lock);
554cfb7e 556 if (neigh->nud_state & NUD_VALID)
afc154e9 557 ret = RT6_NUD_SUCCEED;
398bcbeb 558#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 559 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 560 ret = RT6_NUD_SUCCEED;
7e980569
JB
561 else
562 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 563#endif
145a3621 564 read_unlock(&neigh->lock);
afc154e9
HFS
565 } else {
566 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 567 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 568 }
145a3621
YH
569 rcu_read_unlock_bh();
570
a5a81f0b 571 return ret;
1da177e4
LT
572}
573
554cfb7e
YH
574static int rt6_score_route(struct rt6_info *rt, int oif,
575 int strict)
1da177e4 576{
a5a81f0b 577 int m;
1ab1457c 578
4d0c5911 579 m = rt6_check_dev(rt, oif);
77d16f45 580 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 581 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
582#ifdef CONFIG_IPV6_ROUTER_PREF
583 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
584#endif
afc154e9
HFS
585 if (strict & RT6_LOOKUP_F_REACHABLE) {
586 int n = rt6_check_neigh(rt);
587 if (n < 0)
588 return n;
589 }
554cfb7e
YH
590 return m;
591}
592
f11e6659 593static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
594 int *mpri, struct rt6_info *match,
595 bool *do_rr)
554cfb7e 596{
f11e6659 597 int m;
afc154e9 598 bool match_do_rr = false;
f11e6659
DM
599
600 if (rt6_check_expired(rt))
601 goto out;
602
603 m = rt6_score_route(rt, oif, strict);
7e980569 604 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
605 match_do_rr = true;
606 m = 0; /* lowest valid score */
7e980569 607 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 608 goto out;
afc154e9
HFS
609 }
610
611 if (strict & RT6_LOOKUP_F_REACHABLE)
612 rt6_probe(rt);
f11e6659 613
7e980569 614 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 615 if (m > *mpri) {
afc154e9 616 *do_rr = match_do_rr;
f11e6659
DM
617 *mpri = m;
618 match = rt;
f11e6659 619 }
f11e6659
DM
620out:
621 return match;
622}
623
624static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
625 struct rt6_info *rr_head,
afc154e9
HFS
626 u32 metric, int oif, int strict,
627 bool *do_rr)
f11e6659 628{
9fbdcfaf 629 struct rt6_info *rt, *match, *cont;
554cfb7e 630 int mpri = -1;
1da177e4 631
f11e6659 632 match = NULL;
9fbdcfaf
SK
633 cont = NULL;
634 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
635 if (rt->rt6i_metric != metric) {
636 cont = rt;
637 break;
638 }
639
640 match = find_match(rt, oif, strict, &mpri, match, do_rr);
641 }
642
643 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
644 if (rt->rt6i_metric != metric) {
645 cont = rt;
646 break;
647 }
648
afc154e9 649 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
650 }
651
652 if (match || !cont)
653 return match;
654
655 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 656 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 657
f11e6659
DM
658 return match;
659}
1da177e4 660
f11e6659
DM
661static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
662{
663 struct rt6_info *match, *rt0;
8ed67789 664 struct net *net;
afc154e9 665 bool do_rr = false;
1da177e4 666
f11e6659
DM
667 rt0 = fn->rr_ptr;
668 if (!rt0)
669 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 670
afc154e9
HFS
671 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
672 &do_rr);
1da177e4 673
afc154e9 674 if (do_rr) {
d8d1f30b 675 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 676
554cfb7e 677 /* no entries matched; do round-robin */
f11e6659
DM
678 if (!next || next->rt6i_metric != rt0->rt6i_metric)
679 next = fn->leaf;
680
681 if (next != rt0)
682 fn->rr_ptr = next;
1da177e4 683 }
1da177e4 684
d1918542 685 net = dev_net(rt0->dst.dev);
a02cec21 686 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
687}
688
70ceb4f5
YH
689#ifdef CONFIG_IPV6_ROUTE_INFO
690int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 691 const struct in6_addr *gwaddr)
70ceb4f5 692{
c346dca1 693 struct net *net = dev_net(dev);
70ceb4f5
YH
694 struct route_info *rinfo = (struct route_info *) opt;
695 struct in6_addr prefix_buf, *prefix;
696 unsigned int pref;
4bed72e4 697 unsigned long lifetime;
70ceb4f5
YH
698 struct rt6_info *rt;
699
700 if (len < sizeof(struct route_info)) {
701 return -EINVAL;
702 }
703
704 /* Sanity check for prefix_len and length */
705 if (rinfo->length > 3) {
706 return -EINVAL;
707 } else if (rinfo->prefix_len > 128) {
708 return -EINVAL;
709 } else if (rinfo->prefix_len > 64) {
710 if (rinfo->length < 2) {
711 return -EINVAL;
712 }
713 } else if (rinfo->prefix_len > 0) {
714 if (rinfo->length < 1) {
715 return -EINVAL;
716 }
717 }
718
719 pref = rinfo->route_pref;
720 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 721 return -EINVAL;
70ceb4f5 722
4bed72e4 723 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
724
725 if (rinfo->length == 3)
726 prefix = (struct in6_addr *)rinfo->prefix;
727 else {
728 /* this function is safe */
729 ipv6_addr_prefix(&prefix_buf,
730 (struct in6_addr *)rinfo->prefix,
731 rinfo->prefix_len);
732 prefix = &prefix_buf;
733 }
734
f104a567
DJ
735 if (rinfo->prefix_len == 0)
736 rt = rt6_get_dflt_router(gwaddr, dev);
737 else
738 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
739 gwaddr, dev->ifindex);
70ceb4f5
YH
740
741 if (rt && !lifetime) {
e0a1ad73 742 ip6_del_rt(rt);
70ceb4f5
YH
743 rt = NULL;
744 }
745
746 if (!rt && lifetime)
efa2cea0 747 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
748 pref);
749 else if (rt)
750 rt->rt6i_flags = RTF_ROUTEINFO |
751 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
752
753 if (rt) {
1716a961
G
754 if (!addrconf_finite_timeout(lifetime))
755 rt6_clean_expires(rt);
756 else
757 rt6_set_expires(rt, jiffies + HZ * lifetime);
758
94e187c0 759 ip6_rt_put(rt);
70ceb4f5
YH
760 }
761 return 0;
762}
763#endif
764
a3c00e46
MKL
765static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
766 struct in6_addr *saddr)
767{
768 struct fib6_node *pn;
769 while (1) {
770 if (fn->fn_flags & RTN_TL_ROOT)
771 return NULL;
772 pn = fn->parent;
773 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
774 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
775 else
776 fn = pn;
777 if (fn->fn_flags & RTN_RTINFO)
778 return fn;
779 }
780}
c71099ac 781
8ed67789
DL
782static struct rt6_info *ip6_pol_route_lookup(struct net *net,
783 struct fib6_table *table,
4c9483b2 784 struct flowi6 *fl6, int flags)
1da177e4
LT
785{
786 struct fib6_node *fn;
787 struct rt6_info *rt;
788
c71099ac 789 read_lock_bh(&table->tb6_lock);
4c9483b2 790 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
791restart:
792 rt = fn->leaf;
4c9483b2 793 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 794 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 795 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
796 if (rt == net->ipv6.ip6_null_entry) {
797 fn = fib6_backtrack(fn, &fl6->saddr);
798 if (fn)
799 goto restart;
800 }
d8d1f30b 801 dst_use(&rt->dst, jiffies);
c71099ac 802 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
803 return rt;
804
805}
806
67ba4152 807struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
808 int flags)
809{
810 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
811}
812EXPORT_SYMBOL_GPL(ip6_route_lookup);
813
9acd9f3a
YH
814struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
815 const struct in6_addr *saddr, int oif, int strict)
c71099ac 816{
4c9483b2
DM
817 struct flowi6 fl6 = {
818 .flowi6_oif = oif,
819 .daddr = *daddr,
c71099ac
TG
820 };
821 struct dst_entry *dst;
77d16f45 822 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 823
adaa70bb 824 if (saddr) {
4c9483b2 825 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
826 flags |= RT6_LOOKUP_F_HAS_SADDR;
827 }
828
4c9483b2 829 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
830 if (dst->error == 0)
831 return (struct rt6_info *) dst;
832
833 dst_release(dst);
834
1da177e4
LT
835 return NULL;
836}
7159039a
YH
837EXPORT_SYMBOL(rt6_lookup);
838
c71099ac 839/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
840 It takes new route entry, the addition fails by any reason the
841 route is freed. In any case, if caller does not hold it, it may
842 be destroyed.
843 */
844
e5fd387a 845static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 846 struct mx6_config *mxc)
1da177e4
LT
847{
848 int err;
c71099ac 849 struct fib6_table *table;
1da177e4 850
c71099ac
TG
851 table = rt->rt6i_table;
852 write_lock_bh(&table->tb6_lock);
e715b6d3 853 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 854 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
855
856 return err;
857}
858
40e22e8f
TG
859int ip6_ins_rt(struct rt6_info *rt)
860{
e715b6d3
FW
861 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
862 struct mx6_config mxc = { .mx = NULL, };
863
864 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
865}
866
1716a961 867static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 868 const struct in6_addr *daddr,
b71d1d42 869 const struct in6_addr *saddr)
1da177e4 870{
1da177e4
LT
871 struct rt6_info *rt;
872
873 /*
874 * Clone the route.
875 */
876
21efcfa0 877 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
878
879 if (rt) {
249a3630
DJ
880 if (ort->rt6i_dst.plen != 128 &&
881 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
882 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 883
1da177e4 884 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
885
886#ifdef CONFIG_IPV6_SUBTREES
887 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 888 rt->rt6i_src.addr = *saddr;
1da177e4
LT
889 rt->rt6i_src.plen = 128;
890 }
891#endif
95a9a5ba 892 }
1da177e4 893
95a9a5ba
YH
894 return rt;
895}
1da177e4 896
21efcfa0
ED
897static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
898 const struct in6_addr *daddr)
299d9939 899{
21efcfa0
ED
900 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
901
887c95cc 902 if (rt)
299d9939 903 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
904 return rt;
905}
906
8ed67789 907static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 908 struct flowi6 *fl6, int flags)
1da177e4 909{
367efcb9 910 struct fib6_node *fn, *saved_fn;
519fbd87 911 struct rt6_info *rt, *nrt;
c71099ac 912 int strict = 0;
1da177e4 913 int attempts = 3;
519fbd87 914 int err;
1da177e4 915
77d16f45 916 strict |= flags & RT6_LOOKUP_F_IFACE;
367efcb9
MKL
917 if (net->ipv6.devconf_all->forwarding == 0)
918 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 919
a3c00e46 920redo_fib6_lookup_lock:
c71099ac 921 read_lock_bh(&table->tb6_lock);
1da177e4 922
4c9483b2 923 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 924 saved_fn = fn;
1da177e4 925
a3c00e46 926redo_rt6_select:
367efcb9 927 rt = rt6_select(fn, oif, strict);
52bd4c0c 928 if (rt->rt6i_nsiblings)
367efcb9 929 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
930 if (rt == net->ipv6.ip6_null_entry) {
931 fn = fib6_backtrack(fn, &fl6->saddr);
932 if (fn)
933 goto redo_rt6_select;
367efcb9
MKL
934 else if (strict & RT6_LOOKUP_F_REACHABLE) {
935 /* also consider unreachable route */
936 strict &= ~RT6_LOOKUP_F_REACHABLE;
937 fn = saved_fn;
938 goto redo_rt6_select;
939 } else {
940 dst_hold(&rt->dst);
941 read_unlock_bh(&table->tb6_lock);
942 goto out2;
943 }
a3c00e46
MKL
944 }
945
d8d1f30b 946 dst_hold(&rt->dst);
c71099ac 947 read_unlock_bh(&table->tb6_lock);
fb9de91e 948
94c77bb4
MKL
949 if (rt->rt6i_flags & RTF_CACHE)
950 goto out2;
951
c440f160 952 if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
4c9483b2 953 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
653437d0 954 else if (!(rt->dst.flags & DST_HOST) || !(rt->dst.flags & RTF_LOCAL))
4c9483b2 955 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
956 else
957 goto out2;
e40cf353 958
94e187c0 959 ip6_rt_put(rt);
8ed67789 960 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 961
d8d1f30b 962 dst_hold(&rt->dst);
519fbd87 963 if (nrt) {
40e22e8f 964 err = ip6_ins_rt(nrt);
519fbd87 965 if (!err)
1da177e4 966 goto out2;
1da177e4 967 }
1da177e4 968
519fbd87
YH
969 if (--attempts <= 0)
970 goto out2;
971
972 /*
c71099ac 973 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
974 * released someone could insert this route. Relookup.
975 */
94e187c0 976 ip6_rt_put(rt);
a3c00e46 977 goto redo_fib6_lookup_lock;
519fbd87 978
1da177e4 979out2:
4b32b5ad 980 rt6_dst_from_metrics_check(rt);
d8d1f30b
CG
981 rt->dst.lastuse = jiffies;
982 rt->dst.__use++;
c71099ac
TG
983
984 return rt;
1da177e4
LT
985}
986
8ed67789 987static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 988 struct flowi6 *fl6, int flags)
4acad72d 989{
4c9483b2 990 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
991}
992
72331bc0
SL
993static struct dst_entry *ip6_route_input_lookup(struct net *net,
994 struct net_device *dev,
995 struct flowi6 *fl6, int flags)
996{
997 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
998 flags |= RT6_LOOKUP_F_IFACE;
999
1000 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1001}
1002
c71099ac
TG
1003void ip6_route_input(struct sk_buff *skb)
1004{
b71d1d42 1005 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1006 struct net *net = dev_net(skb->dev);
adaa70bb 1007 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
1008 struct flowi6 fl6 = {
1009 .flowi6_iif = skb->dev->ifindex,
1010 .daddr = iph->daddr,
1011 .saddr = iph->saddr,
6502ca52 1012 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1013 .flowi6_mark = skb->mark,
1014 .flowi6_proto = iph->nexthdr,
c71099ac 1015 };
adaa70bb 1016
72331bc0 1017 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1018}
1019
8ed67789 1020static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1021 struct flowi6 *fl6, int flags)
1da177e4 1022{
4c9483b2 1023 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1024}
1025
67ba4152 1026struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 1027 struct flowi6 *fl6)
c71099ac
TG
1028{
1029 int flags = 0;
1030
1fb9489b 1031 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1032
4c9483b2 1033 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 1034 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1035
4c9483b2 1036 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 1037 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1038 else if (sk)
1039 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1040
4c9483b2 1041 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1042}
7159039a 1043EXPORT_SYMBOL(ip6_route_output);
1da177e4 1044
2774c131 1045struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1046{
5c1e6aa3 1047 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1048 struct dst_entry *new = NULL;
1049
f5b0a874 1050 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1051 if (rt) {
d8d1f30b 1052 new = &rt->dst;
14e50e57 1053
8104891b
SK
1054 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1055 rt6_init_peer(rt, net->ipv6.peers);
1056
14e50e57 1057 new->__use = 1;
352e512c 1058 new->input = dst_discard;
aad88724 1059 new->output = dst_discard_sk;
14e50e57 1060
21efcfa0
ED
1061 if (dst_metrics_read_only(&ort->dst))
1062 new->_metrics = ort->dst._metrics;
1063 else
1064 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1065 rt->rt6i_idev = ort->rt6i_idev;
1066 if (rt->rt6i_idev)
1067 in6_dev_hold(rt->rt6i_idev);
14e50e57 1068
4e3fd7a0 1069 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961 1070 rt->rt6i_flags = ort->rt6i_flags;
14e50e57
DM
1071 rt->rt6i_metric = 0;
1072
1073 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1074#ifdef CONFIG_IPV6_SUBTREES
1075 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1076#endif
1077
1078 dst_free(new);
1079 }
1080
69ead7af
DM
1081 dst_release(dst_orig);
1082 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1083}
14e50e57 1084
1da177e4
LT
1085/*
1086 * Destination cache support functions
1087 */
1088
4b32b5ad
MKL
1089static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1090{
1091 if (rt->dst.from &&
1092 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1093 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1094}
1095
1da177e4
LT
1096static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1097{
1098 struct rt6_info *rt;
1099
1100 rt = (struct rt6_info *) dst;
1101
6f3118b5
ND
1102 /* All IPV6 dsts are created with ->obsolete set to the value
1103 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1104 * into this function always.
1105 */
e3bc10bd
HFS
1106 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1107 return NULL;
a4477c4d 1108
e3bc10bd
HFS
1109 if (rt6_check_expired(rt))
1110 return NULL;
1111
4b32b5ad
MKL
1112 rt6_dst_from_metrics_check(rt);
1113
e3bc10bd 1114 return dst;
1da177e4
LT
1115}
1116
1117static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1118{
1119 struct rt6_info *rt = (struct rt6_info *) dst;
1120
1121 if (rt) {
54c1a859
YH
1122 if (rt->rt6i_flags & RTF_CACHE) {
1123 if (rt6_check_expired(rt)) {
1124 ip6_del_rt(rt);
1125 dst = NULL;
1126 }
1127 } else {
1da177e4 1128 dst_release(dst);
54c1a859
YH
1129 dst = NULL;
1130 }
1da177e4 1131 }
54c1a859 1132 return dst;
1da177e4
LT
1133}
1134
1135static void ip6_link_failure(struct sk_buff *skb)
1136{
1137 struct rt6_info *rt;
1138
3ffe533c 1139 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1140
adf30907 1141 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1142 if (rt) {
1eb4f758
HFS
1143 if (rt->rt6i_flags & RTF_CACHE) {
1144 dst_hold(&rt->dst);
1145 if (ip6_del_rt(rt))
1146 dst_free(&rt->dst);
1147 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1148 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1149 }
1da177e4
LT
1150 }
1151}
1152
6700c270
DM
1153static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1154 struct sk_buff *skb, u32 mtu)
1da177e4 1155{
67ba4152 1156 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1157
81aded24 1158 dst_confirm(dst);
653437d0 1159 if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) {
81aded24
DM
1160 struct net *net = dev_net(dst->dev);
1161
1da177e4 1162 rt6->rt6i_flags |= RTF_MODIFIED;
9d289715 1163 if (mtu < IPV6_MIN_MTU)
1da177e4 1164 mtu = IPV6_MIN_MTU;
9d289715 1165
4b32b5ad 1166 rt6->rt6i_pmtu = mtu;
81aded24 1167 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1168 }
1169}
1170
42ae66c8
DM
1171void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1172 int oif, u32 mark)
81aded24
DM
1173{
1174 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1175 struct dst_entry *dst;
1176 struct flowi6 fl6;
1177
1178 memset(&fl6, 0, sizeof(fl6));
1179 fl6.flowi6_oif = oif;
1b3c61dc 1180 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1181 fl6.daddr = iph->daddr;
1182 fl6.saddr = iph->saddr;
6502ca52 1183 fl6.flowlabel = ip6_flowinfo(iph);
81aded24
DM
1184
1185 dst = ip6_route_output(net, NULL, &fl6);
1186 if (!dst->error)
6700c270 1187 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
81aded24
DM
1188 dst_release(dst);
1189}
1190EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1191
1192void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1193{
1194 ip6_update_pmtu(skb, sock_net(sk), mtu,
1195 sk->sk_bound_dev_if, sk->sk_mark);
1196}
1197EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1198
b55b76b2
DJ
1199/* Handle redirects */
1200struct ip6rd_flowi {
1201 struct flowi6 fl6;
1202 struct in6_addr gateway;
1203};
1204
1205static struct rt6_info *__ip6_route_redirect(struct net *net,
1206 struct fib6_table *table,
1207 struct flowi6 *fl6,
1208 int flags)
1209{
1210 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1211 struct rt6_info *rt;
1212 struct fib6_node *fn;
1213
1214 /* Get the "current" route for this destination and
1215 * check if the redirect has come from approriate router.
1216 *
1217 * RFC 4861 specifies that redirects should only be
1218 * accepted if they come from the nexthop to the target.
1219 * Due to the way the routes are chosen, this notion
1220 * is a bit fuzzy and one might need to check all possible
1221 * routes.
1222 */
1223
1224 read_lock_bh(&table->tb6_lock);
1225 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1226restart:
1227 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1228 if (rt6_check_expired(rt))
1229 continue;
1230 if (rt->dst.error)
1231 break;
1232 if (!(rt->rt6i_flags & RTF_GATEWAY))
1233 continue;
1234 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1235 continue;
1236 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1237 continue;
1238 break;
1239 }
1240
1241 if (!rt)
1242 rt = net->ipv6.ip6_null_entry;
1243 else if (rt->dst.error) {
1244 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1245 goto out;
1246 }
1247
1248 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1249 fn = fib6_backtrack(fn, &fl6->saddr);
1250 if (fn)
1251 goto restart;
b55b76b2 1252 }
a3c00e46 1253
b0a1ba59 1254out:
b55b76b2
DJ
1255 dst_hold(&rt->dst);
1256
1257 read_unlock_bh(&table->tb6_lock);
1258
1259 return rt;
1260};
1261
1262static struct dst_entry *ip6_route_redirect(struct net *net,
1263 const struct flowi6 *fl6,
1264 const struct in6_addr *gateway)
1265{
1266 int flags = RT6_LOOKUP_F_HAS_SADDR;
1267 struct ip6rd_flowi rdfl;
1268
1269 rdfl.fl6 = *fl6;
1270 rdfl.gateway = *gateway;
1271
1272 return fib6_rule_lookup(net, &rdfl.fl6,
1273 flags, __ip6_route_redirect);
1274}
1275
3a5ad2ee
DM
1276void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1277{
1278 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1279 struct dst_entry *dst;
1280 struct flowi6 fl6;
1281
1282 memset(&fl6, 0, sizeof(fl6));
e374c618 1283 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1284 fl6.flowi6_oif = oif;
1285 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1286 fl6.daddr = iph->daddr;
1287 fl6.saddr = iph->saddr;
6502ca52 1288 fl6.flowlabel = ip6_flowinfo(iph);
3a5ad2ee 1289
b55b76b2
DJ
1290 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1291 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1292 dst_release(dst);
1293}
1294EXPORT_SYMBOL_GPL(ip6_redirect);
1295
c92a59ec
DJ
1296void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1297 u32 mark)
1298{
1299 const struct ipv6hdr *iph = ipv6_hdr(skb);
1300 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1301 struct dst_entry *dst;
1302 struct flowi6 fl6;
1303
1304 memset(&fl6, 0, sizeof(fl6));
e374c618 1305 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1306 fl6.flowi6_oif = oif;
1307 fl6.flowi6_mark = mark;
c92a59ec
DJ
1308 fl6.daddr = msg->dest;
1309 fl6.saddr = iph->daddr;
1310
b55b76b2
DJ
1311 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1312 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1313 dst_release(dst);
1314}
1315
3a5ad2ee
DM
1316void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1317{
1318 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1319}
1320EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1321
0dbaee3b 1322static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1323{
0dbaee3b
DM
1324 struct net_device *dev = dst->dev;
1325 unsigned int mtu = dst_mtu(dst);
1326 struct net *net = dev_net(dev);
1327
1da177e4
LT
1328 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1329
5578689a
DL
1330 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1331 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1332
1333 /*
1ab1457c
YH
1334 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1335 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1336 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1337 * rely only on pmtu discovery"
1338 */
1339 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1340 mtu = IPV6_MAXPLEN;
1341 return mtu;
1342}
1343
ebb762f2 1344static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1345{
4b32b5ad
MKL
1346 const struct rt6_info *rt = (const struct rt6_info *)dst;
1347 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1348 struct inet6_dev *idev;
618f9bc7 1349
4b32b5ad
MKL
1350 if (mtu)
1351 goto out;
1352
1353 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1354 if (mtu)
30f78d8e 1355 goto out;
618f9bc7
SK
1356
1357 mtu = IPV6_MIN_MTU;
d33e4553
DM
1358
1359 rcu_read_lock();
1360 idev = __in6_dev_get(dst->dev);
1361 if (idev)
1362 mtu = idev->cnf.mtu6;
1363 rcu_read_unlock();
1364
30f78d8e
ED
1365out:
1366 return min_t(unsigned int, mtu, IP6_MAX_MTU);
d33e4553
DM
1367}
1368
3b00944c
YH
/*
 * Singly linked list (through dst->next) of the dst entries created by
 * icmp6_dst_alloc(); walked by icmp6_dst_gc() and icmp6_clean_all().
 * All accesses are made under icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1371
3b00944c 1372struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1373 struct flowi6 *fl6)
1da177e4 1374{
87a11578 1375 struct dst_entry *dst;
1da177e4
LT
1376 struct rt6_info *rt;
1377 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1378 struct net *net = dev_net(dev);
1da177e4 1379
38308473 1380 if (unlikely(!idev))
122bdf67 1381 return ERR_PTR(-ENODEV);
1da177e4 1382
8b96d22d 1383 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1384 if (unlikely(!rt)) {
1da177e4 1385 in6_dev_put(idev);
87a11578 1386 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1387 goto out;
1388 }
1389
8e2ec639
YZ
1390 rt->dst.flags |= DST_HOST;
1391 rt->dst.output = ip6_output;
d8d1f30b 1392 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1393 rt->rt6i_gateway = fl6->daddr;
87a11578 1394 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1395 rt->rt6i_dst.plen = 128;
1396 rt->rt6i_idev = idev;
14edd87d 1397 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1398
3b00944c 1399 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1400 rt->dst.next = icmp6_dst_gc_list;
1401 icmp6_dst_gc_list = &rt->dst;
3b00944c 1402 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1403
5578689a 1404 fib6_force_start_gc(net);
1da177e4 1405
87a11578
DM
1406 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1407
1da177e4 1408out:
87a11578 1409 return dst;
1da177e4
LT
1410}
1411
3d0f24a7 1412int icmp6_dst_gc(void)
1da177e4 1413{
e9476e95 1414 struct dst_entry *dst, **pprev;
3d0f24a7 1415 int more = 0;
1da177e4 1416
3b00944c
YH
1417 spin_lock_bh(&icmp6_dst_lock);
1418 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1419
1da177e4
LT
1420 while ((dst = *pprev) != NULL) {
1421 if (!atomic_read(&dst->__refcnt)) {
1422 *pprev = dst->next;
1423 dst_free(dst);
1da177e4
LT
1424 } else {
1425 pprev = &dst->next;
3d0f24a7 1426 ++more;
1da177e4
LT
1427 }
1428 }
1429
3b00944c 1430 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1431
3d0f24a7 1432 return more;
1da177e4
LT
1433}
1434
1e493d19
DM
1435static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1436 void *arg)
1437{
1438 struct dst_entry *dst, **pprev;
1439
1440 spin_lock_bh(&icmp6_dst_lock);
1441 pprev = &icmp6_dst_gc_list;
1442 while ((dst = *pprev) != NULL) {
1443 struct rt6_info *rt = (struct rt6_info *) dst;
1444 if (func(rt, arg)) {
1445 *pprev = dst->next;
1446 dst_free(dst);
1447 } else {
1448 pprev = &dst->next;
1449 }
1450 }
1451 spin_unlock_bh(&icmp6_dst_lock);
1452}
1453
569d3645 1454static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1455{
86393e52 1456 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1457 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1458 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1459 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1460 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1461 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1462 int entries;
7019b78e 1463
fc66f95c 1464 entries = dst_entries_get_fast(ops);
49a18d86 1465 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1466 entries <= rt_max_size)
1da177e4
LT
1467 goto out;
1468
6891a346 1469 net->ipv6.ip6_rt_gc_expire++;
14956643 1470 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1471 entries = dst_entries_get_slow(ops);
1472 if (entries < ops->gc_thresh)
7019b78e 1473 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1474out:
7019b78e 1475 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1476 return entries > rt_max_size;
1da177e4
LT
1477}
1478
e715b6d3
FW
1479static int ip6_convert_metrics(struct mx6_config *mxc,
1480 const struct fib6_config *cfg)
1481{
1482 struct nlattr *nla;
1483 int remaining;
1484 u32 *mp;
1485
63159f29 1486 if (!cfg->fc_mx)
e715b6d3
FW
1487 return 0;
1488
1489 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1490 if (unlikely(!mp))
1491 return -ENOMEM;
1492
1493 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1494 int type = nla_type(nla);
1495
1496 if (type) {
ea697639
DB
1497 u32 val;
1498
e715b6d3
FW
1499 if (unlikely(type > RTAX_MAX))
1500 goto err;
ea697639
DB
1501 if (type == RTAX_CC_ALGO) {
1502 char tmp[TCP_CA_NAME_MAX];
1503
1504 nla_strlcpy(tmp, nla, sizeof(tmp));
1505 val = tcp_ca_get_key_by_name(tmp);
1506 if (val == TCP_CA_UNSPEC)
1507 goto err;
1508 } else {
1509 val = nla_get_u32(nla);
1510 }
e715b6d3 1511
ea697639 1512 mp[type - 1] = val;
e715b6d3
FW
1513 __set_bit(type - 1, mxc->mx_valid);
1514 }
1515 }
1516
1517 mxc->mx = mp;
1518
1519 return 0;
1520 err:
1521 kfree(mp);
1522 return -EINVAL;
1523}
1da177e4 1524
86872cb5 1525int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1526{
1527 int err;
5578689a 1528 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1529 struct rt6_info *rt = NULL;
1530 struct net_device *dev = NULL;
1531 struct inet6_dev *idev = NULL;
c71099ac 1532 struct fib6_table *table;
e715b6d3 1533 struct mx6_config mxc = { .mx = NULL, };
1da177e4
LT
1534 int addr_type;
1535
86872cb5 1536 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1537 return -EINVAL;
1538#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1539 if (cfg->fc_src_len)
1da177e4
LT
1540 return -EINVAL;
1541#endif
86872cb5 1542 if (cfg->fc_ifindex) {
1da177e4 1543 err = -ENODEV;
5578689a 1544 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1545 if (!dev)
1546 goto out;
1547 idev = in6_dev_get(dev);
1548 if (!idev)
1549 goto out;
1550 }
1551
86872cb5
TG
1552 if (cfg->fc_metric == 0)
1553 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1554
d71314b4 1555 err = -ENOBUFS;
38308473
DM
1556 if (cfg->fc_nlinfo.nlh &&
1557 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1558 table = fib6_get_table(net, cfg->fc_table);
38308473 1559 if (!table) {
f3213831 1560 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1561 table = fib6_new_table(net, cfg->fc_table);
1562 }
1563 } else {
1564 table = fib6_new_table(net, cfg->fc_table);
1565 }
38308473
DM
1566
1567 if (!table)
c71099ac 1568 goto out;
c71099ac 1569
c88507fb 1570 rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1da177e4 1571
38308473 1572 if (!rt) {
1da177e4
LT
1573 err = -ENOMEM;
1574 goto out;
1575 }
1576
1716a961
G
1577 if (cfg->fc_flags & RTF_EXPIRES)
1578 rt6_set_expires(rt, jiffies +
1579 clock_t_to_jiffies(cfg->fc_expires));
1580 else
1581 rt6_clean_expires(rt);
1da177e4 1582
86872cb5
TG
1583 if (cfg->fc_protocol == RTPROT_UNSPEC)
1584 cfg->fc_protocol = RTPROT_BOOT;
1585 rt->rt6i_protocol = cfg->fc_protocol;
1586
1587 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1588
1589 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1590 rt->dst.input = ip6_mc_input;
ab79ad14
1591 else if (cfg->fc_flags & RTF_LOCAL)
1592 rt->dst.input = ip6_input;
1da177e4 1593 else
d8d1f30b 1594 rt->dst.input = ip6_forward;
1da177e4 1595
d8d1f30b 1596 rt->dst.output = ip6_output;
1da177e4 1597
86872cb5
TG
1598 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1599 rt->rt6i_dst.plen = cfg->fc_dst_len;
e5fd387a
MK
1600 if (rt->rt6i_dst.plen == 128) {
1601 rt->dst.flags |= DST_HOST;
1602 dst_metrics_set_force_overwrite(&rt->dst);
8e2ec639 1603 }
e5fd387a 1604
1da177e4 1605#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1606 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1607 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1608#endif
1609
86872cb5 1610 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1611
1612 /* We cannot add true routes via loopback here,
1613 they would result in kernel looping; promote them to reject routes
1614 */
86872cb5 1615 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1616 (dev && (dev->flags & IFF_LOOPBACK) &&
1617 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1618 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1619 /* hold loopback dev/idev if we haven't done so. */
5578689a 1620 if (dev != net->loopback_dev) {
1da177e4
LT
1621 if (dev) {
1622 dev_put(dev);
1623 in6_dev_put(idev);
1624 }
5578689a 1625 dev = net->loopback_dev;
1da177e4
LT
1626 dev_hold(dev);
1627 idev = in6_dev_get(dev);
1628 if (!idev) {
1629 err = -ENODEV;
1630 goto out;
1631 }
1632 }
1da177e4 1633 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1634 switch (cfg->fc_type) {
1635 case RTN_BLACKHOLE:
1636 rt->dst.error = -EINVAL;
aad88724 1637 rt->dst.output = dst_discard_sk;
7150aede 1638 rt->dst.input = dst_discard;
ef2c7d7b
ND
1639 break;
1640 case RTN_PROHIBIT:
1641 rt->dst.error = -EACCES;
7150aede
K
1642 rt->dst.output = ip6_pkt_prohibit_out;
1643 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 1644 break;
b4949ab2 1645 case RTN_THROW:
ef2c7d7b 1646 default:
7150aede
K
1647 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1648 : -ENETUNREACH;
1649 rt->dst.output = ip6_pkt_discard_out;
1650 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
1651 break;
1652 }
1da177e4
LT
1653 goto install_route;
1654 }
1655
86872cb5 1656 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1657 const struct in6_addr *gw_addr;
1da177e4
LT
1658 int gwa_type;
1659
86872cb5 1660 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1661 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1662 gwa_type = ipv6_addr_type(gw_addr);
1663
1664 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1665 struct rt6_info *grt;
1666
1667 /* IPv6 strictly inhibits using not link-local
1668 addresses as nexthop address.
1669 Otherwise, router will not able to send redirects.
1670 It is very good, but in some (rare!) circumstances
1671 (SIT, PtP, NBMA NOARP links) it is handy to allow
1672 some exceptions. --ANK
1673 */
1674 err = -EINVAL;
38308473 1675 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1676 goto out;
1677
5578689a 1678 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1679
1680 err = -EHOSTUNREACH;
38308473 1681 if (!grt)
1da177e4
LT
1682 goto out;
1683 if (dev) {
d1918542 1684 if (dev != grt->dst.dev) {
94e187c0 1685 ip6_rt_put(grt);
1da177e4
LT
1686 goto out;
1687 }
1688 } else {
d1918542 1689 dev = grt->dst.dev;
1da177e4
LT
1690 idev = grt->rt6i_idev;
1691 dev_hold(dev);
1692 in6_dev_hold(grt->rt6i_idev);
1693 }
38308473 1694 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1695 err = 0;
94e187c0 1696 ip6_rt_put(grt);
1da177e4
LT
1697
1698 if (err)
1699 goto out;
1700 }
1701 err = -EINVAL;
38308473 1702 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1703 goto out;
1704 }
1705
1706 err = -ENODEV;
38308473 1707 if (!dev)
1da177e4
LT
1708 goto out;
1709
c3968a85
DW
1710 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1711 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1712 err = -EINVAL;
1713 goto out;
1714 }
4e3fd7a0 1715 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1716 rt->rt6i_prefsrc.plen = 128;
1717 } else
1718 rt->rt6i_prefsrc.plen = 0;
1719
86872cb5 1720 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1721
1722install_route:
d8d1f30b 1723 rt->dst.dev = dev;
1da177e4 1724 rt->rt6i_idev = idev;
c71099ac 1725 rt->rt6i_table = table;
63152fc0 1726
c346dca1 1727 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1728
e715b6d3
FW
1729 err = ip6_convert_metrics(&mxc, cfg);
1730 if (err)
1731 goto out;
1da177e4 1732
e715b6d3
FW
1733 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1734
1735 kfree(mxc.mx);
1736 return err;
1da177e4
LT
1737out:
1738 if (dev)
1739 dev_put(dev);
1740 if (idev)
1741 in6_dev_put(idev);
1742 if (rt)
d8d1f30b 1743 dst_free(&rt->dst);
1da177e4
LT
1744 return err;
1745}
1746
86872cb5 1747static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1748{
1749 int err;
c71099ac 1750 struct fib6_table *table;
d1918542 1751 struct net *net = dev_net(rt->dst.dev);
1da177e4 1752
6825a26c
G
1753 if (rt == net->ipv6.ip6_null_entry) {
1754 err = -ENOENT;
1755 goto out;
1756 }
6c813a72 1757
c71099ac
TG
1758 table = rt->rt6i_table;
1759 write_lock_bh(&table->tb6_lock);
86872cb5 1760 err = fib6_del(rt, info);
c71099ac 1761 write_unlock_bh(&table->tb6_lock);
1da177e4 1762
6825a26c 1763out:
94e187c0 1764 ip6_rt_put(rt);
1da177e4
LT
1765 return err;
1766}
1767
e0a1ad73
TG
1768int ip6_del_rt(struct rt6_info *rt)
1769{
4d1169c1 1770 struct nl_info info = {
d1918542 1771 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1772 };
528c4ceb 1773 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1774}
1775
86872cb5 1776static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1777{
c71099ac 1778 struct fib6_table *table;
1da177e4
LT
1779 struct fib6_node *fn;
1780 struct rt6_info *rt;
1781 int err = -ESRCH;
1782
5578689a 1783 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1784 if (!table)
c71099ac
TG
1785 return err;
1786
1787 read_lock_bh(&table->tb6_lock);
1da177e4 1788
c71099ac 1789 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1790 &cfg->fc_dst, cfg->fc_dst_len,
1791 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1792
1da177e4 1793 if (fn) {
d8d1f30b 1794 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
1795 if ((rt->rt6i_flags & RTF_CACHE) &&
1796 !(cfg->fc_flags & RTF_CACHE))
1797 continue;
86872cb5 1798 if (cfg->fc_ifindex &&
d1918542
DM
1799 (!rt->dst.dev ||
1800 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1801 continue;
86872cb5
TG
1802 if (cfg->fc_flags & RTF_GATEWAY &&
1803 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1804 continue;
86872cb5 1805 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1806 continue;
d8d1f30b 1807 dst_hold(&rt->dst);
c71099ac 1808 read_unlock_bh(&table->tb6_lock);
1da177e4 1809
86872cb5 1810 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1811 }
1812 }
c71099ac 1813 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1814
1815 return err;
1816}
1817
6700c270 1818static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 1819{
e8599ff4 1820 struct net *net = dev_net(skb->dev);
a6279458 1821 struct netevent_redirect netevent;
e8599ff4 1822 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
1823 struct ndisc_options ndopts;
1824 struct inet6_dev *in6_dev;
1825 struct neighbour *neigh;
71bcdba0 1826 struct rd_msg *msg;
6e157b6a
DM
1827 int optlen, on_link;
1828 u8 *lladdr;
e8599ff4 1829
29a3cad5 1830 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 1831 optlen -= sizeof(*msg);
e8599ff4
DM
1832
1833 if (optlen < 0) {
6e157b6a 1834 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
1835 return;
1836 }
1837
71bcdba0 1838 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 1839
71bcdba0 1840 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 1841 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
1842 return;
1843 }
1844
6e157b6a 1845 on_link = 0;
71bcdba0 1846 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 1847 on_link = 1;
71bcdba0 1848 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 1849 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 1850 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
1851 return;
1852 }
1853
1854 in6_dev = __in6_dev_get(skb->dev);
1855 if (!in6_dev)
1856 return;
1857 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1858 return;
1859
1860 /* RFC2461 8.1:
1861 * The IP source address of the Redirect MUST be the same as the current
1862 * first-hop router for the specified ICMP Destination Address.
1863 */
1864
71bcdba0 1865 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
e8599ff4
DM
1866 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1867 return;
1868 }
6e157b6a
DM
1869
1870 lladdr = NULL;
e8599ff4
DM
1871 if (ndopts.nd_opts_tgt_lladdr) {
1872 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1873 skb->dev);
1874 if (!lladdr) {
1875 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1876 return;
1877 }
1878 }
1879
6e157b6a
DM
1880 rt = (struct rt6_info *) dst;
1881 if (rt == net->ipv6.ip6_null_entry) {
1882 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 1883 return;
6e157b6a 1884 }
e8599ff4 1885
6e157b6a
DM
1886 /* Redirect received -> path was valid.
1887 * Look, redirects are sent only in response to data packets,
1888 * so that this nexthop apparently is reachable. --ANK
1889 */
1890 dst_confirm(&rt->dst);
a6279458 1891
71bcdba0 1892 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
1893 if (!neigh)
1894 return;
a6279458 1895
1da177e4
LT
1896 /*
1897 * We have finally decided to accept it.
1898 */
1899
1ab1457c 1900 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1901 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1902 NEIGH_UPDATE_F_OVERRIDE|
1903 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1904 NEIGH_UPDATE_F_ISROUTER))
1905 );
1906
71bcdba0 1907 nrt = ip6_rt_copy(rt, &msg->dest);
38308473 1908 if (!nrt)
1da177e4
LT
1909 goto out;
1910
1911 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1912 if (on_link)
1913 nrt->rt6i_flags &= ~RTF_GATEWAY;
1914
4e3fd7a0 1915 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 1916
40e22e8f 1917 if (ip6_ins_rt(nrt))
1da177e4
LT
1918 goto out;
1919
d8d1f30b
CG
1920 netevent.old = &rt->dst;
1921 netevent.new = &nrt->dst;
71bcdba0 1922 netevent.daddr = &msg->dest;
60592833 1923 netevent.neigh = neigh;
8d71740c
TT
1924 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1925
38308473 1926 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 1927 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 1928 ip6_del_rt(rt);
1da177e4
LT
1929 }
1930
1931out:
e8599ff4 1932 neigh_release(neigh);
6e157b6a
DM
1933}
1934
1da177e4
LT
1935/*
1936 * Misc support functions
1937 */
1938
4b32b5ad
MKL
1939static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1940{
1941 BUG_ON(from->dst.from);
1942
1943 rt->rt6i_flags &= ~RTF_EXPIRES;
1944 dst_hold(&from->dst);
1945 rt->dst.from = &from->dst;
1946 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
1947}
1948
1716a961 1949static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1950 const struct in6_addr *dest)
1da177e4 1951{
d1918542 1952 struct net *net = dev_net(ort->dst.dev);
4b32b5ad
MKL
1953 struct rt6_info *rt;
1954
1955 if (ort->rt6i_flags & RTF_CACHE)
1956 ort = (struct rt6_info *)ort->dst.from;
1957
1958 rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1959 ort->rt6i_table);
1da177e4
LT
1960
1961 if (rt) {
d8d1f30b
CG
1962 rt->dst.input = ort->dst.input;
1963 rt->dst.output = ort->dst.output;
8e2ec639 1964 rt->dst.flags |= DST_HOST;
d8d1f30b 1965
4e3fd7a0 1966 rt->rt6i_dst.addr = *dest;
8e2ec639 1967 rt->rt6i_dst.plen = 128;
d8d1f30b 1968 rt->dst.error = ort->dst.error;
1da177e4
LT
1969 rt->rt6i_idev = ort->rt6i_idev;
1970 if (rt->rt6i_idev)
1971 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1972 rt->dst.lastuse = jiffies;
1da177e4 1973
550bab42
JA
1974 if (ort->rt6i_flags & RTF_GATEWAY)
1975 rt->rt6i_gateway = ort->rt6i_gateway;
1976 else
1977 rt->rt6i_gateway = *dest;
1716a961 1978 rt->rt6i_flags = ort->rt6i_flags;
24f5b855 1979 rt6_set_from(rt, ort);
1da177e4
LT
1980 rt->rt6i_metric = 0;
1981
1da177e4
LT
1982#ifdef CONFIG_IPV6_SUBTREES
1983 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1984#endif
0f6c6392 1985 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1986 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1987 }
1988 return rt;
1989}
1990
70ceb4f5 1991#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1992static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1993 const struct in6_addr *prefix, int prefixlen,
1994 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1995{
1996 struct fib6_node *fn;
1997 struct rt6_info *rt = NULL;
c71099ac
TG
1998 struct fib6_table *table;
1999
efa2cea0 2000 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 2001 if (!table)
c71099ac 2002 return NULL;
70ceb4f5 2003
5744dd9b 2004 read_lock_bh(&table->tb6_lock);
67ba4152 2005 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2006 if (!fn)
2007 goto out;
2008
d8d1f30b 2009 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2010 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2011 continue;
2012 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2013 continue;
2014 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2015 continue;
d8d1f30b 2016 dst_hold(&rt->dst);
70ceb4f5
YH
2017 break;
2018 }
2019out:
5744dd9b 2020 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2021 return rt;
2022}
2023
efa2cea0 2024static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
2025 const struct in6_addr *prefix, int prefixlen,
2026 const struct in6_addr *gwaddr, int ifindex,
95c96174 2027 unsigned int pref)
70ceb4f5 2028{
86872cb5
TG
2029 struct fib6_config cfg = {
2030 .fc_table = RT6_TABLE_INFO,
238fc7ea 2031 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2032 .fc_ifindex = ifindex,
2033 .fc_dst_len = prefixlen,
2034 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2035 RTF_UP | RTF_PREF(pref),
15e47304 2036 .fc_nlinfo.portid = 0,
efa2cea0
DL
2037 .fc_nlinfo.nlh = NULL,
2038 .fc_nlinfo.nl_net = net,
86872cb5
TG
2039 };
2040
4e3fd7a0
AD
2041 cfg.fc_dst = *prefix;
2042 cfg.fc_gateway = *gwaddr;
70ceb4f5 2043
e317da96
YH
2044 /* We should treat it as a default route if prefix length is 0. */
2045 if (!prefixlen)
86872cb5 2046 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2047
86872cb5 2048 ip6_route_add(&cfg);
70ceb4f5 2049
efa2cea0 2050 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
2051}
2052#endif
2053
b71d1d42 2054struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2055{
1da177e4 2056 struct rt6_info *rt;
c71099ac 2057 struct fib6_table *table;
1da177e4 2058
c346dca1 2059 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 2060 if (!table)
c71099ac 2061 return NULL;
1da177e4 2062
5744dd9b 2063 read_lock_bh(&table->tb6_lock);
67ba4152 2064 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2065 if (dev == rt->dst.dev &&
045927ff 2066 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2067 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2068 break;
2069 }
2070 if (rt)
d8d1f30b 2071 dst_hold(&rt->dst);
5744dd9b 2072 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2073 return rt;
2074}
2075
b71d1d42 2076struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2077 struct net_device *dev,
2078 unsigned int pref)
1da177e4 2079{
86872cb5
TG
2080 struct fib6_config cfg = {
2081 .fc_table = RT6_TABLE_DFLT,
238fc7ea 2082 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2083 .fc_ifindex = dev->ifindex,
2084 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2085 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2086 .fc_nlinfo.portid = 0,
5578689a 2087 .fc_nlinfo.nlh = NULL,
c346dca1 2088 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2089 };
1da177e4 2090
4e3fd7a0 2091 cfg.fc_gateway = *gwaddr;
1da177e4 2092
86872cb5 2093 ip6_route_add(&cfg);
1da177e4 2094
1da177e4
LT
2095 return rt6_get_dflt_router(gwaddr, dev);
2096}
2097
7b4da532 2098void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
2099{
2100 struct rt6_info *rt;
c71099ac
TG
2101 struct fib6_table *table;
2102
2103 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 2104 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 2105 if (!table)
c71099ac 2106 return;
1da177e4
LT
2107
2108restart:
c71099ac 2109 read_lock_bh(&table->tb6_lock);
d8d1f30b 2110 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2111 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2112 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2113 dst_hold(&rt->dst);
c71099ac 2114 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2115 ip6_del_rt(rt);
1da177e4
LT
2116 goto restart;
2117 }
2118 }
c71099ac 2119 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2120}
2121
5578689a
DL
2122static void rtmsg_to_fib6_config(struct net *net,
2123 struct in6_rtmsg *rtmsg,
86872cb5
TG
2124 struct fib6_config *cfg)
2125{
2126 memset(cfg, 0, sizeof(*cfg));
2127
2128 cfg->fc_table = RT6_TABLE_MAIN;
2129 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2130 cfg->fc_metric = rtmsg->rtmsg_metric;
2131 cfg->fc_expires = rtmsg->rtmsg_info;
2132 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2133 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2134 cfg->fc_flags = rtmsg->rtmsg_flags;
2135
5578689a 2136 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2137
4e3fd7a0
AD
2138 cfg->fc_dst = rtmsg->rtmsg_dst;
2139 cfg->fc_src = rtmsg->rtmsg_src;
2140 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2141}
2142
5578689a 2143int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2144{
86872cb5 2145 struct fib6_config cfg;
1da177e4
LT
2146 struct in6_rtmsg rtmsg;
2147 int err;
2148
67ba4152 2149 switch (cmd) {
1da177e4
LT
2150 case SIOCADDRT: /* Add a route */
2151 case SIOCDELRT: /* Delete a route */
af31f412 2152 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2153 return -EPERM;
2154 err = copy_from_user(&rtmsg, arg,
2155 sizeof(struct in6_rtmsg));
2156 if (err)
2157 return -EFAULT;
86872cb5 2158
5578689a 2159 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2160
1da177e4
LT
2161 rtnl_lock();
2162 switch (cmd) {
2163 case SIOCADDRT:
86872cb5 2164 err = ip6_route_add(&cfg);
1da177e4
LT
2165 break;
2166 case SIOCDELRT:
86872cb5 2167 err = ip6_route_del(&cfg);
1da177e4
LT
2168 break;
2169 default:
2170 err = -EINVAL;
2171 }
2172 rtnl_unlock();
2173
2174 return err;
3ff50b79 2175 }
1da177e4
LT
2176
2177 return -EINVAL;
2178}
2179
2180/*
2181 * Drop the packet on the floor
2182 */
2183
d5fdd6ba 2184static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2185{
612f09e8 2186 int type;
adf30907 2187 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2188 switch (ipstats_mib_noroutes) {
2189 case IPSTATS_MIB_INNOROUTES:
0660e03f 2190 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2191 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2192 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2193 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2194 break;
2195 }
2196 /* FALLTHROUGH */
2197 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2198 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2199 ipstats_mib_noroutes);
612f09e8
YH
2200 break;
2201 }
3ffe533c 2202 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2203 kfree_skb(skb);
2204 return 0;
2205}
2206
9ce8ade0
TG
2207static int ip6_pkt_discard(struct sk_buff *skb)
2208{
612f09e8 2209 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2210}
2211
aad88724 2212static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
1da177e4 2213{
adf30907 2214 skb->dev = skb_dst(skb)->dev;
612f09e8 2215 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2216}
2217
9ce8ade0
TG
2218static int ip6_pkt_prohibit(struct sk_buff *skb)
2219{
612f09e8 2220 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2221}
2222
aad88724 2223static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
9ce8ade0 2224{
adf30907 2225 skb->dev = skb_dst(skb)->dev;
612f09e8 2226 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2227}
2228
1da177e4
LT
2229/*
2230 * Allocate a dst for local (unicast / anycast) address.
2231 */
2232
2233struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2234 const struct in6_addr *addr,
8f031519 2235 bool anycast)
1da177e4 2236{
c346dca1 2237 struct net *net = dev_net(idev->dev);
a3300ef4
HFS
2238 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2239 DST_NOCOUNT, NULL);
2240 if (!rt)
1da177e4
LT
2241 return ERR_PTR(-ENOMEM);
2242
1da177e4
LT
2243 in6_dev_hold(idev);
2244
11d53b49 2245 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2246 rt->dst.input = ip6_input;
2247 rt->dst.output = ip6_output;
1da177e4 2248 rt->rt6i_idev = idev;
1da177e4
LT
2249
2250 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2251 if (anycast)
2252 rt->rt6i_flags |= RTF_ANYCAST;
2253 else
1da177e4 2254 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2255
550bab42 2256 rt->rt6i_gateway = *addr;
4e3fd7a0 2257 rt->rt6i_dst.addr = *addr;
1da177e4 2258 rt->rt6i_dst.plen = 128;
5578689a 2259 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2260
d8d1f30b 2261 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2262
2263 return rt;
2264}
2265
c3968a85
DW
2266int ip6_route_get_saddr(struct net *net,
2267 struct rt6_info *rt,
b71d1d42 2268 const struct in6_addr *daddr,
c3968a85
DW
2269 unsigned int prefs,
2270 struct in6_addr *saddr)
2271{
67ba4152 2272 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
c3968a85
DW
2273 int err = 0;
2274 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2275 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2276 else
2277 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2278 daddr, prefs, saddr);
2279 return err;
2280}
2281
2282/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared against rt6i_prefsrc */
};
2288
2289static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2290{
2291 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2292 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2293 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2294
d1918542 2295 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2296 rt != net->ipv6.ip6_null_entry &&
2297 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2298 /* remove prefsrc entry */
2299 rt->rt6i_prefsrc.plen = 0;
2300 }
2301 return 0;
2302}
2303
2304void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2305{
2306 struct net *net = dev_net(ifp->idev->dev);
2307 struct arg_dev_net_ip adni = {
2308 .dev = ifp->idev->dev,
2309 .net = net,
2310 .addr = &ifp->addr,
2311 };
0c3584d5 2312 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2313}
2314
be7a010d
DJ
2315#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2316#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2317
2318/* Remove routers and update dst entries when gateway turn into host. */
2319static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2320{
2321 struct in6_addr *gateway = (struct in6_addr *)arg;
2322
2323 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2324 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2325 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2326 return -1;
2327 }
2328 return 0;
2329}
2330
2331void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2332{
2333 fib6_clean_all(net, fib6_clean_tohost, gateway);
2334}
2335
8ed67789
DL
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
};
2340
1da177e4
LT
2341static int fib6_ifdown(struct rt6_info *rt, void *arg)
2342{
bc3ef660 2343 const struct arg_dev_net *adn = arg;
2344 const struct net_device *dev = adn->dev;
8ed67789 2345
d1918542 2346 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2347 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2348 return -1;
c159d30c 2349
1da177e4
LT
2350 return 0;
2351}
2352
f3db4851 2353void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2354{
8ed67789
DL
2355 struct arg_dev_net adn = {
2356 .dev = dev,
2357 .net = net,
2358 };
2359
0c3584d5 2360 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2361 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2362}
2363
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2368
2369static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2370{
2371 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2372 struct inet6_dev *idev;
2373
2374 /* In IPv6 pmtu discovery is not optional,
2375 so that RTAX_MTU lock cannot disable it.
2376 We still use this lock to block changes
2377 caused by addrconf/ndisc.
2378 */
2379
2380 idev = __in6_dev_get(arg->dev);
38308473 2381 if (!idev)
1da177e4
LT
2382 return 0;
2383
2384 /* For administrative MTU increase, there is no way to discover
2385 IPv6 PMTU increase, so PMTU increase should be updated here.
2386 Since RFC 1981 doesn't include administrative MTU increase
2387 update PMTU increase is a MUST. (i.e. jumbo frame)
2388 */
2389 /*
2390 If new MTU is less than route PMTU, this new MTU will be the
2391 lowest MTU in the path, update the route PMTU to reflect PMTU
2392 decreases; if new MTU is greater than route PMTU, and the
2393 old MTU is the lowest MTU in the path, update the route PMTU
2394 to reflect the increase. In this case if the other nodes' MTU
2395 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2396 PMTU discouvery.
2397 */
d1918542 2398 if (rt->dst.dev == arg->dev &&
4b32b5ad
MKL
2399 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2400 if (rt->rt6i_flags & RTF_CACHE) {
2401 /* For RTF_CACHE with rt6i_pmtu == 0
2402 * (i.e. a redirected route),
2403 * the metrics of its rt->dst.from has already
2404 * been updated.
2405 */
2406 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2407 rt->rt6i_pmtu = arg->mtu;
2408 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2409 (dst_mtu(&rt->dst) < arg->mtu &&
2410 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2411 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2412 }
566cfd8f 2413 }
1da177e4
LT
2414 return 0;
2415}
2416
95c96174 2417void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2418{
c71099ac
TG
2419 struct rt6_mtu_change_arg arg = {
2420 .dev = dev,
2421 .mtu = mtu,
2422 };
1da177e4 2423
0c3584d5 2424 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2425}
2426
ef7c79ed 2427static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2428 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2429 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2430 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2431 [RTA_PRIORITY] = { .type = NLA_U32 },
2432 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2433 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2434 [RTA_PREF] = { .type = NLA_U8 },
86872cb5
TG
2435};
2436
2437static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2438 struct fib6_config *cfg)
1da177e4 2439{
86872cb5
TG
2440 struct rtmsg *rtm;
2441 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2442 unsigned int pref;
86872cb5 2443 int err;
1da177e4 2444
86872cb5
TG
2445 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2446 if (err < 0)
2447 goto errout;
1da177e4 2448
86872cb5
TG
2449 err = -EINVAL;
2450 rtm = nlmsg_data(nlh);
2451 memset(cfg, 0, sizeof(*cfg));
2452
2453 cfg->fc_table = rtm->rtm_table;
2454 cfg->fc_dst_len = rtm->rtm_dst_len;
2455 cfg->fc_src_len = rtm->rtm_src_len;
2456 cfg->fc_flags = RTF_UP;
2457 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2458 cfg->fc_type = rtm->rtm_type;
86872cb5 2459
ef2c7d7b
ND
2460 if (rtm->rtm_type == RTN_UNREACHABLE ||
2461 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2462 rtm->rtm_type == RTN_PROHIBIT ||
2463 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2464 cfg->fc_flags |= RTF_REJECT;
2465
ab79ad14
2466 if (rtm->rtm_type == RTN_LOCAL)
2467 cfg->fc_flags |= RTF_LOCAL;
2468
1f56a01f
MKL
2469 if (rtm->rtm_flags & RTM_F_CLONED)
2470 cfg->fc_flags |= RTF_CACHE;
2471
15e47304 2472 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2473 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2474 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2475
2476 if (tb[RTA_GATEWAY]) {
67b61f6c 2477 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2478 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2479 }
86872cb5
TG
2480
2481 if (tb[RTA_DST]) {
2482 int plen = (rtm->rtm_dst_len + 7) >> 3;
2483
2484 if (nla_len(tb[RTA_DST]) < plen)
2485 goto errout;
2486
2487 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2488 }
86872cb5
TG
2489
2490 if (tb[RTA_SRC]) {
2491 int plen = (rtm->rtm_src_len + 7) >> 3;
2492
2493 if (nla_len(tb[RTA_SRC]) < plen)
2494 goto errout;
2495
2496 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2497 }
86872cb5 2498
c3968a85 2499 if (tb[RTA_PREFSRC])
67b61f6c 2500 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2501
86872cb5
TG
2502 if (tb[RTA_OIF])
2503 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2504
2505 if (tb[RTA_PRIORITY])
2506 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2507
2508 if (tb[RTA_METRICS]) {
2509 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2510 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2511 }
86872cb5
TG
2512
2513 if (tb[RTA_TABLE])
2514 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2515
51ebd318
ND
2516 if (tb[RTA_MULTIPATH]) {
2517 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2518 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2519 }
2520
c78ba6d6
LR
2521 if (tb[RTA_PREF]) {
2522 pref = nla_get_u8(tb[RTA_PREF]);
2523 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2524 pref != ICMPV6_ROUTER_PREF_HIGH)
2525 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2526 cfg->fc_flags |= RTF_PREF(pref);
2527 }
2528
86872cb5
TG
2529 err = 0;
2530errout:
2531 return err;
1da177e4
LT
2532}
2533
51ebd318
ND
2534static int ip6_route_multipath(struct fib6_config *cfg, int add)
2535{
2536 struct fib6_config r_cfg;
2537 struct rtnexthop *rtnh;
2538 int remaining;
2539 int attrlen;
2540 int err = 0, last_err = 0;
2541
2542beginning:
2543 rtnh = (struct rtnexthop *)cfg->fc_mp;
2544 remaining = cfg->fc_mp_len;
2545
2546 /* Parse a Multipath Entry */
2547 while (rtnh_ok(rtnh, remaining)) {
2548 memcpy(&r_cfg, cfg, sizeof(*cfg));
2549 if (rtnh->rtnh_ifindex)
2550 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2551
2552 attrlen = rtnh_attrlen(rtnh);
2553 if (attrlen > 0) {
2554 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2555
2556 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2557 if (nla) {
67b61f6c 2558 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
2559 r_cfg.fc_flags |= RTF_GATEWAY;
2560 }
2561 }
2562 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2563 if (err) {
2564 last_err = err;
2565 /* If we are trying to remove a route, do not stop the
2566 * loop when ip6_route_del() fails (because next hop is
2567 * already gone), we should try to remove all next hops.
2568 */
2569 if (add) {
2570 /* If add fails, we should try to delete all
2571 * next hops that have been already added.
2572 */
2573 add = 0;
2574 goto beginning;
2575 }
2576 }
1a72418b
ND
2577 /* Because each route is added like a single route we remove
2578 * this flag after the first nexthop (if there is a collision,
2579 * we have already fail to add the first nexthop:
2580 * fib6_add_rt2node() has reject it).
2581 */
2582 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
51ebd318
ND
2583 rtnh = rtnh_next(rtnh, &remaining);
2584 }
2585
2586 return last_err;
2587}
2588
67ba4152 2589static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 2590{
86872cb5
TG
2591 struct fib6_config cfg;
2592 int err;
1da177e4 2593
86872cb5
TG
2594 err = rtm_to_fib6_config(skb, nlh, &cfg);
2595 if (err < 0)
2596 return err;
2597
51ebd318
ND
2598 if (cfg.fc_mp)
2599 return ip6_route_multipath(&cfg, 0);
2600 else
2601 return ip6_route_del(&cfg);
1da177e4
LT
2602}
2603
67ba4152 2604static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 2605{
86872cb5
TG
2606 struct fib6_config cfg;
2607 int err;
1da177e4 2608
86872cb5
TG
2609 err = rtm_to_fib6_config(skb, nlh, &cfg);
2610 if (err < 0)
2611 return err;
2612
51ebd318
ND
2613 if (cfg.fc_mp)
2614 return ip6_route_multipath(&cfg, 1);
2615 else
2616 return ip6_route_add(&cfg);
1da177e4
LT
2617}
2618
339bf98f
TG
2619static inline size_t rt6_nlmsg_size(void)
2620{
2621 return NLMSG_ALIGN(sizeof(struct rtmsg))
2622 + nla_total_size(16) /* RTA_SRC */
2623 + nla_total_size(16) /* RTA_DST */
2624 + nla_total_size(16) /* RTA_GATEWAY */
2625 + nla_total_size(16) /* RTA_PREFSRC */
2626 + nla_total_size(4) /* RTA_TABLE */
2627 + nla_total_size(4) /* RTA_IIF */
2628 + nla_total_size(4) /* RTA_OIF */
2629 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2630 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 2631 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6
LR
2632 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2633 + nla_total_size(1); /* RTA_PREF */
339bf98f
TG
2634}
2635
191cd582
BH
2636static int rt6_fill_node(struct net *net,
2637 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 2638 struct in6_addr *dst, struct in6_addr *src,
15e47304 2639 int iif, int type, u32 portid, u32 seq,
7bc570c8 2640 int prefix, int nowait, unsigned int flags)
1da177e4 2641{
4b32b5ad 2642 u32 metrics[RTAX_MAX];
1da177e4 2643 struct rtmsg *rtm;
2d7202bf 2644 struct nlmsghdr *nlh;
e3703b3d 2645 long expires;
9e762a4a 2646 u32 table;
1da177e4
LT
2647
2648 if (prefix) { /* user wants prefix routes only */
2649 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2650 /* success since this is not a prefix route */
2651 return 1;
2652 }
2653 }
2654
15e47304 2655 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 2656 if (!nlh)
26932566 2657 return -EMSGSIZE;
2d7202bf
TG
2658
2659 rtm = nlmsg_data(nlh);
1da177e4
LT
2660 rtm->rtm_family = AF_INET6;
2661 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2662 rtm->rtm_src_len = rt->rt6i_src.plen;
2663 rtm->rtm_tos = 0;
c71099ac 2664 if (rt->rt6i_table)
9e762a4a 2665 table = rt->rt6i_table->tb6_id;
c71099ac 2666 else
9e762a4a
PM
2667 table = RT6_TABLE_UNSPEC;
2668 rtm->rtm_table = table;
c78679e8
DM
2669 if (nla_put_u32(skb, RTA_TABLE, table))
2670 goto nla_put_failure;
ef2c7d7b
ND
2671 if (rt->rt6i_flags & RTF_REJECT) {
2672 switch (rt->dst.error) {
2673 case -EINVAL:
2674 rtm->rtm_type = RTN_BLACKHOLE;
2675 break;
2676 case -EACCES:
2677 rtm->rtm_type = RTN_PROHIBIT;
2678 break;
b4949ab2
ND
2679 case -EAGAIN:
2680 rtm->rtm_type = RTN_THROW;
2681 break;
ef2c7d7b
ND
2682 default:
2683 rtm->rtm_type = RTN_UNREACHABLE;
2684 break;
2685 }
2686 }
38308473 2687 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2688 rtm->rtm_type = RTN_LOCAL;
d1918542 2689 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2690 rtm->rtm_type = RTN_LOCAL;
2691 else
2692 rtm->rtm_type = RTN_UNICAST;
2693 rtm->rtm_flags = 0;
2694 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2695 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2696 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 2697 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
2698 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2699 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2700 rtm->rtm_protocol = RTPROT_RA;
2701 else
2702 rtm->rtm_protocol = RTPROT_KERNEL;
2703 }
1da177e4 2704
38308473 2705 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2706 rtm->rtm_flags |= RTM_F_CLONED;
2707
2708 if (dst) {
930345ea 2709 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 2710 goto nla_put_failure;
1ab1457c 2711 rtm->rtm_dst_len = 128;
1da177e4 2712 } else if (rtm->rtm_dst_len)
930345ea 2713 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 2714 goto nla_put_failure;
1da177e4
LT
2715#ifdef CONFIG_IPV6_SUBTREES
2716 if (src) {
930345ea 2717 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 2718 goto nla_put_failure;
1ab1457c 2719 rtm->rtm_src_len = 128;
c78679e8 2720 } else if (rtm->rtm_src_len &&
930345ea 2721 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 2722 goto nla_put_failure;
1da177e4 2723#endif
7bc570c8
YH
2724 if (iif) {
2725#ifdef CONFIG_IPV6_MROUTE
2726 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2727 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2728 if (err <= 0) {
2729 if (!nowait) {
2730 if (err == 0)
2731 return 0;
2732 goto nla_put_failure;
2733 } else {
2734 if (err == -EMSGSIZE)
2735 goto nla_put_failure;
2736 }
2737 }
2738 } else
2739#endif
c78679e8
DM
2740 if (nla_put_u32(skb, RTA_IIF, iif))
2741 goto nla_put_failure;
7bc570c8 2742 } else if (dst) {
1da177e4 2743 struct in6_addr saddr_buf;
c78679e8 2744 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 2745 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 2746 goto nla_put_failure;
1da177e4 2747 }
2d7202bf 2748
c3968a85
DW
2749 if (rt->rt6i_prefsrc.plen) {
2750 struct in6_addr saddr_buf;
4e3fd7a0 2751 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 2752 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 2753 goto nla_put_failure;
c3968a85
DW
2754 }
2755
4b32b5ad
MKL
2756 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2757 if (rt->rt6i_pmtu)
2758 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
2759 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
2760 goto nla_put_failure;
2761
dd0cbf29 2762 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 2763 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 2764 goto nla_put_failure;
94f826b8 2765 }
2d7202bf 2766
c78679e8
DM
2767 if (rt->dst.dev &&
2768 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2769 goto nla_put_failure;
2770 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2771 goto nla_put_failure;
8253947e
LW
2772
2773 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 2774
87a50699 2775 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2776 goto nla_put_failure;
2d7202bf 2777
c78ba6d6
LR
2778 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2779 goto nla_put_failure;
2780
053c095a
JB
2781 nlmsg_end(skb, nlh);
2782 return 0;
2d7202bf
TG
2783
2784nla_put_failure:
26932566
PM
2785 nlmsg_cancel(skb, nlh);
2786 return -EMSGSIZE;
1da177e4
LT
2787}
2788
1b43af54 2789int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2790{
2791 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2792 int prefix;
2793
2d7202bf
TG
2794 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2795 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2796 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2797 } else
2798 prefix = 0;
2799
191cd582
BH
2800 return rt6_fill_node(arg->net,
2801 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 2802 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2803 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2804}
2805
67ba4152 2806static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 2807{
3b1e0a65 2808 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2809 struct nlattr *tb[RTA_MAX+1];
2810 struct rt6_info *rt;
1da177e4 2811 struct sk_buff *skb;
ab364a6f 2812 struct rtmsg *rtm;
4c9483b2 2813 struct flowi6 fl6;
72331bc0 2814 int err, iif = 0, oif = 0;
1da177e4 2815
ab364a6f
TG
2816 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2817 if (err < 0)
2818 goto errout;
1da177e4 2819
ab364a6f 2820 err = -EINVAL;
4c9483b2 2821 memset(&fl6, 0, sizeof(fl6));
1da177e4 2822
ab364a6f
TG
2823 if (tb[RTA_SRC]) {
2824 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2825 goto errout;
2826
4e3fd7a0 2827 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2828 }
2829
2830 if (tb[RTA_DST]) {
2831 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2832 goto errout;
2833
4e3fd7a0 2834 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2835 }
2836
2837 if (tb[RTA_IIF])
2838 iif = nla_get_u32(tb[RTA_IIF]);
2839
2840 if (tb[RTA_OIF])
72331bc0 2841 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 2842
2e47b291
LC
2843 if (tb[RTA_MARK])
2844 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2845
1da177e4
LT
2846 if (iif) {
2847 struct net_device *dev;
72331bc0
SL
2848 int flags = 0;
2849
5578689a 2850 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2851 if (!dev) {
2852 err = -ENODEV;
ab364a6f 2853 goto errout;
1da177e4 2854 }
72331bc0
SL
2855
2856 fl6.flowi6_iif = iif;
2857
2858 if (!ipv6_addr_any(&fl6.saddr))
2859 flags |= RT6_LOOKUP_F_HAS_SADDR;
2860
2861 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2862 flags);
2863 } else {
2864 fl6.flowi6_oif = oif;
2865
2866 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2867 }
2868
ab364a6f 2869 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2870 if (!skb) {
94e187c0 2871 ip6_rt_put(rt);
ab364a6f
TG
2872 err = -ENOBUFS;
2873 goto errout;
2874 }
1da177e4 2875
ab364a6f
TG
2876 /* Reserve room for dummy headers, this skb can pass
2877 through good chunk of routing engine.
2878 */
459a98ed 2879 skb_reset_mac_header(skb);
ab364a6f 2880 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2881
d8d1f30b 2882 skb_dst_set(skb, &rt->dst);
1da177e4 2883
4c9483b2 2884 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 2885 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 2886 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2887 if (err < 0) {
ab364a6f
TG
2888 kfree_skb(skb);
2889 goto errout;
1da177e4
LT
2890 }
2891
15e47304 2892 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 2893errout:
1da177e4 2894 return err;
1da177e4
LT
2895}
2896
86872cb5 2897void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2898{
2899 struct sk_buff *skb;
5578689a 2900 struct net *net = info->nl_net;
528c4ceb
DL
2901 u32 seq;
2902 int err;
2903
2904 err = -ENOBUFS;
38308473 2905 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2906
339bf98f 2907 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2908 if (!skb)
21713ebc
TG
2909 goto errout;
2910
191cd582 2911 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
15e47304 2912 event, info->portid, seq, 0, 0, 0);
26932566
PM
2913 if (err < 0) {
2914 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2915 WARN_ON(err == -EMSGSIZE);
2916 kfree_skb(skb);
2917 goto errout;
2918 }
15e47304 2919 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
2920 info->nlh, gfp_any());
2921 return;
21713ebc
TG
2922errout:
2923 if (err < 0)
5578689a 2924 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2925}
2926
8ed67789 2927static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 2928 unsigned long event, void *ptr)
8ed67789 2929{
351638e7 2930 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 2931 struct net *net = dev_net(dev);
8ed67789
DL
2932
2933 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2934 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2935 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2936#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2937 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2938 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2939 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2940 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2941#endif
2942 }
2943
2944 return NOTIFY_OK;
2945}
2946
1da177e4
LT
2947/*
2948 * /proc
2949 */
2950
2951#ifdef CONFIG_PROC_FS
2952
33120b30
AD
2953static const struct file_operations ipv6_route_proc_fops = {
2954 .owner = THIS_MODULE,
2955 .open = ipv6_route_open,
2956 .read = seq_read,
2957 .llseek = seq_lseek,
8d2ca1d7 2958 .release = seq_release_net,
33120b30
AD
2959};
2960
1da177e4
LT
2961static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2962{
69ddb805 2963 struct net *net = (struct net *)seq->private;
1da177e4 2964 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2965 net->ipv6.rt6_stats->fib_nodes,
2966 net->ipv6.rt6_stats->fib_route_nodes,
2967 net->ipv6.rt6_stats->fib_rt_alloc,
2968 net->ipv6.rt6_stats->fib_rt_entries,
2969 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2970 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2971 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2972
2973 return 0;
2974}
2975
2976static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2977{
de05c557 2978 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2979}
2980
9a32144e 2981static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2982 .owner = THIS_MODULE,
2983 .open = rt6_stats_seq_open,
2984 .read = seq_read,
2985 .llseek = seq_lseek,
b6fcbdb4 2986 .release = single_release_net,
1da177e4
LT
2987};
2988#endif /* CONFIG_PROC_FS */
2989
2990#ifdef CONFIG_SYSCTL
2991
1da177e4 2992static
fe2c6338 2993int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
2994 void __user *buffer, size_t *lenp, loff_t *ppos)
2995{
c486da34
LAG
2996 struct net *net;
2997 int delay;
2998 if (!write)
1da177e4 2999 return -EINVAL;
c486da34
LAG
3000
3001 net = (struct net *)ctl->extra1;
3002 delay = net->ipv6.sysctl.flush_delay;
3003 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3004 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3005 return 0;
1da177e4
LT
3006}
3007
fe2c6338 3008struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3009 {
1da177e4 3010 .procname = "flush",
4990509f 3011 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3012 .maxlen = sizeof(int),
89c8b3a1 3013 .mode = 0200,
6d9f239a 3014 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3015 },
3016 {
1da177e4 3017 .procname = "gc_thresh",
9a7ec3a9 3018 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3019 .maxlen = sizeof(int),
3020 .mode = 0644,
6d9f239a 3021 .proc_handler = proc_dointvec,
1da177e4
LT
3022 },
3023 {
1da177e4 3024 .procname = "max_size",
4990509f 3025 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3026 .maxlen = sizeof(int),
3027 .mode = 0644,
6d9f239a 3028 .proc_handler = proc_dointvec,
1da177e4
LT
3029 },
3030 {
1da177e4 3031 .procname = "gc_min_interval",
4990509f 3032 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3033 .maxlen = sizeof(int),
3034 .mode = 0644,
6d9f239a 3035 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3036 },
3037 {
1da177e4 3038 .procname = "gc_timeout",
4990509f 3039 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3040 .maxlen = sizeof(int),
3041 .mode = 0644,
6d9f239a 3042 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3043 },
3044 {
1da177e4 3045 .procname = "gc_interval",
4990509f 3046 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3047 .maxlen = sizeof(int),
3048 .mode = 0644,
6d9f239a 3049 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3050 },
3051 {
1da177e4 3052 .procname = "gc_elasticity",
4990509f 3053 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3054 .maxlen = sizeof(int),
3055 .mode = 0644,
f3d3f616 3056 .proc_handler = proc_dointvec,
1da177e4
LT
3057 },
3058 {
1da177e4 3059 .procname = "mtu_expires",
4990509f 3060 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3061 .maxlen = sizeof(int),
3062 .mode = 0644,
6d9f239a 3063 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3064 },
3065 {
1da177e4 3066 .procname = "min_adv_mss",
4990509f 3067 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3068 .maxlen = sizeof(int),
3069 .mode = 0644,
f3d3f616 3070 .proc_handler = proc_dointvec,
1da177e4
LT
3071 },
3072 {
1da177e4 3073 .procname = "gc_min_interval_ms",
4990509f 3074 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3075 .maxlen = sizeof(int),
3076 .mode = 0644,
6d9f239a 3077 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3078 },
f8572d8f 3079 { }
1da177e4
LT
3080};
3081
2c8c1e72 3082struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3083{
3084 struct ctl_table *table;
3085
3086 table = kmemdup(ipv6_route_table_template,
3087 sizeof(ipv6_route_table_template),
3088 GFP_KERNEL);
5ee09105
YH
3089
3090 if (table) {
3091 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3092 table[0].extra1 = net;
86393e52 3093 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3094 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3095 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3096 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3097 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3098 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3099 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3100 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3101 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3102
3103 /* Don't export sysctls to unprivileged users */
3104 if (net->user_ns != &init_user_ns)
3105 table[0].procname = NULL;
5ee09105
YH
3106 }
3107
760f2d01
DL
3108 return table;
3109}
1da177e4
LT
3110#endif
3111
2c8c1e72 3112static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3113{
633d424b 3114 int ret = -ENOMEM;
8ed67789 3115
86393e52
AD
3116 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3117 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3118
fc66f95c
ED
3119 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3120 goto out_ip6_dst_ops;
3121
8ed67789
DL
3122 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3123 sizeof(*net->ipv6.ip6_null_entry),
3124 GFP_KERNEL);
3125 if (!net->ipv6.ip6_null_entry)
fc66f95c 3126 goto out_ip6_dst_entries;
d8d1f30b 3127 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3128 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3129 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3130 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3131 ip6_template_metrics, true);
8ed67789
DL
3132
3133#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3134 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3135 sizeof(*net->ipv6.ip6_prohibit_entry),
3136 GFP_KERNEL);
68fffc67
PZ
3137 if (!net->ipv6.ip6_prohibit_entry)
3138 goto out_ip6_null_entry;
d8d1f30b 3139 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3140 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3141 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3142 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3143 ip6_template_metrics, true);
8ed67789
DL
3144
3145 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3146 sizeof(*net->ipv6.ip6_blk_hole_entry),
3147 GFP_KERNEL);
68fffc67
PZ
3148 if (!net->ipv6.ip6_blk_hole_entry)
3149 goto out_ip6_prohibit_entry;
d8d1f30b 3150 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3151 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3152 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3153 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3154 ip6_template_metrics, true);
8ed67789
DL
3155#endif
3156
b339a47c
PZ
3157 net->ipv6.sysctl.flush_delay = 0;
3158 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3159 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3160 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3161 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3162 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3163 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3164 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3165
6891a346
BT
3166 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3167
8ed67789
DL
3168 ret = 0;
3169out:
3170 return ret;
f2fc6a54 3171
68fffc67
PZ
3172#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3173out_ip6_prohibit_entry:
3174 kfree(net->ipv6.ip6_prohibit_entry);
3175out_ip6_null_entry:
3176 kfree(net->ipv6.ip6_null_entry);
3177#endif
fc66f95c
ED
3178out_ip6_dst_entries:
3179 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3180out_ip6_dst_ops:
f2fc6a54 3181 goto out;
cdb18761
DL
3182}
3183
2c8c1e72 3184static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3185{
8ed67789
DL
3186 kfree(net->ipv6.ip6_null_entry);
3187#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3188 kfree(net->ipv6.ip6_prohibit_entry);
3189 kfree(net->ipv6.ip6_blk_hole_entry);
3190#endif
41bb78b4 3191 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3192}
3193
d189634e
TG
3194static int __net_init ip6_route_net_init_late(struct net *net)
3195{
3196#ifdef CONFIG_PROC_FS
d4beaa66
G
3197 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3198 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3199#endif
3200 return 0;
3201}
3202
3203static void __net_exit ip6_route_net_exit_late(struct net *net)
3204{
3205#ifdef CONFIG_PROC_FS
ece31ffd
G
3206 remove_proc_entry("ipv6_route", net->proc_net);
3207 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3208#endif
3209}
3210
cdb18761
DL
3211static struct pernet_operations ip6_route_net_ops = {
3212 .init = ip6_route_net_init,
3213 .exit = ip6_route_net_exit,
3214};
3215
c3426b47
DM
3216static int __net_init ipv6_inetpeer_init(struct net *net)
3217{
3218 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3219
3220 if (!bp)
3221 return -ENOMEM;
3222 inet_peer_base_init(bp);
3223 net->ipv6.peers = bp;
3224 return 0;
3225}
3226
3227static void __net_exit ipv6_inetpeer_exit(struct net *net)
3228{
3229 struct inet_peer_base *bp = net->ipv6.peers;
3230
3231 net->ipv6.peers = NULL;
56a6b248 3232 inetpeer_invalidate_tree(bp);
c3426b47
DM
3233 kfree(bp);
3234}
3235
2b823f72 3236static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3237 .init = ipv6_inetpeer_init,
3238 .exit = ipv6_inetpeer_exit,
3239};
3240
d189634e
TG
3241static struct pernet_operations ip6_route_net_late_ops = {
3242 .init = ip6_route_net_init_late,
3243 .exit = ip6_route_net_exit_late,
3244};
3245
8ed67789
DL
3246static struct notifier_block ip6_route_dev_notifier = {
3247 .notifier_call = ip6_route_dev_notify,
3248 .priority = 0,
3249};
3250
433d49c3 3251int __init ip6_route_init(void)
1da177e4 3252{
433d49c3
DL
3253 int ret;
3254
9a7ec3a9
DL
3255 ret = -ENOMEM;
3256 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3257 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3258 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3259 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3260 goto out;
14e50e57 3261
fc66f95c 3262 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3263 if (ret)
bdb3289f 3264 goto out_kmem_cache;
bdb3289f 3265
c3426b47
DM
3266 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3267 if (ret)
e8803b6c 3268 goto out_dst_entries;
2a0c451a 3269
7e52b33b
DM
3270 ret = register_pernet_subsys(&ip6_route_net_ops);
3271 if (ret)
3272 goto out_register_inetpeer;
c3426b47 3273
5dc121e9
AE
3274 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3275
8ed67789
DL
3276 /* Registering of the loopback is done before this portion of code,
3277 * the loopback reference in rt6_info will not be taken, do it
3278 * manually for init_net */
d8d1f30b 3279 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3280 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3281 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3282 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3283 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3284 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3285 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3286 #endif
e8803b6c 3287 ret = fib6_init();
433d49c3 3288 if (ret)
8ed67789 3289 goto out_register_subsys;
433d49c3 3290
433d49c3
DL
3291 ret = xfrm6_init();
3292 if (ret)
e8803b6c 3293 goto out_fib6_init;
c35b7e72 3294
433d49c3
DL
3295 ret = fib6_rules_init();
3296 if (ret)
3297 goto xfrm6_init;
7e5449c2 3298
d189634e
TG
3299 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3300 if (ret)
3301 goto fib6_rules_init;
3302
433d49c3 3303 ret = -ENOBUFS;
c7ac8679
GR
3304 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3305 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3306 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3307 goto out_register_late_subsys;
c127ea2c 3308
8ed67789 3309 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3310 if (ret)
d189634e 3311 goto out_register_late_subsys;
8ed67789 3312
433d49c3
DL
3313out:
3314 return ret;
3315
d189634e
TG
3316out_register_late_subsys:
3317 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3318fib6_rules_init:
433d49c3
DL
3319 fib6_rules_cleanup();
3320xfrm6_init:
433d49c3 3321 xfrm6_fini();
2a0c451a
TG
3322out_fib6_init:
3323 fib6_gc_cleanup();
8ed67789
DL
3324out_register_subsys:
3325 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3326out_register_inetpeer:
3327 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3328out_dst_entries:
3329 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3330out_kmem_cache:
f2fc6a54 3331 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3332 goto out;
1da177e4
LT
3333}
3334
3335void ip6_route_cleanup(void)
3336{
8ed67789 3337 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3338 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3339 fib6_rules_cleanup();
1da177e4 3340 xfrm6_fini();
1da177e4 3341 fib6_gc_cleanup();
c3426b47 3342 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3343 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3344 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3345 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3346}
This page took 1.249235 seconds and 5 git commands to generate.