ipv6: Move common init code for rt6_info to a new function rt6_info_init()
[deliverable/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
904af04d 57#include <net/dst_metadata.h>
1da177e4 58#include <net/xfrm.h>
8d71740c 59#include <net/netevent.h>
21713ebc 60#include <net/netlink.h>
51ebd318 61#include <net/nexthop.h>
19e42e45 62#include <net/lwtunnel.h>
904af04d 63#include <net/ip_tunnels.h>
1da177e4
LT
64
65#include <asm/uaccess.h>
66
67#ifdef CONFIG_SYSCTL
68#include <linux/sysctl.h>
69#endif
70
/*
 * Result of checking the neighbour state of a route's next hop.
 * Failure codes are negative so that rt6_score_route() can hand them
 * back unchanged whenever rt6_check_neigh() returns a value < 0.
 */
enum rt6_nud_state {
	/* find_match() drops the route from consideration */
	RT6_NUD_FAIL_HARD	= -3,
	/* neighbour is NUD_FAILED (only produced with CONFIG_IPV6_ROUTER_PREF) */
	RT6_NUD_FAIL_PROBE	= -2,
	/* find_match() scores the route 0 and asks for round-robin */
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
77
83a09abd 78static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 79static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 80static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 81static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
82static struct dst_entry *ip6_negative_advice(struct dst_entry *);
83static void ip6_dst_destroy(struct dst_entry *);
84static void ip6_dst_ifdown(struct dst_entry *,
85 struct net_device *dev, int how);
569d3645 86static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
87
88static int ip6_pkt_discard(struct sk_buff *skb);
aad88724 89static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
7150aede 90static int ip6_pkt_prohibit(struct sk_buff *skb);
aad88724 91static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
1da177e4 92static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
93static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
94 struct sk_buff *skb, u32 mtu);
95static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb);
4b32b5ad 97static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 98static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 99
70ceb4f5 100#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 101static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
102 const struct in6_addr *prefix, int prefixlen,
103 const struct in6_addr *gwaddr, int ifindex,
95c96174 104 unsigned int pref);
efa2cea0 105static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
106 const struct in6_addr *prefix, int prefixlen,
107 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
108#endif
109
8d0b94af
MKL
110struct uncached_list {
111 spinlock_t lock;
112 struct list_head head;
113};
114
115static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116
117static void rt6_uncached_list_add(struct rt6_info *rt)
118{
119 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
120
121 rt->dst.flags |= DST_NOCACHE;
122 rt->rt6i_uncached_list = ul;
123
124 spin_lock_bh(&ul->lock);
125 list_add_tail(&rt->rt6i_uncached, &ul->head);
126 spin_unlock_bh(&ul->lock);
127}
128
129static void rt6_uncached_list_del(struct rt6_info *rt)
130{
131 if (!list_empty(&rt->rt6i_uncached)) {
132 struct uncached_list *ul = rt->rt6i_uncached_list;
133
134 spin_lock_bh(&ul->lock);
135 list_del(&rt->rt6i_uncached);
136 spin_unlock_bh(&ul->lock);
137 }
138}
139
140static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
141{
142 struct net_device *loopback_dev = net->loopback_dev;
143 int cpu;
144
e332bc67
EB
145 if (dev == loopback_dev)
146 return;
147
8d0b94af
MKL
148 for_each_possible_cpu(cpu) {
149 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
150 struct rt6_info *rt;
151
152 spin_lock_bh(&ul->lock);
153 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
154 struct inet6_dev *rt_idev = rt->rt6i_idev;
155 struct net_device *rt_dev = rt->dst.dev;
156
e332bc67 157 if (rt_idev->dev == dev) {
8d0b94af
MKL
158 rt->rt6i_idev = in6_dev_get(loopback_dev);
159 in6_dev_put(rt_idev);
160 }
161
e332bc67 162 if (rt_dev == dev) {
8d0b94af
MKL
163 rt->dst.dev = loopback_dev;
164 dev_hold(rt->dst.dev);
165 dev_put(rt_dev);
166 }
167 }
168 spin_unlock_bh(&ul->lock);
169 }
170}
171
d52d3997
MKL
172static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
173{
174 return dst_metrics_write_ptr(rt->dst.from);
175}
176
06582540
DM
177static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
178{
4b32b5ad 179 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 180
d52d3997
MKL
181 if (rt->rt6i_flags & RTF_PCPU)
182 return rt6_pcpu_cow_metrics(rt);
183 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
184 return NULL;
185 else
3b471175 186 return dst_cow_metrics_generic(dst, old);
06582540
DM
187}
188
f894cbf8
DM
189static inline const void *choose_neigh_daddr(struct rt6_info *rt,
190 struct sk_buff *skb,
191 const void *daddr)
39232973
DM
192{
193 struct in6_addr *p = &rt->rt6i_gateway;
194
a7563f34 195 if (!ipv6_addr_any(p))
39232973 196 return (const void *) p;
f894cbf8
DM
197 else if (skb)
198 return &ipv6_hdr(skb)->daddr;
39232973
DM
199 return daddr;
200}
201
f894cbf8
DM
202static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
203 struct sk_buff *skb,
204 const void *daddr)
d3aaeb38 205{
39232973
DM
206 struct rt6_info *rt = (struct rt6_info *) dst;
207 struct neighbour *n;
208
f894cbf8 209 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 210 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
211 if (n)
212 return n;
213 return neigh_create(&nd_tbl, daddr, dst->dev);
214}
215
9a7ec3a9 216static struct dst_ops ip6_dst_ops_template = {
1da177e4 217 .family = AF_INET6,
1da177e4
LT
218 .gc = ip6_dst_gc,
219 .gc_thresh = 1024,
220 .check = ip6_dst_check,
0dbaee3b 221 .default_advmss = ip6_default_advmss,
ebb762f2 222 .mtu = ip6_mtu,
06582540 223 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
224 .destroy = ip6_dst_destroy,
225 .ifdown = ip6_dst_ifdown,
226 .negative_advice = ip6_negative_advice,
227 .link_failure = ip6_link_failure,
228 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 229 .redirect = rt6_do_redirect,
1ac06e03 230 .local_out = __ip6_local_out,
d3aaeb38 231 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
232};
233
ebb762f2 234static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 235{
618f9bc7
SK
236 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
237
238 return mtu ? : dst->dev->mtu;
ec831ea7
RD
239}
240
6700c270
DM
241static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
242 struct sk_buff *skb, u32 mtu)
14e50e57
DM
243{
244}
245
6700c270
DM
/* Blackhole routes ignore redirects. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
250
0972ddb2
HB
251static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
252 unsigned long old)
253{
254 return NULL;
255}
256
14e50e57
DM
257static struct dst_ops ip6_dst_blackhole_ops = {
258 .family = AF_INET6,
14e50e57
DM
259 .destroy = ip6_dst_destroy,
260 .check = ip6_dst_check,
ebb762f2 261 .mtu = ip6_blackhole_mtu,
214f45c9 262 .default_advmss = ip6_default_advmss,
14e50e57 263 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 264 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 265 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 266 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
267};
268
62fa8a84 269static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 270 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
271};
272
fb0af4c7 273static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
274 .dst = {
275 .__refcnt = ATOMIC_INIT(1),
276 .__use = 1,
2c20cbd7 277 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 278 .error = -ENETUNREACH,
d8d1f30b
CG
279 .input = ip6_pkt_discard,
280 .output = ip6_pkt_discard_out,
1da177e4
LT
281 },
282 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 283 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
284 .rt6i_metric = ~(u32) 0,
285 .rt6i_ref = ATOMIC_INIT(1),
286};
287
101367c2
TG
288#ifdef CONFIG_IPV6_MULTIPLE_TABLES
289
fb0af4c7 290static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
291 .dst = {
292 .__refcnt = ATOMIC_INIT(1),
293 .__use = 1,
2c20cbd7 294 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 295 .error = -EACCES,
d8d1f30b
CG
296 .input = ip6_pkt_prohibit,
297 .output = ip6_pkt_prohibit_out,
101367c2
TG
298 },
299 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 300 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
301 .rt6i_metric = ~(u32) 0,
302 .rt6i_ref = ATOMIC_INIT(1),
303};
304
fb0af4c7 305static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
306 .dst = {
307 .__refcnt = ATOMIC_INIT(1),
308 .__use = 1,
2c20cbd7 309 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 310 .error = -EINVAL,
d8d1f30b 311 .input = dst_discard,
aad88724 312 .output = dst_discard_sk,
101367c2
TG
313 },
314 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 315 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
316 .rt6i_metric = ~(u32) 0,
317 .rt6i_ref = ATOMIC_INIT(1),
318};
319
320#endif
321
ebfa45f0
MKL
322static void rt6_info_init(struct rt6_info *rt)
323{
324 struct dst_entry *dst = &rt->dst;
325
326 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
327 INIT_LIST_HEAD(&rt->rt6i_siblings);
328 INIT_LIST_HEAD(&rt->rt6i_uncached);
329}
330
1da177e4 331/* allocate dst with ip6_dst_ops */
d52d3997
MKL
332static struct rt6_info *__ip6_dst_alloc(struct net *net,
333 struct net_device *dev,
ad706862 334 int flags)
1da177e4 335{
97bab73f 336 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 337 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 338
ebfa45f0
MKL
339 if (rt)
340 rt6_info_init(rt);
8104891b 341
cf911662 342 return rt;
1da177e4
LT
343}
344
d52d3997
MKL
345static struct rt6_info *ip6_dst_alloc(struct net *net,
346 struct net_device *dev,
ad706862 347 int flags)
d52d3997 348{
ad706862 349 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
350
351 if (rt) {
352 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
353 if (rt->rt6i_pcpu) {
354 int cpu;
355
356 for_each_possible_cpu(cpu) {
357 struct rt6_info **p;
358
359 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
360 /* no one shares rt */
361 *p = NULL;
362 }
363 } else {
364 dst_destroy((struct dst_entry *)rt);
365 return NULL;
366 }
367 }
368
369 return rt;
370}
371
1da177e4
LT
372static void ip6_dst_destroy(struct dst_entry *dst)
373{
374 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 375 struct dst_entry *from = dst->from;
8d0b94af 376 struct inet6_dev *idev;
1da177e4 377
4b32b5ad 378 dst_destroy_metrics_generic(dst);
87775312 379 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
380 rt6_uncached_list_del(rt);
381
382 idev = rt->rt6i_idev;
38308473 383 if (idev) {
1da177e4
LT
384 rt->rt6i_idev = NULL;
385 in6_dev_put(idev);
1ab1457c 386 }
1716a961 387
ecd98837
YH
388 dst->from = NULL;
389 dst_release(from);
b3419363
DM
390}
391
1da177e4
LT
392static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
393 int how)
394{
395 struct rt6_info *rt = (struct rt6_info *)dst;
396 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 397 struct net_device *loopback_dev =
c346dca1 398 dev_net(dev)->loopback_dev;
1da177e4 399
97cac082
DM
400 if (dev != loopback_dev) {
401 if (idev && idev->dev == dev) {
402 struct inet6_dev *loopback_idev =
403 in6_dev_get(loopback_dev);
404 if (loopback_idev) {
405 rt->rt6i_idev = loopback_idev;
406 in6_dev_put(idev);
407 }
408 }
1da177e4
LT
409 }
410}
411
a50feda5 412static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 413{
1716a961
G
414 if (rt->rt6i_flags & RTF_EXPIRES) {
415 if (time_after(jiffies, rt->dst.expires))
a50feda5 416 return true;
1716a961 417 } else if (rt->dst.from) {
3fd91fb3 418 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 419 }
a50feda5 420 return false;
1da177e4
LT
421}
422
51ebd318
ND
423/* Multipath route selection:
424 * Hash based function using packet header and flowlabel.
425 * Adapted from fib_info_hashfn()
426 */
427static int rt6_info_hash_nhsfn(unsigned int candidate_count,
428 const struct flowi6 *fl6)
429{
430 unsigned int val = fl6->flowi6_proto;
431
c08977bb
YH
432 val ^= ipv6_addr_hash(&fl6->daddr);
433 val ^= ipv6_addr_hash(&fl6->saddr);
51ebd318
ND
434
435 /* Work only if this not encapsulated */
436 switch (fl6->flowi6_proto) {
437 case IPPROTO_UDP:
438 case IPPROTO_TCP:
439 case IPPROTO_SCTP:
b3ce5ae1
ND
440 val ^= (__force u16)fl6->fl6_sport;
441 val ^= (__force u16)fl6->fl6_dport;
51ebd318
ND
442 break;
443
444 case IPPROTO_ICMPV6:
b3ce5ae1
ND
445 val ^= (__force u16)fl6->fl6_icmp_type;
446 val ^= (__force u16)fl6->fl6_icmp_code;
51ebd318
ND
447 break;
448 }
449 /* RFC6438 recommands to use flowlabel */
b3ce5ae1 450 val ^= (__force u32)fl6->flowlabel;
51ebd318
ND
451
452 /* Perhaps, we need to tune, this function? */
453 val = val ^ (val >> 7) ^ (val >> 12);
454 return val % candidate_count;
455}
456
457static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
458 struct flowi6 *fl6, int oif,
459 int strict)
51ebd318
ND
460{
461 struct rt6_info *sibling, *next_sibling;
462 int route_choosen;
463
464 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
465 /* Don't change the route, if route_choosen == 0
466 * (siblings does not include ourself)
467 */
468 if (route_choosen)
469 list_for_each_entry_safe(sibling, next_sibling,
470 &match->rt6i_siblings, rt6i_siblings) {
471 route_choosen--;
472 if (route_choosen == 0) {
52bd4c0c
ND
473 if (rt6_score_route(sibling, oif, strict) < 0)
474 break;
51ebd318
ND
475 match = sibling;
476 break;
477 }
478 }
479 return match;
480}
481
1da177e4 482/*
c71099ac 483 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
484 */
485
8ed67789
DL
486static inline struct rt6_info *rt6_device_match(struct net *net,
487 struct rt6_info *rt,
b71d1d42 488 const struct in6_addr *saddr,
1da177e4 489 int oif,
d420895e 490 int flags)
1da177e4
LT
491{
492 struct rt6_info *local = NULL;
493 struct rt6_info *sprt;
494
dd3abc4e
YH
495 if (!oif && ipv6_addr_any(saddr))
496 goto out;
497
d8d1f30b 498 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 499 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
500
501 if (oif) {
1da177e4
LT
502 if (dev->ifindex == oif)
503 return sprt;
504 if (dev->flags & IFF_LOOPBACK) {
38308473 505 if (!sprt->rt6i_idev ||
1da177e4 506 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 507 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 508 continue;
1ab1457c 509 if (local && (!oif ||
1da177e4
LT
510 local->rt6i_idev->dev->ifindex == oif))
511 continue;
512 }
513 local = sprt;
514 }
dd3abc4e
YH
515 } else {
516 if (ipv6_chk_addr(net, saddr, dev,
517 flags & RT6_LOOKUP_F_IFACE))
518 return sprt;
1da177e4 519 }
dd3abc4e 520 }
1da177e4 521
dd3abc4e 522 if (oif) {
1da177e4
LT
523 if (local)
524 return local;
525
d420895e 526 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 527 return net->ipv6.ip6_null_entry;
1da177e4 528 }
dd3abc4e 529out:
1da177e4
LT
530 return rt;
531}
532
27097255 533#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
534struct __rt6_probe_work {
535 struct work_struct work;
536 struct in6_addr target;
537 struct net_device *dev;
538};
539
540static void rt6_probe_deferred(struct work_struct *w)
541{
542 struct in6_addr mcaddr;
543 struct __rt6_probe_work *work =
544 container_of(w, struct __rt6_probe_work, work);
545
546 addrconf_addr_solict_mult(&work->target, &mcaddr);
ab450605 547 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
c2f17e82 548 dev_put(work->dev);
662f5533 549 kfree(work);
c2f17e82
HFS
550}
551
27097255
YH
552static void rt6_probe(struct rt6_info *rt)
553{
990edb42 554 struct __rt6_probe_work *work;
f2c31e32 555 struct neighbour *neigh;
27097255
YH
556 /*
557 * Okay, this does not seem to be appropriate
558 * for now, however, we need to check if it
559 * is really so; aka Router Reachability Probing.
560 *
561 * Router Reachability Probe MUST be rate-limited
562 * to no more than one per minute.
563 */
2152caea 564 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 565 return;
2152caea
YH
566 rcu_read_lock_bh();
567 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
568 if (neigh) {
8d6c31bf
MKL
569 if (neigh->nud_state & NUD_VALID)
570 goto out;
571
990edb42 572 work = NULL;
2152caea 573 write_lock(&neigh->lock);
990edb42
MKL
574 if (!(neigh->nud_state & NUD_VALID) &&
575 time_after(jiffies,
576 neigh->updated +
577 rt->rt6i_idev->cnf.rtr_probe_interval)) {
578 work = kmalloc(sizeof(*work), GFP_ATOMIC);
579 if (work)
580 __neigh_set_probe_once(neigh);
c2f17e82 581 }
2152caea 582 write_unlock(&neigh->lock);
990edb42
MKL
583 } else {
584 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 585 }
990edb42
MKL
586
587 if (work) {
588 INIT_WORK(&work->work, rt6_probe_deferred);
589 work->target = rt->rt6i_gateway;
590 dev_hold(rt->dst.dev);
591 work->dev = rt->dst.dev;
592 schedule_work(&work->work);
593 }
594
8d6c31bf 595out:
2152caea 596 rcu_read_unlock_bh();
27097255
YH
597}
598#else
/* No-op variant used when CONFIG_IPV6_ROUTER_PREF is not set. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
602#endif
603
1da177e4 604/*
554cfb7e 605 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 606 */
b6f99a21 607static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 608{
d1918542 609 struct net_device *dev = rt->dst.dev;
161980f4 610 if (!oif || dev->ifindex == oif)
554cfb7e 611 return 2;
161980f4
DM
612 if ((dev->flags & IFF_LOOPBACK) &&
613 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
614 return 1;
615 return 0;
554cfb7e 616}
1da177e4 617
afc154e9 618static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 619{
f2c31e32 620 struct neighbour *neigh;
afc154e9 621 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 622
4d0c5911
YH
623 if (rt->rt6i_flags & RTF_NONEXTHOP ||
624 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 625 return RT6_NUD_SUCCEED;
145a3621
YH
626
627 rcu_read_lock_bh();
628 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
629 if (neigh) {
630 read_lock(&neigh->lock);
554cfb7e 631 if (neigh->nud_state & NUD_VALID)
afc154e9 632 ret = RT6_NUD_SUCCEED;
398bcbeb 633#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 634 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 635 ret = RT6_NUD_SUCCEED;
7e980569
JB
636 else
637 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 638#endif
145a3621 639 read_unlock(&neigh->lock);
afc154e9
HFS
640 } else {
641 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 642 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 643 }
145a3621
YH
644 rcu_read_unlock_bh();
645
a5a81f0b 646 return ret;
1da177e4
LT
647}
648
554cfb7e
YH
649static int rt6_score_route(struct rt6_info *rt, int oif,
650 int strict)
1da177e4 651{
a5a81f0b 652 int m;
1ab1457c 653
4d0c5911 654 m = rt6_check_dev(rt, oif);
77d16f45 655 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 656 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
657#ifdef CONFIG_IPV6_ROUTER_PREF
658 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
659#endif
afc154e9
HFS
660 if (strict & RT6_LOOKUP_F_REACHABLE) {
661 int n = rt6_check_neigh(rt);
662 if (n < 0)
663 return n;
664 }
554cfb7e
YH
665 return m;
666}
667
f11e6659 668static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
669 int *mpri, struct rt6_info *match,
670 bool *do_rr)
554cfb7e 671{
f11e6659 672 int m;
afc154e9 673 bool match_do_rr = false;
35103d11
AG
674 struct inet6_dev *idev = rt->rt6i_idev;
675 struct net_device *dev = rt->dst.dev;
676
677 if (dev && !netif_carrier_ok(dev) &&
678 idev->cnf.ignore_routes_with_linkdown)
679 goto out;
f11e6659
DM
680
681 if (rt6_check_expired(rt))
682 goto out;
683
684 m = rt6_score_route(rt, oif, strict);
7e980569 685 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
686 match_do_rr = true;
687 m = 0; /* lowest valid score */
7e980569 688 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 689 goto out;
afc154e9
HFS
690 }
691
692 if (strict & RT6_LOOKUP_F_REACHABLE)
693 rt6_probe(rt);
f11e6659 694
7e980569 695 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 696 if (m > *mpri) {
afc154e9 697 *do_rr = match_do_rr;
f11e6659
DM
698 *mpri = m;
699 match = rt;
f11e6659 700 }
f11e6659
DM
701out:
702 return match;
703}
704
705static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
706 struct rt6_info *rr_head,
afc154e9
HFS
707 u32 metric, int oif, int strict,
708 bool *do_rr)
f11e6659 709{
9fbdcfaf 710 struct rt6_info *rt, *match, *cont;
554cfb7e 711 int mpri = -1;
1da177e4 712
f11e6659 713 match = NULL;
9fbdcfaf
SK
714 cont = NULL;
715 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
716 if (rt->rt6i_metric != metric) {
717 cont = rt;
718 break;
719 }
720
721 match = find_match(rt, oif, strict, &mpri, match, do_rr);
722 }
723
724 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
725 if (rt->rt6i_metric != metric) {
726 cont = rt;
727 break;
728 }
729
afc154e9 730 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
731 }
732
733 if (match || !cont)
734 return match;
735
736 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 737 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 738
f11e6659
DM
739 return match;
740}
1da177e4 741
f11e6659
DM
742static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
743{
744 struct rt6_info *match, *rt0;
8ed67789 745 struct net *net;
afc154e9 746 bool do_rr = false;
1da177e4 747
f11e6659
DM
748 rt0 = fn->rr_ptr;
749 if (!rt0)
750 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 751
afc154e9
HFS
752 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
753 &do_rr);
1da177e4 754
afc154e9 755 if (do_rr) {
d8d1f30b 756 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 757
554cfb7e 758 /* no entries matched; do round-robin */
f11e6659
DM
759 if (!next || next->rt6i_metric != rt0->rt6i_metric)
760 next = fn->leaf;
761
762 if (next != rt0)
763 fn->rr_ptr = next;
1da177e4 764 }
1da177e4 765
d1918542 766 net = dev_net(rt0->dst.dev);
a02cec21 767 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
768}
769
8b9df265
MKL
770static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
771{
772 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
773}
774
70ceb4f5
YH
775#ifdef CONFIG_IPV6_ROUTE_INFO
776int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 777 const struct in6_addr *gwaddr)
70ceb4f5 778{
c346dca1 779 struct net *net = dev_net(dev);
70ceb4f5
YH
780 struct route_info *rinfo = (struct route_info *) opt;
781 struct in6_addr prefix_buf, *prefix;
782 unsigned int pref;
4bed72e4 783 unsigned long lifetime;
70ceb4f5
YH
784 struct rt6_info *rt;
785
786 if (len < sizeof(struct route_info)) {
787 return -EINVAL;
788 }
789
790 /* Sanity check for prefix_len and length */
791 if (rinfo->length > 3) {
792 return -EINVAL;
793 } else if (rinfo->prefix_len > 128) {
794 return -EINVAL;
795 } else if (rinfo->prefix_len > 64) {
796 if (rinfo->length < 2) {
797 return -EINVAL;
798 }
799 } else if (rinfo->prefix_len > 0) {
800 if (rinfo->length < 1) {
801 return -EINVAL;
802 }
803 }
804
805 pref = rinfo->route_pref;
806 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 807 return -EINVAL;
70ceb4f5 808
4bed72e4 809 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
810
811 if (rinfo->length == 3)
812 prefix = (struct in6_addr *)rinfo->prefix;
813 else {
814 /* this function is safe */
815 ipv6_addr_prefix(&prefix_buf,
816 (struct in6_addr *)rinfo->prefix,
817 rinfo->prefix_len);
818 prefix = &prefix_buf;
819 }
820
f104a567
DJ
821 if (rinfo->prefix_len == 0)
822 rt = rt6_get_dflt_router(gwaddr, dev);
823 else
824 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
825 gwaddr, dev->ifindex);
70ceb4f5
YH
826
827 if (rt && !lifetime) {
e0a1ad73 828 ip6_del_rt(rt);
70ceb4f5
YH
829 rt = NULL;
830 }
831
832 if (!rt && lifetime)
efa2cea0 833 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
834 pref);
835 else if (rt)
836 rt->rt6i_flags = RTF_ROUTEINFO |
837 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
838
839 if (rt) {
1716a961
G
840 if (!addrconf_finite_timeout(lifetime))
841 rt6_clean_expires(rt);
842 else
843 rt6_set_expires(rt, jiffies + HZ * lifetime);
844
94e187c0 845 ip6_rt_put(rt);
70ceb4f5
YH
846 }
847 return 0;
848}
849#endif
850
a3c00e46
MKL
851static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
852 struct in6_addr *saddr)
853{
854 struct fib6_node *pn;
855 while (1) {
856 if (fn->fn_flags & RTN_TL_ROOT)
857 return NULL;
858 pn = fn->parent;
859 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
860 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
861 else
862 fn = pn;
863 if (fn->fn_flags & RTN_RTINFO)
864 return fn;
865 }
866}
c71099ac 867
8ed67789
DL
868static struct rt6_info *ip6_pol_route_lookup(struct net *net,
869 struct fib6_table *table,
4c9483b2 870 struct flowi6 *fl6, int flags)
1da177e4
LT
871{
872 struct fib6_node *fn;
873 struct rt6_info *rt;
874
c71099ac 875 read_lock_bh(&table->tb6_lock);
4c9483b2 876 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
877restart:
878 rt = fn->leaf;
4c9483b2 879 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 880 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 881 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
882 if (rt == net->ipv6.ip6_null_entry) {
883 fn = fib6_backtrack(fn, &fl6->saddr);
884 if (fn)
885 goto restart;
886 }
d8d1f30b 887 dst_use(&rt->dst, jiffies);
c71099ac 888 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
889 return rt;
890
891}
892
67ba4152 893struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
894 int flags)
895{
896 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
897}
898EXPORT_SYMBOL_GPL(ip6_route_lookup);
899
9acd9f3a
YH
900struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
901 const struct in6_addr *saddr, int oif, int strict)
c71099ac 902{
4c9483b2
DM
903 struct flowi6 fl6 = {
904 .flowi6_oif = oif,
905 .daddr = *daddr,
c71099ac
TG
906 };
907 struct dst_entry *dst;
77d16f45 908 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 909
adaa70bb 910 if (saddr) {
4c9483b2 911 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
912 flags |= RT6_LOOKUP_F_HAS_SADDR;
913 }
914
4c9483b2 915 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
916 if (dst->error == 0)
917 return (struct rt6_info *) dst;
918
919 dst_release(dst);
920
1da177e4
LT
921 return NULL;
922}
7159039a
YH
923EXPORT_SYMBOL(rt6_lookup);
924
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold it, it may
   be destroyed.
 */
930
e5fd387a 931static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 932 struct mx6_config *mxc)
1da177e4
LT
933{
934 int err;
c71099ac 935 struct fib6_table *table;
1da177e4 936
c71099ac
TG
937 table = rt->rt6i_table;
938 write_lock_bh(&table->tb6_lock);
e715b6d3 939 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 940 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
941
942 return err;
943}
944
40e22e8f
TG
945int ip6_ins_rt(struct rt6_info *rt)
946{
e715b6d3
FW
947 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
948 struct mx6_config mxc = { .mx = NULL, };
949
950 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
951}
952
8b9df265
MKL
953static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
954 const struct in6_addr *daddr,
955 const struct in6_addr *saddr)
1da177e4 956{
1da177e4
LT
957 struct rt6_info *rt;
958
959 /*
960 * Clone the route.
961 */
962
d52d3997 963 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 964 ort = (struct rt6_info *)ort->dst.from;
1da177e4 965
ad706862 966 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
83a09abd
MKL
967
968 if (!rt)
969 return NULL;
970
971 ip6_rt_copy_init(rt, ort);
972 rt->rt6i_flags |= RTF_CACHE;
973 rt->rt6i_metric = 0;
974 rt->dst.flags |= DST_HOST;
975 rt->rt6i_dst.addr = *daddr;
976 rt->rt6i_dst.plen = 128;
1da177e4 977
83a09abd
MKL
978 if (!rt6_is_gw_or_nonexthop(ort)) {
979 if (ort->rt6i_dst.plen != 128 &&
980 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
981 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 982#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
983 if (rt->rt6i_src.plen && saddr) {
984 rt->rt6i_src.addr = *saddr;
985 rt->rt6i_src.plen = 128;
8b9df265 986 }
83a09abd 987#endif
95a9a5ba 988 }
1da177e4 989
95a9a5ba
YH
990 return rt;
991}
1da177e4 992
d52d3997
MKL
993static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
994{
995 struct rt6_info *pcpu_rt;
996
997 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
ad706862 998 rt->dst.dev, rt->dst.flags);
d52d3997
MKL
999
1000 if (!pcpu_rt)
1001 return NULL;
1002 ip6_rt_copy_init(pcpu_rt, rt);
1003 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1004 pcpu_rt->rt6i_flags |= RTF_PCPU;
1005 return pcpu_rt;
1006}
1007
1008/* It should be called with read_lock_bh(&tb6_lock) acquired */
1009static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1010{
a73e4195 1011 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1012
1013 p = this_cpu_ptr(rt->rt6i_pcpu);
1014 pcpu_rt = *p;
1015
a73e4195
MKL
1016 if (pcpu_rt) {
1017 dst_hold(&pcpu_rt->dst);
1018 rt6_dst_from_metrics_check(pcpu_rt);
1019 }
1020 return pcpu_rt;
1021}
1022
1023static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1024{
9c7370a1 1025 struct fib6_table *table = rt->rt6i_table;
a73e4195 1026 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1027
1028 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1029 if (!pcpu_rt) {
1030 struct net *net = dev_net(rt->dst.dev);
1031
9c7370a1
MKL
1032 dst_hold(&net->ipv6.ip6_null_entry->dst);
1033 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1034 }
1035
9c7370a1
MKL
1036 read_lock_bh(&table->tb6_lock);
1037 if (rt->rt6i_pcpu) {
1038 p = this_cpu_ptr(rt->rt6i_pcpu);
1039 prev = cmpxchg(p, NULL, pcpu_rt);
1040 if (prev) {
1041 /* If someone did it before us, return prev instead */
1042 dst_destroy(&pcpu_rt->dst);
1043 pcpu_rt = prev;
1044 }
1045 } else {
1046 /* rt has been removed from the fib6 tree
1047 * before we have a chance to acquire the read_lock.
1048 * In this case, don't brother to create a pcpu rt
1049 * since rt is going away anyway. The next
1050 * dst_check() will trigger a re-lookup.
1051 */
d52d3997 1052 dst_destroy(&pcpu_rt->dst);
9c7370a1 1053 pcpu_rt = rt;
d52d3997 1054 }
d52d3997
MKL
1055 dst_hold(&pcpu_rt->dst);
1056 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1057 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1058 return pcpu_rt;
1059}
1060
8ed67789 1061static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 1062 struct flowi6 *fl6, int flags)
1da177e4 1063{
367efcb9 1064 struct fib6_node *fn, *saved_fn;
45e4fd26 1065 struct rt6_info *rt;
c71099ac 1066 int strict = 0;
1da177e4 1067
77d16f45 1068 strict |= flags & RT6_LOOKUP_F_IFACE;
367efcb9
MKL
1069 if (net->ipv6.devconf_all->forwarding == 0)
1070 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1071
c71099ac 1072 read_lock_bh(&table->tb6_lock);
1da177e4 1073
4c9483b2 1074 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1075 saved_fn = fn;
1da177e4 1076
a3c00e46 1077redo_rt6_select:
367efcb9 1078 rt = rt6_select(fn, oif, strict);
52bd4c0c 1079 if (rt->rt6i_nsiblings)
367efcb9 1080 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1081 if (rt == net->ipv6.ip6_null_entry) {
1082 fn = fib6_backtrack(fn, &fl6->saddr);
1083 if (fn)
1084 goto redo_rt6_select;
367efcb9
MKL
1085 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1086 /* also consider unreachable route */
1087 strict &= ~RT6_LOOKUP_F_REACHABLE;
1088 fn = saved_fn;
1089 goto redo_rt6_select;
367efcb9 1090 }
a3c00e46
MKL
1091 }
1092
fb9de91e 1093
3da59bd9 1094 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1095 dst_use(&rt->dst, jiffies);
1096 read_unlock_bh(&table->tb6_lock);
1097
1098 rt6_dst_from_metrics_check(rt);
1099 return rt;
3da59bd9
MKL
1100 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1101 !(rt->rt6i_flags & RTF_GATEWAY))) {
1102 /* Create a RTF_CACHE clone which will not be
1103 * owned by the fib6 tree. It is for the special case where
1104 * the daddr in the skb during the neighbor look-up is different
1105 * from the fl6->daddr used to look-up route here.
1106 */
1107
1108 struct rt6_info *uncached_rt;
1109
d52d3997
MKL
1110 dst_use(&rt->dst, jiffies);
1111 read_unlock_bh(&table->tb6_lock);
1112
3da59bd9
MKL
1113 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1114 dst_release(&rt->dst);
c71099ac 1115
3da59bd9 1116 if (uncached_rt)
8d0b94af 1117 rt6_uncached_list_add(uncached_rt);
3da59bd9
MKL
1118 else
1119 uncached_rt = net->ipv6.ip6_null_entry;
d52d3997 1120
3da59bd9
MKL
1121 dst_hold(&uncached_rt->dst);
1122 return uncached_rt;
3da59bd9 1123
d52d3997
MKL
1124 } else {
1125 /* Get a percpu copy */
1126
1127 struct rt6_info *pcpu_rt;
1128
1129 rt->dst.lastuse = jiffies;
1130 rt->dst.__use++;
1131 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1132
9c7370a1
MKL
1133 if (pcpu_rt) {
1134 read_unlock_bh(&table->tb6_lock);
1135 } else {
1136 /* We have to do the read_unlock first
1137 * because rt6_make_pcpu_route() may trigger
1138 * ip6_dst_gc() which will take the write_lock.
1139 */
1140 dst_hold(&rt->dst);
1141 read_unlock_bh(&table->tb6_lock);
a73e4195 1142 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1143 dst_release(&rt->dst);
1144 }
d52d3997
MKL
1145
1146 return pcpu_rt;
9c7370a1 1147
d52d3997 1148 }
1da177e4
LT
1149}
1150
8ed67789 1151static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1152 struct flowi6 *fl6, int flags)
4acad72d 1153{
4c9483b2 1154 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1155}
1156
72331bc0
SL
1157static struct dst_entry *ip6_route_input_lookup(struct net *net,
1158 struct net_device *dev,
1159 struct flowi6 *fl6, int flags)
1160{
1161 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1162 flags |= RT6_LOOKUP_F_IFACE;
1163
1164 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1165}
1166
c71099ac
TG
1167void ip6_route_input(struct sk_buff *skb)
1168{
b71d1d42 1169 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1170 struct net *net = dev_net(skb->dev);
adaa70bb 1171 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1172 struct ip_tunnel_info *tun_info;
4c9483b2
DM
1173 struct flowi6 fl6 = {
1174 .flowi6_iif = skb->dev->ifindex,
1175 .daddr = iph->daddr,
1176 .saddr = iph->saddr,
6502ca52 1177 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1178 .flowi6_mark = skb->mark,
1179 .flowi6_proto = iph->nexthdr,
c71099ac 1180 };
adaa70bb 1181
904af04d 1182 tun_info = skb_tunnel_info(skb);
46fa062a 1183 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1184 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
06e9d040 1185 skb_dst_drop(skb);
72331bc0 1186 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1187}
1188
8ed67789 1189static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1190 struct flowi6 *fl6, int flags)
1da177e4 1191{
4c9483b2 1192 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1193}
1194
67ba4152 1195struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 1196 struct flowi6 *fl6)
c71099ac
TG
1197{
1198 int flags = 0;
1199
1fb9489b 1200 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1201
741a11d9
DA
1202 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1203 fl6->flowi6_oif)
77d16f45 1204 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1205
4c9483b2 1206 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 1207 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1208 else if (sk)
1209 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1210
4c9483b2 1211 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1212}
7159039a 1213EXPORT_SYMBOL(ip6_route_output);
1da177e4 1214
2774c131 1215struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1216{
5c1e6aa3 1217 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1218 struct dst_entry *new = NULL;
1219
f5b0a874 1220 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1221 if (rt) {
d8d1f30b 1222 new = &rt->dst;
14e50e57 1223
8104891b 1224 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
8104891b 1225
14e50e57 1226 new->__use = 1;
352e512c 1227 new->input = dst_discard;
aad88724 1228 new->output = dst_discard_sk;
14e50e57 1229
21efcfa0
ED
1230 if (dst_metrics_read_only(&ort->dst))
1231 new->_metrics = ort->dst._metrics;
1232 else
1233 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1234 rt->rt6i_idev = ort->rt6i_idev;
1235 if (rt->rt6i_idev)
1236 in6_dev_hold(rt->rt6i_idev);
14e50e57 1237
4e3fd7a0 1238 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961 1239 rt->rt6i_flags = ort->rt6i_flags;
14e50e57
DM
1240 rt->rt6i_metric = 0;
1241
1242 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1243#ifdef CONFIG_IPV6_SUBTREES
1244 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1245#endif
1246
1247 dst_free(new);
1248 }
1249
69ead7af
DM
1250 dst_release(dst_orig);
1251 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1252}
14e50e57 1253
1da177e4
LT
1254/*
1255 * Destination cache support functions
1256 */
1257
4b32b5ad
MKL
1258static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1259{
1260 if (rt->dst.from &&
1261 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1262 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1263}
1264
3da59bd9
MKL
1265static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1266{
1267 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1268 return NULL;
1269
1270 if (rt6_check_expired(rt))
1271 return NULL;
1272
1273 return &rt->dst;
1274}
1275
1276static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1277{
1278 if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1279 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1280 return &rt->dst;
1281 else
1282 return NULL;
1283}
1284
1da177e4
LT
1285static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1286{
1287 struct rt6_info *rt;
1288
1289 rt = (struct rt6_info *) dst;
1290
6f3118b5
ND
1291 /* All IPV6 dsts are created with ->obsolete set to the value
1292 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1293 * into this function always.
1294 */
e3bc10bd 1295
4b32b5ad
MKL
1296 rt6_dst_from_metrics_check(rt);
1297
d52d3997 1298 if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
3da59bd9
MKL
1299 return rt6_dst_from_check(rt, cookie);
1300 else
1301 return rt6_check(rt, cookie);
1da177e4
LT
1302}
1303
1304static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1305{
1306 struct rt6_info *rt = (struct rt6_info *) dst;
1307
1308 if (rt) {
54c1a859
YH
1309 if (rt->rt6i_flags & RTF_CACHE) {
1310 if (rt6_check_expired(rt)) {
1311 ip6_del_rt(rt);
1312 dst = NULL;
1313 }
1314 } else {
1da177e4 1315 dst_release(dst);
54c1a859
YH
1316 dst = NULL;
1317 }
1da177e4 1318 }
54c1a859 1319 return dst;
1da177e4
LT
1320}
1321
1322static void ip6_link_failure(struct sk_buff *skb)
1323{
1324 struct rt6_info *rt;
1325
3ffe533c 1326 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1327
adf30907 1328 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1329 if (rt) {
1eb4f758
HFS
1330 if (rt->rt6i_flags & RTF_CACHE) {
1331 dst_hold(&rt->dst);
8e3d5be7 1332 ip6_del_rt(rt);
1eb4f758 1333 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1334 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1335 }
1da177e4
LT
1336 }
1337}
1338
45e4fd26
MKL
1339static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1340{
1341 struct net *net = dev_net(rt->dst.dev);
1342
1343 rt->rt6i_flags |= RTF_MODIFIED;
1344 rt->rt6i_pmtu = mtu;
1345 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1346}
1347
1348static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1349 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1350{
67ba4152 1351 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1352
45e4fd26
MKL
1353 if (rt6->rt6i_flags & RTF_LOCAL)
1354 return;
81aded24 1355
45e4fd26
MKL
1356 dst_confirm(dst);
1357 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1358 if (mtu >= dst_mtu(dst))
1359 return;
9d289715 1360
45e4fd26
MKL
1361 if (rt6->rt6i_flags & RTF_CACHE) {
1362 rt6_do_update_pmtu(rt6, mtu);
1363 } else {
1364 const struct in6_addr *daddr, *saddr;
1365 struct rt6_info *nrt6;
1366
1367 if (iph) {
1368 daddr = &iph->daddr;
1369 saddr = &iph->saddr;
1370 } else if (sk) {
1371 daddr = &sk->sk_v6_daddr;
1372 saddr = &inet6_sk(sk)->saddr;
1373 } else {
1374 return;
1375 }
1376 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1377 if (nrt6) {
1378 rt6_do_update_pmtu(nrt6, mtu);
1379
1380 /* ip6_ins_rt(nrt6) will bump the
1381 * rt6->rt6i_node->fn_sernum
1382 * which will fail the next rt6_check() and
1383 * invalidate the sk->sk_dst_cache.
1384 */
1385 ip6_ins_rt(nrt6);
1386 }
1da177e4
LT
1387 }
1388}
1389
45e4fd26
MKL
1390static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1391 struct sk_buff *skb, u32 mtu)
1392{
1393 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1394}
1395
42ae66c8
DM
1396void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1397 int oif, u32 mark)
81aded24
DM
1398{
1399 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1400 struct dst_entry *dst;
1401 struct flowi6 fl6;
1402
1403 memset(&fl6, 0, sizeof(fl6));
1404 fl6.flowi6_oif = oif;
1b3c61dc 1405 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1406 fl6.daddr = iph->daddr;
1407 fl6.saddr = iph->saddr;
6502ca52 1408 fl6.flowlabel = ip6_flowinfo(iph);
81aded24
DM
1409
1410 dst = ip6_route_output(net, NULL, &fl6);
1411 if (!dst->error)
45e4fd26 1412 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1413 dst_release(dst);
1414}
1415EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1416
1417void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1418{
1419 ip6_update_pmtu(skb, sock_net(sk), mtu,
1420 sk->sk_bound_dev_if, sk->sk_mark);
1421}
1422EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1423
b55b76b2
DJ
1424/* Handle redirects */
1425struct ip6rd_flowi {
1426 struct flowi6 fl6;
1427 struct in6_addr gateway;
1428};
1429
1430static struct rt6_info *__ip6_route_redirect(struct net *net,
1431 struct fib6_table *table,
1432 struct flowi6 *fl6,
1433 int flags)
1434{
1435 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1436 struct rt6_info *rt;
1437 struct fib6_node *fn;
1438
1439 /* Get the "current" route for this destination and
1440 * check if the redirect has come from approriate router.
1441 *
1442 * RFC 4861 specifies that redirects should only be
1443 * accepted if they come from the nexthop to the target.
1444 * Due to the way the routes are chosen, this notion
1445 * is a bit fuzzy and one might need to check all possible
1446 * routes.
1447 */
1448
1449 read_lock_bh(&table->tb6_lock);
1450 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1451restart:
1452 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1453 if (rt6_check_expired(rt))
1454 continue;
1455 if (rt->dst.error)
1456 break;
1457 if (!(rt->rt6i_flags & RTF_GATEWAY))
1458 continue;
1459 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1460 continue;
1461 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1462 continue;
1463 break;
1464 }
1465
1466 if (!rt)
1467 rt = net->ipv6.ip6_null_entry;
1468 else if (rt->dst.error) {
1469 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1470 goto out;
1471 }
1472
1473 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1474 fn = fib6_backtrack(fn, &fl6->saddr);
1475 if (fn)
1476 goto restart;
b55b76b2 1477 }
a3c00e46 1478
b0a1ba59 1479out:
b55b76b2
DJ
1480 dst_hold(&rt->dst);
1481
1482 read_unlock_bh(&table->tb6_lock);
1483
1484 return rt;
1485};
1486
1487static struct dst_entry *ip6_route_redirect(struct net *net,
1488 const struct flowi6 *fl6,
1489 const struct in6_addr *gateway)
1490{
1491 int flags = RT6_LOOKUP_F_HAS_SADDR;
1492 struct ip6rd_flowi rdfl;
1493
1494 rdfl.fl6 = *fl6;
1495 rdfl.gateway = *gateway;
1496
1497 return fib6_rule_lookup(net, &rdfl.fl6,
1498 flags, __ip6_route_redirect);
1499}
1500
3a5ad2ee
DM
1501void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1502{
1503 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1504 struct dst_entry *dst;
1505 struct flowi6 fl6;
1506
1507 memset(&fl6, 0, sizeof(fl6));
e374c618 1508 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1509 fl6.flowi6_oif = oif;
1510 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1511 fl6.daddr = iph->daddr;
1512 fl6.saddr = iph->saddr;
6502ca52 1513 fl6.flowlabel = ip6_flowinfo(iph);
3a5ad2ee 1514
b55b76b2
DJ
1515 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1516 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1517 dst_release(dst);
1518}
1519EXPORT_SYMBOL_GPL(ip6_redirect);
1520
c92a59ec
DJ
1521void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1522 u32 mark)
1523{
1524 const struct ipv6hdr *iph = ipv6_hdr(skb);
1525 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1526 struct dst_entry *dst;
1527 struct flowi6 fl6;
1528
1529 memset(&fl6, 0, sizeof(fl6));
e374c618 1530 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1531 fl6.flowi6_oif = oif;
1532 fl6.flowi6_mark = mark;
c92a59ec
DJ
1533 fl6.daddr = msg->dest;
1534 fl6.saddr = iph->daddr;
1535
b55b76b2
DJ
1536 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1537 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1538 dst_release(dst);
1539}
1540
3a5ad2ee
DM
1541void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1542{
1543 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1544}
1545EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1546
0dbaee3b 1547static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1548{
0dbaee3b
DM
1549 struct net_device *dev = dst->dev;
1550 unsigned int mtu = dst_mtu(dst);
1551 struct net *net = dev_net(dev);
1552
1da177e4
LT
1553 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1554
5578689a
DL
1555 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1556 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1557
1558 /*
1ab1457c
YH
1559 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1560 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1561 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1562 * rely only on pmtu discovery"
1563 */
1564 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1565 mtu = IPV6_MAXPLEN;
1566 return mtu;
1567}
1568
ebb762f2 1569static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1570{
4b32b5ad
MKL
1571 const struct rt6_info *rt = (const struct rt6_info *)dst;
1572 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1573 struct inet6_dev *idev;
618f9bc7 1574
4b32b5ad
MKL
1575 if (mtu)
1576 goto out;
1577
1578 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1579 if (mtu)
30f78d8e 1580 goto out;
618f9bc7
SK
1581
1582 mtu = IPV6_MIN_MTU;
d33e4553
DM
1583
1584 rcu_read_lock();
1585 idev = __in6_dev_get(dst->dev);
1586 if (idev)
1587 mtu = idev->cnf.mtu6;
1588 rcu_read_unlock();
1589
30f78d8e
ED
1590out:
1591 return min_t(unsigned int, mtu, IP6_MAX_MTU);
d33e4553
DM
1592}
1593
3b00944c
YH
/* Singly linked list (via dst->next) of dsts handed out by icmp6_dst_alloc(). */
static struct dst_entry *icmp6_dst_gc_list;
/* Serialises every walk and update of icmp6_dst_gc_list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1596
3b00944c 1597struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1598 struct flowi6 *fl6)
1da177e4 1599{
87a11578 1600 struct dst_entry *dst;
1da177e4
LT
1601 struct rt6_info *rt;
1602 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1603 struct net *net = dev_net(dev);
1da177e4 1604
38308473 1605 if (unlikely(!idev))
122bdf67 1606 return ERR_PTR(-ENODEV);
1da177e4 1607
ad706862 1608 rt = ip6_dst_alloc(net, dev, 0);
38308473 1609 if (unlikely(!rt)) {
1da177e4 1610 in6_dev_put(idev);
87a11578 1611 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1612 goto out;
1613 }
1614
8e2ec639
YZ
1615 rt->dst.flags |= DST_HOST;
1616 rt->dst.output = ip6_output;
d8d1f30b 1617 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1618 rt->rt6i_gateway = fl6->daddr;
87a11578 1619 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1620 rt->rt6i_dst.plen = 128;
1621 rt->rt6i_idev = idev;
14edd87d 1622 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1623
3b00944c 1624 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1625 rt->dst.next = icmp6_dst_gc_list;
1626 icmp6_dst_gc_list = &rt->dst;
3b00944c 1627 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1628
5578689a 1629 fib6_force_start_gc(net);
1da177e4 1630
87a11578
DM
1631 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1632
1da177e4 1633out:
87a11578 1634 return dst;
1da177e4
LT
1635}
1636
3d0f24a7 1637int icmp6_dst_gc(void)
1da177e4 1638{
e9476e95 1639 struct dst_entry *dst, **pprev;
3d0f24a7 1640 int more = 0;
1da177e4 1641
3b00944c
YH
1642 spin_lock_bh(&icmp6_dst_lock);
1643 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1644
1da177e4
LT
1645 while ((dst = *pprev) != NULL) {
1646 if (!atomic_read(&dst->__refcnt)) {
1647 *pprev = dst->next;
1648 dst_free(dst);
1da177e4
LT
1649 } else {
1650 pprev = &dst->next;
3d0f24a7 1651 ++more;
1da177e4
LT
1652 }
1653 }
1654
3b00944c 1655 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1656
3d0f24a7 1657 return more;
1da177e4
LT
1658}
1659
1e493d19
DM
1660static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1661 void *arg)
1662{
1663 struct dst_entry *dst, **pprev;
1664
1665 spin_lock_bh(&icmp6_dst_lock);
1666 pprev = &icmp6_dst_gc_list;
1667 while ((dst = *pprev) != NULL) {
1668 struct rt6_info *rt = (struct rt6_info *) dst;
1669 if (func(rt, arg)) {
1670 *pprev = dst->next;
1671 dst_free(dst);
1672 } else {
1673 pprev = &dst->next;
1674 }
1675 }
1676 spin_unlock_bh(&icmp6_dst_lock);
1677}
1678
569d3645 1679static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1680{
86393e52 1681 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1682 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1683 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1684 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1685 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1686 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1687 int entries;
7019b78e 1688
fc66f95c 1689 entries = dst_entries_get_fast(ops);
49a18d86 1690 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1691 entries <= rt_max_size)
1da177e4
LT
1692 goto out;
1693
6891a346 1694 net->ipv6.ip6_rt_gc_expire++;
14956643 1695 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1696 entries = dst_entries_get_slow(ops);
1697 if (entries < ops->gc_thresh)
7019b78e 1698 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1699out:
7019b78e 1700 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1701 return entries > rt_max_size;
1da177e4
LT
1702}
1703
e715b6d3
FW
1704static int ip6_convert_metrics(struct mx6_config *mxc,
1705 const struct fib6_config *cfg)
1706{
c3a8d947 1707 bool ecn_ca = false;
e715b6d3
FW
1708 struct nlattr *nla;
1709 int remaining;
1710 u32 *mp;
1711
63159f29 1712 if (!cfg->fc_mx)
e715b6d3
FW
1713 return 0;
1714
1715 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1716 if (unlikely(!mp))
1717 return -ENOMEM;
1718
1719 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1720 int type = nla_type(nla);
1bb14807 1721 u32 val;
e715b6d3 1722
1bb14807
DB
1723 if (!type)
1724 continue;
1725 if (unlikely(type > RTAX_MAX))
1726 goto err;
ea697639 1727
1bb14807
DB
1728 if (type == RTAX_CC_ALGO) {
1729 char tmp[TCP_CA_NAME_MAX];
e715b6d3 1730
1bb14807 1731 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 1732 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
1733 if (val == TCP_CA_UNSPEC)
1734 goto err;
1735 } else {
1736 val = nla_get_u32(nla);
e715b6d3 1737 }
b8d3e416
DB
1738 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1739 goto err;
1bb14807
DB
1740
1741 mp[type - 1] = val;
1742 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
1743 }
1744
c3a8d947
DB
1745 if (ecn_ca) {
1746 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1747 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1748 }
e715b6d3 1749
c3a8d947 1750 mxc->mx = mp;
e715b6d3
FW
1751 return 0;
1752 err:
1753 kfree(mp);
1754 return -EINVAL;
1755}
1da177e4 1756
6b9ea5a6 1757int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
1da177e4
LT
1758{
1759 int err;
5578689a 1760 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1761 struct rt6_info *rt = NULL;
1762 struct net_device *dev = NULL;
1763 struct inet6_dev *idev = NULL;
c71099ac 1764 struct fib6_table *table;
1da177e4
LT
1765 int addr_type;
1766
86872cb5 1767 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1768 return -EINVAL;
1769#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1770 if (cfg->fc_src_len)
1da177e4
LT
1771 return -EINVAL;
1772#endif
86872cb5 1773 if (cfg->fc_ifindex) {
1da177e4 1774 err = -ENODEV;
5578689a 1775 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1776 if (!dev)
1777 goto out;
1778 idev = in6_dev_get(dev);
1779 if (!idev)
1780 goto out;
1781 }
1782
86872cb5
TG
1783 if (cfg->fc_metric == 0)
1784 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1785
d71314b4 1786 err = -ENOBUFS;
38308473
DM
1787 if (cfg->fc_nlinfo.nlh &&
1788 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1789 table = fib6_get_table(net, cfg->fc_table);
38308473 1790 if (!table) {
f3213831 1791 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1792 table = fib6_new_table(net, cfg->fc_table);
1793 }
1794 } else {
1795 table = fib6_new_table(net, cfg->fc_table);
1796 }
38308473
DM
1797
1798 if (!table)
c71099ac 1799 goto out;
c71099ac 1800
ad706862
MKL
1801 rt = ip6_dst_alloc(net, NULL,
1802 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 1803
38308473 1804 if (!rt) {
1da177e4
LT
1805 err = -ENOMEM;
1806 goto out;
1807 }
1808
1716a961
G
1809 if (cfg->fc_flags & RTF_EXPIRES)
1810 rt6_set_expires(rt, jiffies +
1811 clock_t_to_jiffies(cfg->fc_expires));
1812 else
1813 rt6_clean_expires(rt);
1da177e4 1814
86872cb5
TG
1815 if (cfg->fc_protocol == RTPROT_UNSPEC)
1816 cfg->fc_protocol = RTPROT_BOOT;
1817 rt->rt6i_protocol = cfg->fc_protocol;
1818
1819 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1820
1821 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1822 rt->dst.input = ip6_mc_input;
ab79ad14
1823 else if (cfg->fc_flags & RTF_LOCAL)
1824 rt->dst.input = ip6_input;
1da177e4 1825 else
d8d1f30b 1826 rt->dst.input = ip6_forward;
1da177e4 1827
d8d1f30b 1828 rt->dst.output = ip6_output;
1da177e4 1829
19e42e45
RP
1830 if (cfg->fc_encap) {
1831 struct lwtunnel_state *lwtstate;
1832
1833 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
127eb7cd
TH
1834 cfg->fc_encap, AF_INET6, cfg,
1835 &lwtstate);
19e42e45
RP
1836 if (err)
1837 goto out;
61adedf3
JB
1838 rt->dst.lwtstate = lwtstate_get(lwtstate);
1839 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1840 rt->dst.lwtstate->orig_output = rt->dst.output;
1841 rt->dst.output = lwtunnel_output;
25368623 1842 }
61adedf3
JB
1843 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1844 rt->dst.lwtstate->orig_input = rt->dst.input;
1845 rt->dst.input = lwtunnel_input;
25368623 1846 }
19e42e45
RP
1847 }
1848
86872cb5
TG
1849 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1850 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 1851 if (rt->rt6i_dst.plen == 128)
e5fd387a 1852 rt->dst.flags |= DST_HOST;
e5fd387a 1853
1da177e4 1854#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1855 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1856 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1857#endif
1858
86872cb5 1859 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1860
1861 /* We cannot add true routes via loopback here,
1862 they would result in kernel looping; promote them to reject routes
1863 */
86872cb5 1864 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1865 (dev && (dev->flags & IFF_LOOPBACK) &&
1866 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1867 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1868 /* hold loopback dev/idev if we haven't done so. */
5578689a 1869 if (dev != net->loopback_dev) {
1da177e4
LT
1870 if (dev) {
1871 dev_put(dev);
1872 in6_dev_put(idev);
1873 }
5578689a 1874 dev = net->loopback_dev;
1da177e4
LT
1875 dev_hold(dev);
1876 idev = in6_dev_get(dev);
1877 if (!idev) {
1878 err = -ENODEV;
1879 goto out;
1880 }
1881 }
1da177e4 1882 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1883 switch (cfg->fc_type) {
1884 case RTN_BLACKHOLE:
1885 rt->dst.error = -EINVAL;
aad88724 1886 rt->dst.output = dst_discard_sk;
7150aede 1887 rt->dst.input = dst_discard;
ef2c7d7b
ND
1888 break;
1889 case RTN_PROHIBIT:
1890 rt->dst.error = -EACCES;
7150aede
K
1891 rt->dst.output = ip6_pkt_prohibit_out;
1892 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 1893 break;
b4949ab2 1894 case RTN_THROW:
0315e382 1895 case RTN_UNREACHABLE:
ef2c7d7b 1896 default:
7150aede 1897 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
1898 : (cfg->fc_type == RTN_UNREACHABLE)
1899 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
1900 rt->dst.output = ip6_pkt_discard_out;
1901 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
1902 break;
1903 }
1da177e4
LT
1904 goto install_route;
1905 }
1906
86872cb5 1907 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1908 const struct in6_addr *gw_addr;
1da177e4
LT
1909 int gwa_type;
1910
86872cb5 1911 gw_addr = &cfg->fc_gateway;
330567b7 1912 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
1913
1914 /* if gw_addr is local we will fail to detect this in case
1915 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1916 * will return already-added prefix route via interface that
1917 * prefix route was assigned to, which might be non-loopback.
1918 */
1919 err = -EINVAL;
330567b7
FW
1920 if (ipv6_chk_addr_and_flags(net, gw_addr,
1921 gwa_type & IPV6_ADDR_LINKLOCAL ?
1922 dev : NULL, 0, 0))
48ed7b26
FW
1923 goto out;
1924
4e3fd7a0 1925 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1926
1927 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1928 struct rt6_info *grt;
1929
1930 /* IPv6 strictly inhibits using not link-local
1931 addresses as nexthop address.
1932 Otherwise, router will not able to send redirects.
1933 It is very good, but in some (rare!) circumstances
1934 (SIT, PtP, NBMA NOARP links) it is handy to allow
1935 some exceptions. --ANK
1936 */
38308473 1937 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1938 goto out;
1939
5578689a 1940 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1941
1942 err = -EHOSTUNREACH;
38308473 1943 if (!grt)
1da177e4
LT
1944 goto out;
1945 if (dev) {
d1918542 1946 if (dev != grt->dst.dev) {
94e187c0 1947 ip6_rt_put(grt);
1da177e4
LT
1948 goto out;
1949 }
1950 } else {
d1918542 1951 dev = grt->dst.dev;
1da177e4
LT
1952 idev = grt->rt6i_idev;
1953 dev_hold(dev);
1954 in6_dev_hold(grt->rt6i_idev);
1955 }
38308473 1956 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1957 err = 0;
94e187c0 1958 ip6_rt_put(grt);
1da177e4
LT
1959
1960 if (err)
1961 goto out;
1962 }
1963 err = -EINVAL;
38308473 1964 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1965 goto out;
1966 }
1967
1968 err = -ENODEV;
38308473 1969 if (!dev)
1da177e4
LT
1970 goto out;
1971
c3968a85
DW
1972 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1973 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1974 err = -EINVAL;
1975 goto out;
1976 }
4e3fd7a0 1977 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1978 rt->rt6i_prefsrc.plen = 128;
1979 } else
1980 rt->rt6i_prefsrc.plen = 0;
1981
86872cb5 1982 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1983
1984install_route:
d8d1f30b 1985 rt->dst.dev = dev;
1da177e4 1986 rt->rt6i_idev = idev;
c71099ac 1987 rt->rt6i_table = table;
63152fc0 1988
c346dca1 1989 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1990
6b9ea5a6
RP
1991 *rt_ret = rt;
1992
1993 return 0;
1994out:
1995 if (dev)
1996 dev_put(dev);
1997 if (idev)
1998 in6_dev_put(idev);
1999 if (rt)
2000 dst_free(&rt->dst);
2001
2002 *rt_ret = NULL;
2003
2004 return err;
2005}
2006
2007int ip6_route_add(struct fib6_config *cfg)
2008{
2009 struct mx6_config mxc = { .mx = NULL, };
2010 struct rt6_info *rt = NULL;
2011 int err;
2012
2013 err = ip6_route_info_create(cfg, &rt);
2014 if (err)
2015 goto out;
2016
e715b6d3
FW
2017 err = ip6_convert_metrics(&mxc, cfg);
2018 if (err)
2019 goto out;
1da177e4 2020
e715b6d3
FW
2021 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2022
2023 kfree(mxc.mx);
6b9ea5a6 2024
e715b6d3 2025 return err;
1da177e4 2026out:
1da177e4 2027 if (rt)
d8d1f30b 2028 dst_free(&rt->dst);
6b9ea5a6 2029
1da177e4
LT
2030 return err;
2031}
2032
86872cb5 2033static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2034{
2035 int err;
c71099ac 2036 struct fib6_table *table;
d1918542 2037 struct net *net = dev_net(rt->dst.dev);
1da177e4 2038
8e3d5be7
MKL
2039 if (rt == net->ipv6.ip6_null_entry ||
2040 rt->dst.flags & DST_NOCACHE) {
6825a26c
G
2041 err = -ENOENT;
2042 goto out;
2043 }
6c813a72 2044
c71099ac
TG
2045 table = rt->rt6i_table;
2046 write_lock_bh(&table->tb6_lock);
86872cb5 2047 err = fib6_del(rt, info);
c71099ac 2048 write_unlock_bh(&table->tb6_lock);
1da177e4 2049
6825a26c 2050out:
94e187c0 2051 ip6_rt_put(rt);
1da177e4
LT
2052 return err;
2053}
2054
e0a1ad73
TG
2055int ip6_del_rt(struct rt6_info *rt)
2056{
4d1169c1 2057 struct nl_info info = {
d1918542 2058 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2059 };
528c4ceb 2060 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2061}
2062
86872cb5 2063static int ip6_route_del(struct fib6_config *cfg)
1da177e4 2064{
c71099ac 2065 struct fib6_table *table;
1da177e4
LT
2066 struct fib6_node *fn;
2067 struct rt6_info *rt;
2068 int err = -ESRCH;
2069
5578689a 2070 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 2071 if (!table)
c71099ac
TG
2072 return err;
2073
2074 read_lock_bh(&table->tb6_lock);
1da177e4 2075
c71099ac 2076 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2077 &cfg->fc_dst, cfg->fc_dst_len,
2078 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2079
1da177e4 2080 if (fn) {
d8d1f30b 2081 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2082 if ((rt->rt6i_flags & RTF_CACHE) &&
2083 !(cfg->fc_flags & RTF_CACHE))
2084 continue;
86872cb5 2085 if (cfg->fc_ifindex &&
d1918542
DM
2086 (!rt->dst.dev ||
2087 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2088 continue;
86872cb5
TG
2089 if (cfg->fc_flags & RTF_GATEWAY &&
2090 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2091 continue;
86872cb5 2092 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2093 continue;
d8d1f30b 2094 dst_hold(&rt->dst);
c71099ac 2095 read_unlock_bh(&table->tb6_lock);
1da177e4 2096
86872cb5 2097 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
2098 }
2099 }
c71099ac 2100 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2101
2102 return err;
2103}
2104
6700c270 2105static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2106{
e8599ff4 2107 struct net *net = dev_net(skb->dev);
a6279458 2108 struct netevent_redirect netevent;
e8599ff4 2109 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2110 struct ndisc_options ndopts;
2111 struct inet6_dev *in6_dev;
2112 struct neighbour *neigh;
71bcdba0 2113 struct rd_msg *msg;
6e157b6a
DM
2114 int optlen, on_link;
2115 u8 *lladdr;
e8599ff4 2116
29a3cad5 2117 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2118 optlen -= sizeof(*msg);
e8599ff4
DM
2119
2120 if (optlen < 0) {
6e157b6a 2121 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2122 return;
2123 }
2124
71bcdba0 2125 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2126
71bcdba0 2127 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2128 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2129 return;
2130 }
2131
6e157b6a 2132 on_link = 0;
71bcdba0 2133 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2134 on_link = 1;
71bcdba0 2135 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2136 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2137 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2138 return;
2139 }
2140
2141 in6_dev = __in6_dev_get(skb->dev);
2142 if (!in6_dev)
2143 return;
2144 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2145 return;
2146
2147 /* RFC2461 8.1:
2148 * The IP source address of the Redirect MUST be the same as the current
2149 * first-hop router for the specified ICMP Destination Address.
2150 */
2151
71bcdba0 2152 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2153 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2154 return;
2155 }
6e157b6a
DM
2156
2157 lladdr = NULL;
e8599ff4
DM
2158 if (ndopts.nd_opts_tgt_lladdr) {
2159 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2160 skb->dev);
2161 if (!lladdr) {
2162 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2163 return;
2164 }
2165 }
2166
6e157b6a
DM
2167 rt = (struct rt6_info *) dst;
2168 if (rt == net->ipv6.ip6_null_entry) {
2169 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2170 return;
6e157b6a 2171 }
e8599ff4 2172
6e157b6a
DM
2173 /* Redirect received -> path was valid.
2174 * Look, redirects are sent only in response to data packets,
2175 * so that this nexthop apparently is reachable. --ANK
2176 */
2177 dst_confirm(&rt->dst);
a6279458 2178
71bcdba0 2179 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2180 if (!neigh)
2181 return;
a6279458 2182
1da177e4
LT
2183 /*
2184 * We have finally decided to accept it.
2185 */
2186
1ab1457c 2187 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
2188 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2189 NEIGH_UPDATE_F_OVERRIDE|
2190 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2191 NEIGH_UPDATE_F_ISROUTER))
2192 );
2193
83a09abd 2194 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2195 if (!nrt)
1da177e4
LT
2196 goto out;
2197
2198 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2199 if (on_link)
2200 nrt->rt6i_flags &= ~RTF_GATEWAY;
2201
4e3fd7a0 2202 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2203
40e22e8f 2204 if (ip6_ins_rt(nrt))
1da177e4
LT
2205 goto out;
2206
d8d1f30b
CG
2207 netevent.old = &rt->dst;
2208 netevent.new = &nrt->dst;
71bcdba0 2209 netevent.daddr = &msg->dest;
60592833 2210 netevent.neigh = neigh;
8d71740c
TT
2211 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2212
38308473 2213 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2214 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2215 ip6_del_rt(rt);
1da177e4
LT
2216 }
2217
2218out:
e8599ff4 2219 neigh_release(neigh);
6e157b6a
DM
2220}
2221
1da177e4
LT
2222/*
2223 * Misc support functions
2224 */
2225
4b32b5ad
MKL
2226static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2227{
2228 BUG_ON(from->dst.from);
2229
2230 rt->rt6i_flags &= ~RTF_EXPIRES;
2231 dst_hold(&from->dst);
2232 rt->dst.from = &from->dst;
2233 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2234}
2235
83a09abd
MKL
2236static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2237{
2238 rt->dst.input = ort->dst.input;
2239 rt->dst.output = ort->dst.output;
2240 rt->rt6i_dst = ort->rt6i_dst;
2241 rt->dst.error = ort->dst.error;
2242 rt->rt6i_idev = ort->rt6i_idev;
2243 if (rt->rt6i_idev)
2244 in6_dev_hold(rt->rt6i_idev);
2245 rt->dst.lastuse = jiffies;
2246 rt->rt6i_gateway = ort->rt6i_gateway;
2247 rt->rt6i_flags = ort->rt6i_flags;
2248 rt6_set_from(rt, ort);
2249 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2250#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2251 rt->rt6i_src = ort->rt6i_src;
1da177e4 2252#endif
83a09abd
MKL
2253 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2254 rt->rt6i_table = ort->rt6i_table;
61adedf3 2255 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2256}
2257
70ceb4f5 2258#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2259static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
2260 const struct in6_addr *prefix, int prefixlen,
2261 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
2262{
2263 struct fib6_node *fn;
2264 struct rt6_info *rt = NULL;
c71099ac
TG
2265 struct fib6_table *table;
2266
efa2cea0 2267 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 2268 if (!table)
c71099ac 2269 return NULL;
70ceb4f5 2270
5744dd9b 2271 read_lock_bh(&table->tb6_lock);
67ba4152 2272 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2273 if (!fn)
2274 goto out;
2275
d8d1f30b 2276 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2277 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2278 continue;
2279 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2280 continue;
2281 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2282 continue;
d8d1f30b 2283 dst_hold(&rt->dst);
70ceb4f5
YH
2284 break;
2285 }
2286out:
5744dd9b 2287 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2288 return rt;
2289}
2290
efa2cea0 2291static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
2292 const struct in6_addr *prefix, int prefixlen,
2293 const struct in6_addr *gwaddr, int ifindex,
95c96174 2294 unsigned int pref)
70ceb4f5 2295{
86872cb5
TG
2296 struct fib6_config cfg = {
2297 .fc_table = RT6_TABLE_INFO,
238fc7ea 2298 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2299 .fc_ifindex = ifindex,
2300 .fc_dst_len = prefixlen,
2301 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2302 RTF_UP | RTF_PREF(pref),
15e47304 2303 .fc_nlinfo.portid = 0,
efa2cea0
DL
2304 .fc_nlinfo.nlh = NULL,
2305 .fc_nlinfo.nl_net = net,
86872cb5
TG
2306 };
2307
4e3fd7a0
AD
2308 cfg.fc_dst = *prefix;
2309 cfg.fc_gateway = *gwaddr;
70ceb4f5 2310
e317da96
YH
2311 /* We should treat it as a default route if prefix length is 0. */
2312 if (!prefixlen)
86872cb5 2313 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2314
86872cb5 2315 ip6_route_add(&cfg);
70ceb4f5 2316
efa2cea0 2317 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
2318}
2319#endif
2320
b71d1d42 2321struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2322{
1da177e4 2323 struct rt6_info *rt;
c71099ac 2324 struct fib6_table *table;
1da177e4 2325
c346dca1 2326 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 2327 if (!table)
c71099ac 2328 return NULL;
1da177e4 2329
5744dd9b 2330 read_lock_bh(&table->tb6_lock);
67ba4152 2331 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2332 if (dev == rt->dst.dev &&
045927ff 2333 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2334 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2335 break;
2336 }
2337 if (rt)
d8d1f30b 2338 dst_hold(&rt->dst);
5744dd9b 2339 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2340 return rt;
2341}
2342
b71d1d42 2343struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2344 struct net_device *dev,
2345 unsigned int pref)
1da177e4 2346{
86872cb5
TG
2347 struct fib6_config cfg = {
2348 .fc_table = RT6_TABLE_DFLT,
238fc7ea 2349 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2350 .fc_ifindex = dev->ifindex,
2351 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2352 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2353 .fc_nlinfo.portid = 0,
5578689a 2354 .fc_nlinfo.nlh = NULL,
c346dca1 2355 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2356 };
1da177e4 2357
4e3fd7a0 2358 cfg.fc_gateway = *gwaddr;
1da177e4 2359
86872cb5 2360 ip6_route_add(&cfg);
1da177e4 2361
1da177e4
LT
2362 return rt6_get_dflt_router(gwaddr, dev);
2363}
2364
7b4da532 2365void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
2366{
2367 struct rt6_info *rt;
c71099ac
TG
2368 struct fib6_table *table;
2369
2370 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 2371 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 2372 if (!table)
c71099ac 2373 return;
1da177e4
LT
2374
2375restart:
c71099ac 2376 read_lock_bh(&table->tb6_lock);
d8d1f30b 2377 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2378 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2379 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2380 dst_hold(&rt->dst);
c71099ac 2381 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2382 ip6_del_rt(rt);
1da177e4
LT
2383 goto restart;
2384 }
2385 }
c71099ac 2386 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2387}
2388
5578689a
DL
2389static void rtmsg_to_fib6_config(struct net *net,
2390 struct in6_rtmsg *rtmsg,
86872cb5
TG
2391 struct fib6_config *cfg)
2392{
2393 memset(cfg, 0, sizeof(*cfg));
2394
2395 cfg->fc_table = RT6_TABLE_MAIN;
2396 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2397 cfg->fc_metric = rtmsg->rtmsg_metric;
2398 cfg->fc_expires = rtmsg->rtmsg_info;
2399 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2400 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2401 cfg->fc_flags = rtmsg->rtmsg_flags;
2402
5578689a 2403 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2404
4e3fd7a0
AD
2405 cfg->fc_dst = rtmsg->rtmsg_dst;
2406 cfg->fc_src = rtmsg->rtmsg_src;
2407 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2408}
2409
5578689a 2410int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2411{
86872cb5 2412 struct fib6_config cfg;
1da177e4
LT
2413 struct in6_rtmsg rtmsg;
2414 int err;
2415
67ba4152 2416 switch (cmd) {
1da177e4
LT
2417 case SIOCADDRT: /* Add a route */
2418 case SIOCDELRT: /* Delete a route */
af31f412 2419 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2420 return -EPERM;
2421 err = copy_from_user(&rtmsg, arg,
2422 sizeof(struct in6_rtmsg));
2423 if (err)
2424 return -EFAULT;
86872cb5 2425
5578689a 2426 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2427
1da177e4
LT
2428 rtnl_lock();
2429 switch (cmd) {
2430 case SIOCADDRT:
86872cb5 2431 err = ip6_route_add(&cfg);
1da177e4
LT
2432 break;
2433 case SIOCDELRT:
86872cb5 2434 err = ip6_route_del(&cfg);
1da177e4
LT
2435 break;
2436 default:
2437 err = -EINVAL;
2438 }
2439 rtnl_unlock();
2440
2441 return err;
3ff50b79 2442 }
1da177e4
LT
2443
2444 return -EINVAL;
2445}
2446
2447/*
2448 * Drop the packet on the floor
2449 */
2450
d5fdd6ba 2451static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2452{
612f09e8 2453 int type;
adf30907 2454 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2455 switch (ipstats_mib_noroutes) {
2456 case IPSTATS_MIB_INNOROUTES:
0660e03f 2457 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2458 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2459 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2460 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2461 break;
2462 }
2463 /* FALLTHROUGH */
2464 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2465 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2466 ipstats_mib_noroutes);
612f09e8
YH
2467 break;
2468 }
3ffe533c 2469 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2470 kfree_skb(skb);
2471 return 0;
2472}
2473
9ce8ade0
TG
2474static int ip6_pkt_discard(struct sk_buff *skb)
2475{
612f09e8 2476 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2477}
2478
aad88724 2479static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
1da177e4 2480{
adf30907 2481 skb->dev = skb_dst(skb)->dev;
612f09e8 2482 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2483}
2484
9ce8ade0
TG
2485static int ip6_pkt_prohibit(struct sk_buff *skb)
2486{
612f09e8 2487 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2488}
2489
aad88724 2490static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
9ce8ade0 2491{
adf30907 2492 skb->dev = skb_dst(skb)->dev;
612f09e8 2493 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2494}
2495
1da177e4
LT
2496/*
2497 * Allocate a dst for local (unicast / anycast) address.
2498 */
2499
2500struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2501 const struct in6_addr *addr,
8f031519 2502 bool anycast)
1da177e4 2503{
c346dca1 2504 struct net *net = dev_net(idev->dev);
a3300ef4 2505 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
ad706862 2506 DST_NOCOUNT);
a3300ef4 2507 if (!rt)
1da177e4
LT
2508 return ERR_PTR(-ENOMEM);
2509
1da177e4
LT
2510 in6_dev_hold(idev);
2511
11d53b49 2512 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2513 rt->dst.input = ip6_input;
2514 rt->dst.output = ip6_output;
1da177e4 2515 rt->rt6i_idev = idev;
1da177e4
LT
2516
2517 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2518 if (anycast)
2519 rt->rt6i_flags |= RTF_ANYCAST;
2520 else
1da177e4 2521 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2522
550bab42 2523 rt->rt6i_gateway = *addr;
4e3fd7a0 2524 rt->rt6i_dst.addr = *addr;
1da177e4 2525 rt->rt6i_dst.plen = 128;
5578689a 2526 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
8e3d5be7 2527 rt->dst.flags |= DST_NOCACHE;
1da177e4 2528
d8d1f30b 2529 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2530
2531 return rt;
2532}
2533
c3968a85
DW
2534int ip6_route_get_saddr(struct net *net,
2535 struct rt6_info *rt,
b71d1d42 2536 const struct in6_addr *daddr,
c3968a85
DW
2537 unsigned int prefs,
2538 struct in6_addr *saddr)
2539{
e16e888b
MS
2540 struct inet6_dev *idev =
2541 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
c3968a85 2542 int err = 0;
e16e888b 2543 if (rt && rt->rt6i_prefsrc.plen)
4e3fd7a0 2544 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2545 else
2546 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2547 daddr, prefs, saddr);
2548 return err;
2549}
2550
/*
 * Argument block for fib6_remove_prefsrc(): identifies the address that
 * is being removed so it can be dropped from routes' prefsrc entries.
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to restrict to, NULL for any */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address being removed */
};
2557
2558static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2559{
2560 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2561 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2562 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2563
d1918542 2564 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2565 rt != net->ipv6.ip6_null_entry &&
2566 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2567 /* remove prefsrc entry */
2568 rt->rt6i_prefsrc.plen = 0;
2569 }
2570 return 0;
2571}
2572
2573void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2574{
2575 struct net *net = dev_net(ifp->idev->dev);
2576 struct arg_dev_net_ip adni = {
2577 .dev = ifp->idev->dev,
2578 .net = net,
2579 .addr = &ifp->addr,
2580 };
0c3584d5 2581 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2582}
2583
be7a010d
DJ
2584#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2585#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2586
2587/* Remove routers and update dst entries when gateway turn into host. */
2588static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2589{
2590 struct in6_addr *gateway = (struct in6_addr *)arg;
2591
2592 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2593 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2594 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2595 return -1;
2596 }
2597 return 0;
2598}
2599
2600void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2601{
2602 fib6_clean_all(net, fib6_clean_tohost, gateway);
2603}
2604
/* Argument block for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down, NULL for any */
	struct net *net;	/* namespace whose null entry is skipped */
};
2609
1da177e4
LT
2610static int fib6_ifdown(struct rt6_info *rt, void *arg)
2611{
bc3ef660 2612 const struct arg_dev_net *adn = arg;
2613 const struct net_device *dev = adn->dev;
8ed67789 2614
d1918542 2615 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2616 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2617 return -1;
c159d30c 2618
1da177e4
LT
2619 return 0;
2620}
2621
f3db4851 2622void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2623{
8ed67789
DL
2624 struct arg_dev_net adn = {
2625 .dev = dev,
2626 .net = net,
2627 };
2628
0c3584d5 2629 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2630 icmp6_clean_all(fib6_ifdown, &adn);
e332bc67
EB
2631 if (dev)
2632 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2633}
2634
/* Argument block for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2639
2640static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2641{
2642 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2643 struct inet6_dev *idev;
2644
2645 /* In IPv6 pmtu discovery is not optional,
2646 so that RTAX_MTU lock cannot disable it.
2647 We still use this lock to block changes
2648 caused by addrconf/ndisc.
2649 */
2650
2651 idev = __in6_dev_get(arg->dev);
38308473 2652 if (!idev)
1da177e4
LT
2653 return 0;
2654
2655 /* For administrative MTU increase, there is no way to discover
2656 IPv6 PMTU increase, so PMTU increase should be updated here.
2657 Since RFC 1981 doesn't include administrative MTU increase
2658 update PMTU increase is a MUST. (i.e. jumbo frame)
2659 */
2660 /*
2661 If new MTU is less than route PMTU, this new MTU will be the
2662 lowest MTU in the path, update the route PMTU to reflect PMTU
2663 decreases; if new MTU is greater than route PMTU, and the
2664 old MTU is the lowest MTU in the path, update the route PMTU
2665 to reflect the increase. In this case if the other nodes' MTU
2666 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2667 PMTU discouvery.
2668 */
d1918542 2669 if (rt->dst.dev == arg->dev &&
4b32b5ad
MKL
2670 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2671 if (rt->rt6i_flags & RTF_CACHE) {
2672 /* For RTF_CACHE with rt6i_pmtu == 0
2673 * (i.e. a redirected route),
2674 * the metrics of its rt->dst.from has already
2675 * been updated.
2676 */
2677 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2678 rt->rt6i_pmtu = arg->mtu;
2679 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2680 (dst_mtu(&rt->dst) < arg->mtu &&
2681 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2682 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2683 }
566cfd8f 2684 }
1da177e4
LT
2685 return 0;
2686}
2687
95c96174 2688void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2689{
c71099ac
TG
2690 struct rt6_mtu_change_arg arg = {
2691 .dev = dev,
2692 .mtu = mtu,
2693 };
1da177e4 2694
0c3584d5 2695 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2696}
2697
ef7c79ed 2698static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2699 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2700 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2701 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2702 [RTA_PRIORITY] = { .type = NLA_U32 },
2703 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2704 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2705 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2706 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2707 [RTA_ENCAP] = { .type = NLA_NESTED },
86872cb5
TG
2708};
2709
2710static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2711 struct fib6_config *cfg)
1da177e4 2712{
86872cb5
TG
2713 struct rtmsg *rtm;
2714 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2715 unsigned int pref;
86872cb5 2716 int err;
1da177e4 2717
86872cb5
TG
2718 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2719 if (err < 0)
2720 goto errout;
1da177e4 2721
86872cb5
TG
2722 err = -EINVAL;
2723 rtm = nlmsg_data(nlh);
2724 memset(cfg, 0, sizeof(*cfg));
2725
2726 cfg->fc_table = rtm->rtm_table;
2727 cfg->fc_dst_len = rtm->rtm_dst_len;
2728 cfg->fc_src_len = rtm->rtm_src_len;
2729 cfg->fc_flags = RTF_UP;
2730 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2731 cfg->fc_type = rtm->rtm_type;
86872cb5 2732
ef2c7d7b
ND
2733 if (rtm->rtm_type == RTN_UNREACHABLE ||
2734 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2735 rtm->rtm_type == RTN_PROHIBIT ||
2736 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2737 cfg->fc_flags |= RTF_REJECT;
2738
ab79ad14
2739 if (rtm->rtm_type == RTN_LOCAL)
2740 cfg->fc_flags |= RTF_LOCAL;
2741
1f56a01f
MKL
2742 if (rtm->rtm_flags & RTM_F_CLONED)
2743 cfg->fc_flags |= RTF_CACHE;
2744
15e47304 2745 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2746 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2747 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2748
2749 if (tb[RTA_GATEWAY]) {
67b61f6c 2750 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2751 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2752 }
86872cb5
TG
2753
2754 if (tb[RTA_DST]) {
2755 int plen = (rtm->rtm_dst_len + 7) >> 3;
2756
2757 if (nla_len(tb[RTA_DST]) < plen)
2758 goto errout;
2759
2760 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2761 }
86872cb5
TG
2762
2763 if (tb[RTA_SRC]) {
2764 int plen = (rtm->rtm_src_len + 7) >> 3;
2765
2766 if (nla_len(tb[RTA_SRC]) < plen)
2767 goto errout;
2768
2769 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2770 }
86872cb5 2771
c3968a85 2772 if (tb[RTA_PREFSRC])
67b61f6c 2773 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2774
86872cb5
TG
2775 if (tb[RTA_OIF])
2776 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2777
2778 if (tb[RTA_PRIORITY])
2779 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2780
2781 if (tb[RTA_METRICS]) {
2782 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2783 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2784 }
86872cb5
TG
2785
2786 if (tb[RTA_TABLE])
2787 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2788
51ebd318
ND
2789 if (tb[RTA_MULTIPATH]) {
2790 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2791 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2792 }
2793
c78ba6d6
LR
2794 if (tb[RTA_PREF]) {
2795 pref = nla_get_u8(tb[RTA_PREF]);
2796 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2797 pref != ICMPV6_ROUTER_PREF_HIGH)
2798 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2799 cfg->fc_flags |= RTF_PREF(pref);
2800 }
2801
19e42e45
RP
2802 if (tb[RTA_ENCAP])
2803 cfg->fc_encap = tb[RTA_ENCAP];
2804
2805 if (tb[RTA_ENCAP_TYPE])
2806 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2807
86872cb5
TG
2808 err = 0;
2809errout:
2810 return err;
1da177e4
LT
2811}
2812
6b9ea5a6
RP
2813struct rt6_nh {
2814 struct rt6_info *rt6_info;
2815 struct fib6_config r_cfg;
2816 struct mx6_config mxc;
2817 struct list_head next;
2818};
2819
2820static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2821{
2822 struct rt6_nh *nh;
2823
2824 list_for_each_entry(nh, rt6_nh_list, next) {
2825 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2826 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2827 nh->r_cfg.fc_ifindex);
2828 }
2829}
2830
2831static int ip6_route_info_append(struct list_head *rt6_nh_list,
2832 struct rt6_info *rt, struct fib6_config *r_cfg)
2833{
2834 struct rt6_nh *nh;
2835 struct rt6_info *rtnh;
2836 int err = -EEXIST;
2837
2838 list_for_each_entry(nh, rt6_nh_list, next) {
2839 /* check if rt6_info already exists */
2840 rtnh = nh->rt6_info;
2841
2842 if (rtnh->dst.dev == rt->dst.dev &&
2843 rtnh->rt6i_idev == rt->rt6i_idev &&
2844 ipv6_addr_equal(&rtnh->rt6i_gateway,
2845 &rt->rt6i_gateway))
2846 return err;
2847 }
2848
2849 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2850 if (!nh)
2851 return -ENOMEM;
2852 nh->rt6_info = rt;
2853 err = ip6_convert_metrics(&nh->mxc, r_cfg);
2854 if (err) {
2855 kfree(nh);
2856 return err;
2857 }
2858 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2859 list_add_tail(&nh->next, rt6_nh_list);
2860
2861 return 0;
2862}
2863
2864static int ip6_route_multipath_add(struct fib6_config *cfg)
51ebd318
ND
2865{
2866 struct fib6_config r_cfg;
2867 struct rtnexthop *rtnh;
6b9ea5a6
RP
2868 struct rt6_info *rt;
2869 struct rt6_nh *err_nh;
2870 struct rt6_nh *nh, *nh_safe;
51ebd318
ND
2871 int remaining;
2872 int attrlen;
6b9ea5a6
RP
2873 int err = 1;
2874 int nhn = 0;
2875 int replace = (cfg->fc_nlinfo.nlh &&
2876 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2877 LIST_HEAD(rt6_nh_list);
51ebd318 2878
35f1b4e9 2879 remaining = cfg->fc_mp_len;
51ebd318 2880 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 2881
6b9ea5a6
RP
2882 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2883 * rt6_info structs per nexthop
2884 */
51ebd318
ND
2885 while (rtnh_ok(rtnh, remaining)) {
2886 memcpy(&r_cfg, cfg, sizeof(*cfg));
2887 if (rtnh->rtnh_ifindex)
2888 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2889
2890 attrlen = rtnh_attrlen(rtnh);
2891 if (attrlen > 0) {
2892 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2893
2894 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2895 if (nla) {
67b61f6c 2896 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
2897 r_cfg.fc_flags |= RTF_GATEWAY;
2898 }
19e42e45
RP
2899 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2900 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2901 if (nla)
2902 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 2903 }
6b9ea5a6
RP
2904
2905 err = ip6_route_info_create(&r_cfg, &rt);
2906 if (err)
2907 goto cleanup;
2908
2909 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 2910 if (err) {
6b9ea5a6
RP
2911 dst_free(&rt->dst);
2912 goto cleanup;
2913 }
2914
2915 rtnh = rtnh_next(rtnh, &remaining);
2916 }
2917
2918 err_nh = NULL;
2919 list_for_each_entry(nh, &rt6_nh_list, next) {
2920 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2921 /* nh->rt6_info is used or freed at this point, reset to NULL*/
2922 nh->rt6_info = NULL;
2923 if (err) {
2924 if (replace && nhn)
2925 ip6_print_replace_route_err(&rt6_nh_list);
2926 err_nh = nh;
2927 goto add_errout;
51ebd318 2928 }
6b9ea5a6 2929
1a72418b 2930 /* Because each route is added like a single route we remove
27596472
MK
2931 * these flags after the first nexthop: if there is a collision,
2932 * we have already failed to add the first nexthop:
2933 * fib6_add_rt2node() has rejected it; when replacing, old
2934 * nexthops have been replaced by first new, the rest should
2935 * be added to it.
1a72418b 2936 */
27596472
MK
2937 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2938 NLM_F_REPLACE);
6b9ea5a6
RP
2939 nhn++;
2940 }
2941
2942 goto cleanup;
2943
2944add_errout:
2945 /* Delete routes that were already added */
2946 list_for_each_entry(nh, &rt6_nh_list, next) {
2947 if (err_nh == nh)
2948 break;
2949 ip6_route_del(&nh->r_cfg);
2950 }
2951
2952cleanup:
2953 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2954 if (nh->rt6_info)
2955 dst_free(&nh->rt6_info->dst);
52fe51f8 2956 kfree(nh->mxc.mx);
6b9ea5a6
RP
2957 list_del(&nh->next);
2958 kfree(nh);
2959 }
2960
2961 return err;
2962}
2963
2964static int ip6_route_multipath_del(struct fib6_config *cfg)
2965{
2966 struct fib6_config r_cfg;
2967 struct rtnexthop *rtnh;
2968 int remaining;
2969 int attrlen;
2970 int err = 1, last_err = 0;
2971
2972 remaining = cfg->fc_mp_len;
2973 rtnh = (struct rtnexthop *)cfg->fc_mp;
2974
2975 /* Parse a Multipath Entry */
2976 while (rtnh_ok(rtnh, remaining)) {
2977 memcpy(&r_cfg, cfg, sizeof(*cfg));
2978 if (rtnh->rtnh_ifindex)
2979 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2980
2981 attrlen = rtnh_attrlen(rtnh);
2982 if (attrlen > 0) {
2983 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2984
2985 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2986 if (nla) {
2987 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2988 r_cfg.fc_flags |= RTF_GATEWAY;
2989 }
2990 }
2991 err = ip6_route_del(&r_cfg);
2992 if (err)
2993 last_err = err;
2994
51ebd318
ND
2995 rtnh = rtnh_next(rtnh, &remaining);
2996 }
2997
2998 return last_err;
2999}
3000
67ba4152 3001static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3002{
86872cb5
TG
3003 struct fib6_config cfg;
3004 int err;
1da177e4 3005
86872cb5
TG
3006 err = rtm_to_fib6_config(skb, nlh, &cfg);
3007 if (err < 0)
3008 return err;
3009
51ebd318 3010 if (cfg.fc_mp)
6b9ea5a6 3011 return ip6_route_multipath_del(&cfg);
51ebd318
ND
3012 else
3013 return ip6_route_del(&cfg);
1da177e4
LT
3014}
3015
67ba4152 3016static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3017{
86872cb5
TG
3018 struct fib6_config cfg;
3019 int err;
1da177e4 3020
86872cb5
TG
3021 err = rtm_to_fib6_config(skb, nlh, &cfg);
3022 if (err < 0)
3023 return err;
3024
51ebd318 3025 if (cfg.fc_mp)
6b9ea5a6 3026 return ip6_route_multipath_add(&cfg);
51ebd318
ND
3027 else
3028 return ip6_route_add(&cfg);
1da177e4
LT
3029}
3030
19e42e45 3031static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f
TG
3032{
3033 return NLMSG_ALIGN(sizeof(struct rtmsg))
3034 + nla_total_size(16) /* RTA_SRC */
3035 + nla_total_size(16) /* RTA_DST */
3036 + nla_total_size(16) /* RTA_GATEWAY */
3037 + nla_total_size(16) /* RTA_PREFSRC */
3038 + nla_total_size(4) /* RTA_TABLE */
3039 + nla_total_size(4) /* RTA_IIF */
3040 + nla_total_size(4) /* RTA_OIF */
3041 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3042 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3043 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3044 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3045 + nla_total_size(1) /* RTA_PREF */
61adedf3 3046 + lwtunnel_get_encap_size(rt->dst.lwtstate);
339bf98f
TG
3047}
3048
191cd582
BH
3049static int rt6_fill_node(struct net *net,
3050 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3051 struct in6_addr *dst, struct in6_addr *src,
15e47304 3052 int iif, int type, u32 portid, u32 seq,
7bc570c8 3053 int prefix, int nowait, unsigned int flags)
1da177e4 3054{
4b32b5ad 3055 u32 metrics[RTAX_MAX];
1da177e4 3056 struct rtmsg *rtm;
2d7202bf 3057 struct nlmsghdr *nlh;
e3703b3d 3058 long expires;
9e762a4a 3059 u32 table;
1da177e4
LT
3060
3061 if (prefix) { /* user wants prefix routes only */
3062 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3063 /* success since this is not a prefix route */
3064 return 1;
3065 }
3066 }
3067
15e47304 3068 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3069 if (!nlh)
26932566 3070 return -EMSGSIZE;
2d7202bf
TG
3071
3072 rtm = nlmsg_data(nlh);
1da177e4
LT
3073 rtm->rtm_family = AF_INET6;
3074 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3075 rtm->rtm_src_len = rt->rt6i_src.plen;
3076 rtm->rtm_tos = 0;
c71099ac 3077 if (rt->rt6i_table)
9e762a4a 3078 table = rt->rt6i_table->tb6_id;
c71099ac 3079 else
9e762a4a
PM
3080 table = RT6_TABLE_UNSPEC;
3081 rtm->rtm_table = table;
c78679e8
DM
3082 if (nla_put_u32(skb, RTA_TABLE, table))
3083 goto nla_put_failure;
ef2c7d7b
ND
3084 if (rt->rt6i_flags & RTF_REJECT) {
3085 switch (rt->dst.error) {
3086 case -EINVAL:
3087 rtm->rtm_type = RTN_BLACKHOLE;
3088 break;
3089 case -EACCES:
3090 rtm->rtm_type = RTN_PROHIBIT;
3091 break;
b4949ab2
ND
3092 case -EAGAIN:
3093 rtm->rtm_type = RTN_THROW;
3094 break;
ef2c7d7b
ND
3095 default:
3096 rtm->rtm_type = RTN_UNREACHABLE;
3097 break;
3098 }
3099 }
38308473 3100 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3101 rtm->rtm_type = RTN_LOCAL;
d1918542 3102 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3103 rtm->rtm_type = RTN_LOCAL;
3104 else
3105 rtm->rtm_type = RTN_UNICAST;
3106 rtm->rtm_flags = 0;
35103d11 3107 if (!netif_carrier_ok(rt->dst.dev)) {
cea45e20 3108 rtm->rtm_flags |= RTNH_F_LINKDOWN;
35103d11
AG
3109 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3110 rtm->rtm_flags |= RTNH_F_DEAD;
3111 }
1da177e4
LT
3112 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3113 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 3114 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 3115 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
3116 else if (rt->rt6i_flags & RTF_ADDRCONF) {
3117 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3118 rtm->rtm_protocol = RTPROT_RA;
3119 else
3120 rtm->rtm_protocol = RTPROT_KERNEL;
3121 }
1da177e4 3122
38308473 3123 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3124 rtm->rtm_flags |= RTM_F_CLONED;
3125
3126 if (dst) {
930345ea 3127 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3128 goto nla_put_failure;
1ab1457c 3129 rtm->rtm_dst_len = 128;
1da177e4 3130 } else if (rtm->rtm_dst_len)
930345ea 3131 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3132 goto nla_put_failure;
1da177e4
LT
3133#ifdef CONFIG_IPV6_SUBTREES
3134 if (src) {
930345ea 3135 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3136 goto nla_put_failure;
1ab1457c 3137 rtm->rtm_src_len = 128;
c78679e8 3138 } else if (rtm->rtm_src_len &&
930345ea 3139 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3140 goto nla_put_failure;
1da177e4 3141#endif
7bc570c8
YH
3142 if (iif) {
3143#ifdef CONFIG_IPV6_MROUTE
3144 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 3145 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
3146 if (err <= 0) {
3147 if (!nowait) {
3148 if (err == 0)
3149 return 0;
3150 goto nla_put_failure;
3151 } else {
3152 if (err == -EMSGSIZE)
3153 goto nla_put_failure;
3154 }
3155 }
3156 } else
3157#endif
c78679e8
DM
3158 if (nla_put_u32(skb, RTA_IIF, iif))
3159 goto nla_put_failure;
7bc570c8 3160 } else if (dst) {
1da177e4 3161 struct in6_addr saddr_buf;
c78679e8 3162 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 3163 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3164 goto nla_put_failure;
1da177e4 3165 }
2d7202bf 3166
c3968a85
DW
3167 if (rt->rt6i_prefsrc.plen) {
3168 struct in6_addr saddr_buf;
4e3fd7a0 3169 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3170 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3171 goto nla_put_failure;
c3968a85
DW
3172 }
3173
4b32b5ad
MKL
3174 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3175 if (rt->rt6i_pmtu)
3176 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3177 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3178 goto nla_put_failure;
3179
dd0cbf29 3180 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 3181 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 3182 goto nla_put_failure;
94f826b8 3183 }
2d7202bf 3184
c78679e8
DM
3185 if (rt->dst.dev &&
3186 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3187 goto nla_put_failure;
3188 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3189 goto nla_put_failure;
8253947e
LW
3190
3191 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3192
87a50699 3193 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3194 goto nla_put_failure;
2d7202bf 3195
c78ba6d6
LR
3196 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3197 goto nla_put_failure;
3198
61adedf3 3199 lwtunnel_fill_encap(skb, rt->dst.lwtstate);
19e42e45 3200
053c095a
JB
3201 nlmsg_end(skb, nlh);
3202 return 0;
2d7202bf
TG
3203
3204nla_put_failure:
26932566
PM
3205 nlmsg_cancel(skb, nlh);
3206 return -EMSGSIZE;
1da177e4
LT
3207}
3208
1b43af54 3209int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3210{
3211 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3212 int prefix;
3213
2d7202bf
TG
3214 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3215 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
3216 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3217 } else
3218 prefix = 0;
3219
191cd582
BH
3220 return rt6_fill_node(arg->net,
3221 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3222 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 3223 prefix, 0, NLM_F_MULTI);
1da177e4
LT
3224}
3225
67ba4152 3226static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 3227{
3b1e0a65 3228 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
3229 struct nlattr *tb[RTA_MAX+1];
3230 struct rt6_info *rt;
1da177e4 3231 struct sk_buff *skb;
ab364a6f 3232 struct rtmsg *rtm;
4c9483b2 3233 struct flowi6 fl6;
72331bc0 3234 int err, iif = 0, oif = 0;
1da177e4 3235
ab364a6f
TG
3236 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3237 if (err < 0)
3238 goto errout;
1da177e4 3239
ab364a6f 3240 err = -EINVAL;
4c9483b2 3241 memset(&fl6, 0, sizeof(fl6));
1da177e4 3242
ab364a6f
TG
3243 if (tb[RTA_SRC]) {
3244 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3245 goto errout;
3246
4e3fd7a0 3247 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3248 }
3249
3250 if (tb[RTA_DST]) {
3251 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3252 goto errout;
3253
4e3fd7a0 3254 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3255 }
3256
3257 if (tb[RTA_IIF])
3258 iif = nla_get_u32(tb[RTA_IIF]);
3259
3260 if (tb[RTA_OIF])
72331bc0 3261 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3262
2e47b291
LC
3263 if (tb[RTA_MARK])
3264 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3265
1da177e4
LT
3266 if (iif) {
3267 struct net_device *dev;
72331bc0
SL
3268 int flags = 0;
3269
5578689a 3270 dev = __dev_get_by_index(net, iif);
1da177e4
LT
3271 if (!dev) {
3272 err = -ENODEV;
ab364a6f 3273 goto errout;
1da177e4 3274 }
72331bc0
SL
3275
3276 fl6.flowi6_iif = iif;
3277
3278 if (!ipv6_addr_any(&fl6.saddr))
3279 flags |= RT6_LOOKUP_F_HAS_SADDR;
3280
3281 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3282 flags);
3283 } else {
3284 fl6.flowi6_oif = oif;
3285
3286 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
3287 }
3288
ab364a6f 3289 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3290 if (!skb) {
94e187c0 3291 ip6_rt_put(rt);
ab364a6f
TG
3292 err = -ENOBUFS;
3293 goto errout;
3294 }
1da177e4 3295
ab364a6f
TG
3296 /* Reserve room for dummy headers, this skb can pass
3297 through good chunk of routing engine.
3298 */
459a98ed 3299 skb_reset_mac_header(skb);
ab364a6f 3300 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 3301
d8d1f30b 3302 skb_dst_set(skb, &rt->dst);
1da177e4 3303
4c9483b2 3304 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 3305 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 3306 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 3307 if (err < 0) {
ab364a6f
TG
3308 kfree_skb(skb);
3309 goto errout;
1da177e4
LT
3310 }
3311
15e47304 3312 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3313errout:
1da177e4 3314 return err;
1da177e4
LT
3315}
3316
37a1d361
RP
3317void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3318 unsigned int nlm_flags)
1da177e4
LT
3319{
3320 struct sk_buff *skb;
5578689a 3321 struct net *net = info->nl_net;
528c4ceb
DL
3322 u32 seq;
3323 int err;
3324
3325 err = -ENOBUFS;
38308473 3326 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3327
19e42e45 3328 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3329 if (!skb)
21713ebc
TG
3330 goto errout;
3331
191cd582 3332 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
37a1d361 3333 event, info->portid, seq, 0, 0, nlm_flags);
26932566
PM
3334 if (err < 0) {
3335 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3336 WARN_ON(err == -EMSGSIZE);
3337 kfree_skb(skb);
3338 goto errout;
3339 }
15e47304 3340 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3341 info->nlh, gfp_any());
3342 return;
21713ebc
TG
3343errout:
3344 if (err < 0)
5578689a 3345 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3346}
3347
8ed67789 3348static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3349 unsigned long event, void *ptr)
8ed67789 3350{
351638e7 3351 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3352 struct net *net = dev_net(dev);
8ed67789
DL
3353
3354 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 3355 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3356 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3357#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3358 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3359 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3360 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
3361 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3362#endif
3363 }
3364
3365 return NOTIFY_OK;
3366}
3367
1da177e4
LT
3368/*
3369 * /proc
3370 */
3371
3372#ifdef CONFIG_PROC_FS
3373
33120b30
AD
3374static const struct file_operations ipv6_route_proc_fops = {
3375 .owner = THIS_MODULE,
3376 .open = ipv6_route_open,
3377 .read = seq_read,
3378 .llseek = seq_lseek,
8d2ca1d7 3379 .release = seq_release_net,
33120b30
AD
3380};
3381
1da177e4
LT
3382static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3383{
69ddb805 3384 struct net *net = (struct net *)seq->private;
1da177e4 3385 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3386 net->ipv6.rt6_stats->fib_nodes,
3387 net->ipv6.rt6_stats->fib_route_nodes,
3388 net->ipv6.rt6_stats->fib_rt_alloc,
3389 net->ipv6.rt6_stats->fib_rt_entries,
3390 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3391 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3392 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3393
3394 return 0;
3395}
3396
3397static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3398{
de05c557 3399 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3400}
3401
9a32144e 3402static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3403 .owner = THIS_MODULE,
3404 .open = rt6_stats_seq_open,
3405 .read = seq_read,
3406 .llseek = seq_lseek,
b6fcbdb4 3407 .release = single_release_net,
1da177e4
LT
3408};
3409#endif /* CONFIG_PROC_FS */
3410
3411#ifdef CONFIG_SYSCTL
3412
1da177e4 3413static
fe2c6338 3414int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3415 void __user *buffer, size_t *lenp, loff_t *ppos)
3416{
c486da34
LAG
3417 struct net *net;
3418 int delay;
3419 if (!write)
1da177e4 3420 return -EINVAL;
c486da34
LAG
3421
3422 net = (struct net *)ctl->extra1;
3423 delay = net->ipv6.sysctl.flush_delay;
3424 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3425 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3426 return 0;
1da177e4
LT
3427}
3428
fe2c6338 3429struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3430 {
1da177e4 3431 .procname = "flush",
4990509f 3432 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3433 .maxlen = sizeof(int),
89c8b3a1 3434 .mode = 0200,
6d9f239a 3435 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3436 },
3437 {
1da177e4 3438 .procname = "gc_thresh",
9a7ec3a9 3439 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3440 .maxlen = sizeof(int),
3441 .mode = 0644,
6d9f239a 3442 .proc_handler = proc_dointvec,
1da177e4
LT
3443 },
3444 {
1da177e4 3445 .procname = "max_size",
4990509f 3446 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3447 .maxlen = sizeof(int),
3448 .mode = 0644,
6d9f239a 3449 .proc_handler = proc_dointvec,
1da177e4
LT
3450 },
3451 {
1da177e4 3452 .procname = "gc_min_interval",
4990509f 3453 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3454 .maxlen = sizeof(int),
3455 .mode = 0644,
6d9f239a 3456 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3457 },
3458 {
1da177e4 3459 .procname = "gc_timeout",
4990509f 3460 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3461 .maxlen = sizeof(int),
3462 .mode = 0644,
6d9f239a 3463 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3464 },
3465 {
1da177e4 3466 .procname = "gc_interval",
4990509f 3467 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3468 .maxlen = sizeof(int),
3469 .mode = 0644,
6d9f239a 3470 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3471 },
3472 {
1da177e4 3473 .procname = "gc_elasticity",
4990509f 3474 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3475 .maxlen = sizeof(int),
3476 .mode = 0644,
f3d3f616 3477 .proc_handler = proc_dointvec,
1da177e4
LT
3478 },
3479 {
1da177e4 3480 .procname = "mtu_expires",
4990509f 3481 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3482 .maxlen = sizeof(int),
3483 .mode = 0644,
6d9f239a 3484 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3485 },
3486 {
1da177e4 3487 .procname = "min_adv_mss",
4990509f 3488 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3489 .maxlen = sizeof(int),
3490 .mode = 0644,
f3d3f616 3491 .proc_handler = proc_dointvec,
1da177e4
LT
3492 },
3493 {
1da177e4 3494 .procname = "gc_min_interval_ms",
4990509f 3495 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3496 .maxlen = sizeof(int),
3497 .mode = 0644,
6d9f239a 3498 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3499 },
f8572d8f 3500 { }
1da177e4
LT
3501};
3502
2c8c1e72 3503struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3504{
3505 struct ctl_table *table;
3506
3507 table = kmemdup(ipv6_route_table_template,
3508 sizeof(ipv6_route_table_template),
3509 GFP_KERNEL);
5ee09105
YH
3510
3511 if (table) {
3512 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3513 table[0].extra1 = net;
86393e52 3514 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3515 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3516 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3517 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3518 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3519 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3520 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3521 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3522 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3523
3524 /* Don't export sysctls to unprivileged users */
3525 if (net->user_ns != &init_user_ns)
3526 table[0].procname = NULL;
5ee09105
YH
3527 }
3528
760f2d01
DL
3529 return table;
3530}
1da177e4
LT
3531#endif
3532
2c8c1e72 3533static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3534{
633d424b 3535 int ret = -ENOMEM;
8ed67789 3536
86393e52
AD
3537 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3538 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3539
fc66f95c
ED
3540 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3541 goto out_ip6_dst_ops;
3542
8ed67789
DL
3543 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3544 sizeof(*net->ipv6.ip6_null_entry),
3545 GFP_KERNEL);
3546 if (!net->ipv6.ip6_null_entry)
fc66f95c 3547 goto out_ip6_dst_entries;
d8d1f30b 3548 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3549 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3550 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3551 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3552 ip6_template_metrics, true);
8ed67789
DL
3553
3554#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3555 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3556 sizeof(*net->ipv6.ip6_prohibit_entry),
3557 GFP_KERNEL);
68fffc67
PZ
3558 if (!net->ipv6.ip6_prohibit_entry)
3559 goto out_ip6_null_entry;
d8d1f30b 3560 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3561 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3562 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3563 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3564 ip6_template_metrics, true);
8ed67789
DL
3565
3566 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3567 sizeof(*net->ipv6.ip6_blk_hole_entry),
3568 GFP_KERNEL);
68fffc67
PZ
3569 if (!net->ipv6.ip6_blk_hole_entry)
3570 goto out_ip6_prohibit_entry;
d8d1f30b 3571 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3572 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3573 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3574 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3575 ip6_template_metrics, true);
8ed67789
DL
3576#endif
3577
b339a47c
PZ
3578 net->ipv6.sysctl.flush_delay = 0;
3579 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3580 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3581 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3582 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3583 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3584 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3585 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3586
6891a346
BT
3587 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3588
8ed67789
DL
3589 ret = 0;
3590out:
3591 return ret;
f2fc6a54 3592
68fffc67
PZ
3593#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3594out_ip6_prohibit_entry:
3595 kfree(net->ipv6.ip6_prohibit_entry);
3596out_ip6_null_entry:
3597 kfree(net->ipv6.ip6_null_entry);
3598#endif
fc66f95c
ED
3599out_ip6_dst_entries:
3600 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3601out_ip6_dst_ops:
f2fc6a54 3602 goto out;
cdb18761
DL
3603}
3604
2c8c1e72 3605static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3606{
8ed67789
DL
3607 kfree(net->ipv6.ip6_null_entry);
3608#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3609 kfree(net->ipv6.ip6_prohibit_entry);
3610 kfree(net->ipv6.ip6_blk_hole_entry);
3611#endif
41bb78b4 3612 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3613}
3614
d189634e
TG
3615static int __net_init ip6_route_net_init_late(struct net *net)
3616{
3617#ifdef CONFIG_PROC_FS
d4beaa66
G
3618 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3619 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3620#endif
3621 return 0;
3622}
3623
3624static void __net_exit ip6_route_net_exit_late(struct net *net)
3625{
3626#ifdef CONFIG_PROC_FS
ece31ffd
G
3627 remove_proc_entry("ipv6_route", net->proc_net);
3628 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3629#endif
3630}
3631
cdb18761
DL
3632static struct pernet_operations ip6_route_net_ops = {
3633 .init = ip6_route_net_init,
3634 .exit = ip6_route_net_exit,
3635};
3636
c3426b47
DM
3637static int __net_init ipv6_inetpeer_init(struct net *net)
3638{
3639 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3640
3641 if (!bp)
3642 return -ENOMEM;
3643 inet_peer_base_init(bp);
3644 net->ipv6.peers = bp;
3645 return 0;
3646}
3647
3648static void __net_exit ipv6_inetpeer_exit(struct net *net)
3649{
3650 struct inet_peer_base *bp = net->ipv6.peers;
3651
3652 net->ipv6.peers = NULL;
56a6b248 3653 inetpeer_invalidate_tree(bp);
c3426b47
DM
3654 kfree(bp);
3655}
3656
2b823f72 3657static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3658 .init = ipv6_inetpeer_init,
3659 .exit = ipv6_inetpeer_exit,
3660};
3661
d189634e
TG
3662static struct pernet_operations ip6_route_net_late_ops = {
3663 .init = ip6_route_net_init_late,
3664 .exit = ip6_route_net_exit_late,
3665};
3666
8ed67789
DL
3667static struct notifier_block ip6_route_dev_notifier = {
3668 .notifier_call = ip6_route_dev_notify,
3669 .priority = 0,
3670};
3671
433d49c3 3672int __init ip6_route_init(void)
1da177e4 3673{
433d49c3 3674 int ret;
8d0b94af 3675 int cpu;
433d49c3 3676
9a7ec3a9
DL
3677 ret = -ENOMEM;
3678 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3679 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3680 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3681 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3682 goto out;
14e50e57 3683
fc66f95c 3684 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3685 if (ret)
bdb3289f 3686 goto out_kmem_cache;
bdb3289f 3687
c3426b47
DM
3688 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3689 if (ret)
e8803b6c 3690 goto out_dst_entries;
2a0c451a 3691
7e52b33b
DM
3692 ret = register_pernet_subsys(&ip6_route_net_ops);
3693 if (ret)
3694 goto out_register_inetpeer;
c3426b47 3695
5dc121e9
AE
3696 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3697
8ed67789
DL
3698 /* Registering of the loopback is done before this portion of code,
3699 * the loopback reference in rt6_info will not be taken, do it
3700 * manually for init_net */
d8d1f30b 3701 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3702 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3703 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3704 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3705 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3706 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3707 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3708 #endif
e8803b6c 3709 ret = fib6_init();
433d49c3 3710 if (ret)
8ed67789 3711 goto out_register_subsys;
433d49c3 3712
433d49c3
DL
3713 ret = xfrm6_init();
3714 if (ret)
e8803b6c 3715 goto out_fib6_init;
c35b7e72 3716
433d49c3
DL
3717 ret = fib6_rules_init();
3718 if (ret)
3719 goto xfrm6_init;
7e5449c2 3720
d189634e
TG
3721 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3722 if (ret)
3723 goto fib6_rules_init;
3724
433d49c3 3725 ret = -ENOBUFS;
c7ac8679
GR
3726 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3727 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3728 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3729 goto out_register_late_subsys;
c127ea2c 3730
8ed67789 3731 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3732 if (ret)
d189634e 3733 goto out_register_late_subsys;
8ed67789 3734
8d0b94af
MKL
3735 for_each_possible_cpu(cpu) {
3736 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3737
3738 INIT_LIST_HEAD(&ul->head);
3739 spin_lock_init(&ul->lock);
3740 }
3741
433d49c3
DL
3742out:
3743 return ret;
3744
d189634e
TG
3745out_register_late_subsys:
3746 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3747fib6_rules_init:
433d49c3
DL
3748 fib6_rules_cleanup();
3749xfrm6_init:
433d49c3 3750 xfrm6_fini();
2a0c451a
TG
3751out_fib6_init:
3752 fib6_gc_cleanup();
8ed67789
DL
3753out_register_subsys:
3754 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3755out_register_inetpeer:
3756 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3757out_dst_entries:
3758 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3759out_kmem_cache:
f2fc6a54 3760 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3761 goto out;
1da177e4
LT
3762}
3763
3764void ip6_route_cleanup(void)
3765{
8ed67789 3766 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3767 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3768 fib6_rules_cleanup();
1da177e4 3769 xfrm6_fini();
1da177e4 3770 fib6_gc_cleanup();
c3426b47 3771 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3772 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3773 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3774 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3775}
This page took 1.26232 seconds and 5 git commands to generate.