net: fib: move metrics parsing to a helper
[deliverable/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
904af04d 57#include <net/dst_metadata.h>
1da177e4 58#include <net/xfrm.h>
8d71740c 59#include <net/netevent.h>
21713ebc 60#include <net/netlink.h>
51ebd318 61#include <net/nexthop.h>
19e42e45 62#include <net/lwtunnel.h>
904af04d 63#include <net/ip_tunnels.h>
1da177e4
LT
64
65#include <asm/uaccess.h>
66
67#ifdef CONFIG_SYSCTL
68#include <linux/sysctl.h>
69#endif
70
/*
 * Outcome of the next-hop neighbour check.  rt6_score_route() hands any
 * negative value straight back to its caller; find_match() then decides
 * what to do with the route.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() drops the route */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour entry is in NUD_FAILED */
	RT6_NUD_FAIL_DO_RR	= -1,	/* scored as 0, asks for round-robin */
	RT6_NUD_SUCCEED		= 1
};
77
83a09abd 78static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 79static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 80static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 81static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
82static struct dst_entry *ip6_negative_advice(struct dst_entry *);
83static void ip6_dst_destroy(struct dst_entry *);
84static void ip6_dst_ifdown(struct dst_entry *,
85 struct net_device *dev, int how);
569d3645 86static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
87
88static int ip6_pkt_discard(struct sk_buff *skb);
aad88724 89static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
7150aede 90static int ip6_pkt_prohibit(struct sk_buff *skb);
aad88724 91static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
1da177e4 92static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
93static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
94 struct sk_buff *skb, u32 mtu);
95static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb);
4b32b5ad 97static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 98static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 99
70ceb4f5 100#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 101static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
102 const struct in6_addr *prefix, int prefixlen,
103 const struct in6_addr *gwaddr, int ifindex,
95c96174 104 unsigned int pref);
efa2cea0 105static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
106 const struct in6_addr *prefix, int prefixlen,
107 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
108#endif
109
8d0b94af
MKL
110struct uncached_list {
111 spinlock_t lock;
112 struct list_head head;
113};
114
115static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116
117static void rt6_uncached_list_add(struct rt6_info *rt)
118{
119 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
120
121 rt->dst.flags |= DST_NOCACHE;
122 rt->rt6i_uncached_list = ul;
123
124 spin_lock_bh(&ul->lock);
125 list_add_tail(&rt->rt6i_uncached, &ul->head);
126 spin_unlock_bh(&ul->lock);
127}
128
129static void rt6_uncached_list_del(struct rt6_info *rt)
130{
131 if (!list_empty(&rt->rt6i_uncached)) {
132 struct uncached_list *ul = rt->rt6i_uncached_list;
133
134 spin_lock_bh(&ul->lock);
135 list_del(&rt->rt6i_uncached);
136 spin_unlock_bh(&ul->lock);
137 }
138}
139
140static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
141{
142 struct net_device *loopback_dev = net->loopback_dev;
143 int cpu;
144
145 for_each_possible_cpu(cpu) {
146 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
147 struct rt6_info *rt;
148
149 spin_lock_bh(&ul->lock);
150 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
151 struct inet6_dev *rt_idev = rt->rt6i_idev;
152 struct net_device *rt_dev = rt->dst.dev;
153
154 if (rt_idev && (rt_idev->dev == dev || !dev) &&
155 rt_idev->dev != loopback_dev) {
156 rt->rt6i_idev = in6_dev_get(loopback_dev);
157 in6_dev_put(rt_idev);
158 }
159
160 if (rt_dev && (rt_dev == dev || !dev) &&
161 rt_dev != loopback_dev) {
162 rt->dst.dev = loopback_dev;
163 dev_hold(rt->dst.dev);
164 dev_put(rt_dev);
165 }
166 }
167 spin_unlock_bh(&ul->lock);
168 }
169}
170
d52d3997
MKL
171static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
172{
173 return dst_metrics_write_ptr(rt->dst.from);
174}
175
06582540
DM
176static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
177{
4b32b5ad 178 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 179
d52d3997
MKL
180 if (rt->rt6i_flags & RTF_PCPU)
181 return rt6_pcpu_cow_metrics(rt);
182 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
183 return NULL;
184 else
3b471175 185 return dst_cow_metrics_generic(dst, old);
06582540
DM
186}
187
f894cbf8
DM
188static inline const void *choose_neigh_daddr(struct rt6_info *rt,
189 struct sk_buff *skb,
190 const void *daddr)
39232973
DM
191{
192 struct in6_addr *p = &rt->rt6i_gateway;
193
a7563f34 194 if (!ipv6_addr_any(p))
39232973 195 return (const void *) p;
f894cbf8
DM
196 else if (skb)
197 return &ipv6_hdr(skb)->daddr;
39232973
DM
198 return daddr;
199}
200
f894cbf8
DM
201static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
202 struct sk_buff *skb,
203 const void *daddr)
d3aaeb38 204{
39232973
DM
205 struct rt6_info *rt = (struct rt6_info *) dst;
206 struct neighbour *n;
207
f894cbf8 208 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 209 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
210 if (n)
211 return n;
212 return neigh_create(&nd_tbl, daddr, dst->dev);
213}
214
9a7ec3a9 215static struct dst_ops ip6_dst_ops_template = {
1da177e4 216 .family = AF_INET6,
1da177e4
LT
217 .gc = ip6_dst_gc,
218 .gc_thresh = 1024,
219 .check = ip6_dst_check,
0dbaee3b 220 .default_advmss = ip6_default_advmss,
ebb762f2 221 .mtu = ip6_mtu,
06582540 222 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
223 .destroy = ip6_dst_destroy,
224 .ifdown = ip6_dst_ifdown,
225 .negative_advice = ip6_negative_advice,
226 .link_failure = ip6_link_failure,
227 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 228 .redirect = rt6_do_redirect,
1ac06e03 229 .local_out = __ip6_local_out,
d3aaeb38 230 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
231};
232
ebb762f2 233static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 234{
618f9bc7
SK
235 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
236
237 return mtu ? : dst->dev->mtu;
ec831ea7
RD
238}
239
6700c270
DM
240static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
241 struct sk_buff *skb, u32 mtu)
14e50e57
DM
242{
243}
244
6700c270
DM
/* Blackhole dsts ignore redirects: intentionally empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
249
0972ddb2
HB
250static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
251 unsigned long old)
252{
253 return NULL;
254}
255
14e50e57
DM
256static struct dst_ops ip6_dst_blackhole_ops = {
257 .family = AF_INET6,
14e50e57
DM
258 .destroy = ip6_dst_destroy,
259 .check = ip6_dst_check,
ebb762f2 260 .mtu = ip6_blackhole_mtu,
214f45c9 261 .default_advmss = ip6_default_advmss,
14e50e57 262 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 263 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 264 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 265 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
266};
267
62fa8a84 268static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 269 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
270};
271
fb0af4c7 272static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
273 .dst = {
274 .__refcnt = ATOMIC_INIT(1),
275 .__use = 1,
2c20cbd7 276 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 277 .error = -ENETUNREACH,
d8d1f30b
CG
278 .input = ip6_pkt_discard,
279 .output = ip6_pkt_discard_out,
1da177e4
LT
280 },
281 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 282 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
283 .rt6i_metric = ~(u32) 0,
284 .rt6i_ref = ATOMIC_INIT(1),
285};
286
101367c2
TG
287#ifdef CONFIG_IPV6_MULTIPLE_TABLES
288
fb0af4c7 289static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
290 .dst = {
291 .__refcnt = ATOMIC_INIT(1),
292 .__use = 1,
2c20cbd7 293 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 294 .error = -EACCES,
d8d1f30b
CG
295 .input = ip6_pkt_prohibit,
296 .output = ip6_pkt_prohibit_out,
101367c2
TG
297 },
298 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 299 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
300 .rt6i_metric = ~(u32) 0,
301 .rt6i_ref = ATOMIC_INIT(1),
302};
303
fb0af4c7 304static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
305 .dst = {
306 .__refcnt = ATOMIC_INIT(1),
307 .__use = 1,
2c20cbd7 308 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 309 .error = -EINVAL,
d8d1f30b 310 .input = dst_discard,
aad88724 311 .output = dst_discard_sk,
101367c2
TG
312 },
313 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 314 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
315 .rt6i_metric = ~(u32) 0,
316 .rt6i_ref = ATOMIC_INIT(1),
317};
318
319#endif
320
1da177e4 321/* allocate dst with ip6_dst_ops */
d52d3997
MKL
322static struct rt6_info *__ip6_dst_alloc(struct net *net,
323 struct net_device *dev,
ad706862 324 int flags)
1da177e4 325{
97bab73f 326 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 327 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 328
97bab73f 329 if (rt) {
8104891b
SK
330 struct dst_entry *dst = &rt->dst;
331
332 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
51ebd318 333 INIT_LIST_HEAD(&rt->rt6i_siblings);
8d0b94af 334 INIT_LIST_HEAD(&rt->rt6i_uncached);
97bab73f 335 }
cf911662 336 return rt;
1da177e4
LT
337}
338
d52d3997
MKL
339static struct rt6_info *ip6_dst_alloc(struct net *net,
340 struct net_device *dev,
ad706862 341 int flags)
d52d3997 342{
ad706862 343 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
344
345 if (rt) {
346 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347 if (rt->rt6i_pcpu) {
348 int cpu;
349
350 for_each_possible_cpu(cpu) {
351 struct rt6_info **p;
352
353 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354 /* no one shares rt */
355 *p = NULL;
356 }
357 } else {
358 dst_destroy((struct dst_entry *)rt);
359 return NULL;
360 }
361 }
362
363 return rt;
364}
365
1da177e4
LT
366static void ip6_dst_destroy(struct dst_entry *dst)
367{
368 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 369 struct dst_entry *from = dst->from;
8d0b94af 370 struct inet6_dev *idev;
1da177e4 371
4b32b5ad 372 dst_destroy_metrics_generic(dst);
87775312 373 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
374 rt6_uncached_list_del(rt);
375
376 idev = rt->rt6i_idev;
38308473 377 if (idev) {
1da177e4
LT
378 rt->rt6i_idev = NULL;
379 in6_dev_put(idev);
1ab1457c 380 }
1716a961 381
ecd98837
YH
382 dst->from = NULL;
383 dst_release(from);
b3419363
DM
384}
385
1da177e4
LT
386static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387 int how)
388{
389 struct rt6_info *rt = (struct rt6_info *)dst;
390 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 391 struct net_device *loopback_dev =
c346dca1 392 dev_net(dev)->loopback_dev;
1da177e4 393
97cac082
DM
394 if (dev != loopback_dev) {
395 if (idev && idev->dev == dev) {
396 struct inet6_dev *loopback_idev =
397 in6_dev_get(loopback_dev);
398 if (loopback_idev) {
399 rt->rt6i_idev = loopback_idev;
400 in6_dev_put(idev);
401 }
402 }
1da177e4
LT
403 }
404}
405
a50feda5 406static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 407{
1716a961
G
408 if (rt->rt6i_flags & RTF_EXPIRES) {
409 if (time_after(jiffies, rt->dst.expires))
a50feda5 410 return true;
1716a961 411 } else if (rt->dst.from) {
3fd91fb3 412 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 413 }
a50feda5 414 return false;
1da177e4
LT
415}
416
51ebd318
ND
417/* Multipath route selection:
418 * Hash based function using packet header and flowlabel.
419 * Adapted from fib_info_hashfn()
420 */
421static int rt6_info_hash_nhsfn(unsigned int candidate_count,
422 const struct flowi6 *fl6)
423{
424 unsigned int val = fl6->flowi6_proto;
425
c08977bb
YH
426 val ^= ipv6_addr_hash(&fl6->daddr);
427 val ^= ipv6_addr_hash(&fl6->saddr);
51ebd318
ND
428
429 /* Work only if this not encapsulated */
430 switch (fl6->flowi6_proto) {
431 case IPPROTO_UDP:
432 case IPPROTO_TCP:
433 case IPPROTO_SCTP:
b3ce5ae1
ND
434 val ^= (__force u16)fl6->fl6_sport;
435 val ^= (__force u16)fl6->fl6_dport;
51ebd318
ND
436 break;
437
438 case IPPROTO_ICMPV6:
b3ce5ae1
ND
439 val ^= (__force u16)fl6->fl6_icmp_type;
440 val ^= (__force u16)fl6->fl6_icmp_code;
51ebd318
ND
441 break;
442 }
443 /* RFC6438 recommands to use flowlabel */
b3ce5ae1 444 val ^= (__force u32)fl6->flowlabel;
51ebd318
ND
445
446 /* Perhaps, we need to tune, this function? */
447 val = val ^ (val >> 7) ^ (val >> 12);
448 return val % candidate_count;
449}
450
451static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
452 struct flowi6 *fl6, int oif,
453 int strict)
51ebd318
ND
454{
455 struct rt6_info *sibling, *next_sibling;
456 int route_choosen;
457
458 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
459 /* Don't change the route, if route_choosen == 0
460 * (siblings does not include ourself)
461 */
462 if (route_choosen)
463 list_for_each_entry_safe(sibling, next_sibling,
464 &match->rt6i_siblings, rt6i_siblings) {
465 route_choosen--;
466 if (route_choosen == 0) {
52bd4c0c
ND
467 if (rt6_score_route(sibling, oif, strict) < 0)
468 break;
51ebd318
ND
469 match = sibling;
470 break;
471 }
472 }
473 return match;
474}
475
1da177e4 476/*
c71099ac 477 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
478 */
479
8ed67789
DL
480static inline struct rt6_info *rt6_device_match(struct net *net,
481 struct rt6_info *rt,
b71d1d42 482 const struct in6_addr *saddr,
1da177e4 483 int oif,
d420895e 484 int flags)
1da177e4
LT
485{
486 struct rt6_info *local = NULL;
487 struct rt6_info *sprt;
488
dd3abc4e
YH
489 if (!oif && ipv6_addr_any(saddr))
490 goto out;
491
d8d1f30b 492 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 493 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
494
495 if (oif) {
1da177e4
LT
496 if (dev->ifindex == oif)
497 return sprt;
498 if (dev->flags & IFF_LOOPBACK) {
38308473 499 if (!sprt->rt6i_idev ||
1da177e4 500 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 501 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 502 continue;
1ab1457c 503 if (local && (!oif ||
1da177e4
LT
504 local->rt6i_idev->dev->ifindex == oif))
505 continue;
506 }
507 local = sprt;
508 }
dd3abc4e
YH
509 } else {
510 if (ipv6_chk_addr(net, saddr, dev,
511 flags & RT6_LOOKUP_F_IFACE))
512 return sprt;
1da177e4 513 }
dd3abc4e 514 }
1da177e4 515
dd3abc4e 516 if (oif) {
1da177e4
LT
517 if (local)
518 return local;
519
d420895e 520 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 521 return net->ipv6.ip6_null_entry;
1da177e4 522 }
dd3abc4e 523out:
1da177e4
LT
524 return rt;
525}
526
27097255 527#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
528struct __rt6_probe_work {
529 struct work_struct work;
530 struct in6_addr target;
531 struct net_device *dev;
532};
533
534static void rt6_probe_deferred(struct work_struct *w)
535{
536 struct in6_addr mcaddr;
537 struct __rt6_probe_work *work =
538 container_of(w, struct __rt6_probe_work, work);
539
540 addrconf_addr_solict_mult(&work->target, &mcaddr);
ab450605 541 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
c2f17e82 542 dev_put(work->dev);
662f5533 543 kfree(work);
c2f17e82
HFS
544}
545
27097255
YH
546static void rt6_probe(struct rt6_info *rt)
547{
990edb42 548 struct __rt6_probe_work *work;
f2c31e32 549 struct neighbour *neigh;
27097255
YH
550 /*
551 * Okay, this does not seem to be appropriate
552 * for now, however, we need to check if it
553 * is really so; aka Router Reachability Probing.
554 *
555 * Router Reachability Probe MUST be rate-limited
556 * to no more than one per minute.
557 */
2152caea 558 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 559 return;
2152caea
YH
560 rcu_read_lock_bh();
561 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
562 if (neigh) {
8d6c31bf
MKL
563 if (neigh->nud_state & NUD_VALID)
564 goto out;
565
990edb42 566 work = NULL;
2152caea 567 write_lock(&neigh->lock);
990edb42
MKL
568 if (!(neigh->nud_state & NUD_VALID) &&
569 time_after(jiffies,
570 neigh->updated +
571 rt->rt6i_idev->cnf.rtr_probe_interval)) {
572 work = kmalloc(sizeof(*work), GFP_ATOMIC);
573 if (work)
574 __neigh_set_probe_once(neigh);
c2f17e82 575 }
2152caea 576 write_unlock(&neigh->lock);
990edb42
MKL
577 } else {
578 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 579 }
990edb42
MKL
580
581 if (work) {
582 INIT_WORK(&work->work, rt6_probe_deferred);
583 work->target = rt->rt6i_gateway;
584 dev_hold(rt->dst.dev);
585 work->dev = rt->dst.dev;
586 schedule_work(&work->work);
587 }
588
8d6c31bf 589out:
2152caea 590 rcu_read_unlock_bh();
27097255
YH
591}
592#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
596#endif
597
1da177e4 598/*
554cfb7e 599 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 600 */
b6f99a21 601static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 602{
d1918542 603 struct net_device *dev = rt->dst.dev;
161980f4 604 if (!oif || dev->ifindex == oif)
554cfb7e 605 return 2;
161980f4
DM
606 if ((dev->flags & IFF_LOOPBACK) &&
607 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
608 return 1;
609 return 0;
554cfb7e 610}
1da177e4 611
afc154e9 612static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 613{
f2c31e32 614 struct neighbour *neigh;
afc154e9 615 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 616
4d0c5911
YH
617 if (rt->rt6i_flags & RTF_NONEXTHOP ||
618 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 619 return RT6_NUD_SUCCEED;
145a3621
YH
620
621 rcu_read_lock_bh();
622 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
623 if (neigh) {
624 read_lock(&neigh->lock);
554cfb7e 625 if (neigh->nud_state & NUD_VALID)
afc154e9 626 ret = RT6_NUD_SUCCEED;
398bcbeb 627#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 628 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 629 ret = RT6_NUD_SUCCEED;
7e980569
JB
630 else
631 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 632#endif
145a3621 633 read_unlock(&neigh->lock);
afc154e9
HFS
634 } else {
635 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 636 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 637 }
145a3621
YH
638 rcu_read_unlock_bh();
639
a5a81f0b 640 return ret;
1da177e4
LT
641}
642
554cfb7e
YH
643static int rt6_score_route(struct rt6_info *rt, int oif,
644 int strict)
1da177e4 645{
a5a81f0b 646 int m;
1ab1457c 647
4d0c5911 648 m = rt6_check_dev(rt, oif);
77d16f45 649 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 650 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
651#ifdef CONFIG_IPV6_ROUTER_PREF
652 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
653#endif
afc154e9
HFS
654 if (strict & RT6_LOOKUP_F_REACHABLE) {
655 int n = rt6_check_neigh(rt);
656 if (n < 0)
657 return n;
658 }
554cfb7e
YH
659 return m;
660}
661
f11e6659 662static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
663 int *mpri, struct rt6_info *match,
664 bool *do_rr)
554cfb7e 665{
f11e6659 666 int m;
afc154e9 667 bool match_do_rr = false;
35103d11
AG
668 struct inet6_dev *idev = rt->rt6i_idev;
669 struct net_device *dev = rt->dst.dev;
670
671 if (dev && !netif_carrier_ok(dev) &&
672 idev->cnf.ignore_routes_with_linkdown)
673 goto out;
f11e6659
DM
674
675 if (rt6_check_expired(rt))
676 goto out;
677
678 m = rt6_score_route(rt, oif, strict);
7e980569 679 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
680 match_do_rr = true;
681 m = 0; /* lowest valid score */
7e980569 682 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 683 goto out;
afc154e9
HFS
684 }
685
686 if (strict & RT6_LOOKUP_F_REACHABLE)
687 rt6_probe(rt);
f11e6659 688
7e980569 689 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 690 if (m > *mpri) {
afc154e9 691 *do_rr = match_do_rr;
f11e6659
DM
692 *mpri = m;
693 match = rt;
f11e6659 694 }
f11e6659
DM
695out:
696 return match;
697}
698
699static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
700 struct rt6_info *rr_head,
afc154e9
HFS
701 u32 metric, int oif, int strict,
702 bool *do_rr)
f11e6659 703{
9fbdcfaf 704 struct rt6_info *rt, *match, *cont;
554cfb7e 705 int mpri = -1;
1da177e4 706
f11e6659 707 match = NULL;
9fbdcfaf
SK
708 cont = NULL;
709 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
710 if (rt->rt6i_metric != metric) {
711 cont = rt;
712 break;
713 }
714
715 match = find_match(rt, oif, strict, &mpri, match, do_rr);
716 }
717
718 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
719 if (rt->rt6i_metric != metric) {
720 cont = rt;
721 break;
722 }
723
afc154e9 724 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
725 }
726
727 if (match || !cont)
728 return match;
729
730 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 731 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 732
f11e6659
DM
733 return match;
734}
1da177e4 735
f11e6659
DM
736static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
737{
738 struct rt6_info *match, *rt0;
8ed67789 739 struct net *net;
afc154e9 740 bool do_rr = false;
1da177e4 741
f11e6659
DM
742 rt0 = fn->rr_ptr;
743 if (!rt0)
744 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 745
afc154e9
HFS
746 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
747 &do_rr);
1da177e4 748
afc154e9 749 if (do_rr) {
d8d1f30b 750 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 751
554cfb7e 752 /* no entries matched; do round-robin */
f11e6659
DM
753 if (!next || next->rt6i_metric != rt0->rt6i_metric)
754 next = fn->leaf;
755
756 if (next != rt0)
757 fn->rr_ptr = next;
1da177e4 758 }
1da177e4 759
d1918542 760 net = dev_net(rt0->dst.dev);
a02cec21 761 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
762}
763
8b9df265
MKL
764static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
765{
766 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
767}
768
70ceb4f5
YH
769#ifdef CONFIG_IPV6_ROUTE_INFO
770int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 771 const struct in6_addr *gwaddr)
70ceb4f5 772{
c346dca1 773 struct net *net = dev_net(dev);
70ceb4f5
YH
774 struct route_info *rinfo = (struct route_info *) opt;
775 struct in6_addr prefix_buf, *prefix;
776 unsigned int pref;
4bed72e4 777 unsigned long lifetime;
70ceb4f5
YH
778 struct rt6_info *rt;
779
780 if (len < sizeof(struct route_info)) {
781 return -EINVAL;
782 }
783
784 /* Sanity check for prefix_len and length */
785 if (rinfo->length > 3) {
786 return -EINVAL;
787 } else if (rinfo->prefix_len > 128) {
788 return -EINVAL;
789 } else if (rinfo->prefix_len > 64) {
790 if (rinfo->length < 2) {
791 return -EINVAL;
792 }
793 } else if (rinfo->prefix_len > 0) {
794 if (rinfo->length < 1) {
795 return -EINVAL;
796 }
797 }
798
799 pref = rinfo->route_pref;
800 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 801 return -EINVAL;
70ceb4f5 802
4bed72e4 803 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
804
805 if (rinfo->length == 3)
806 prefix = (struct in6_addr *)rinfo->prefix;
807 else {
808 /* this function is safe */
809 ipv6_addr_prefix(&prefix_buf,
810 (struct in6_addr *)rinfo->prefix,
811 rinfo->prefix_len);
812 prefix = &prefix_buf;
813 }
814
f104a567
DJ
815 if (rinfo->prefix_len == 0)
816 rt = rt6_get_dflt_router(gwaddr, dev);
817 else
818 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
819 gwaddr, dev->ifindex);
70ceb4f5
YH
820
821 if (rt && !lifetime) {
e0a1ad73 822 ip6_del_rt(rt);
70ceb4f5
YH
823 rt = NULL;
824 }
825
826 if (!rt && lifetime)
efa2cea0 827 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
828 pref);
829 else if (rt)
830 rt->rt6i_flags = RTF_ROUTEINFO |
831 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
832
833 if (rt) {
1716a961
G
834 if (!addrconf_finite_timeout(lifetime))
835 rt6_clean_expires(rt);
836 else
837 rt6_set_expires(rt, jiffies + HZ * lifetime);
838
94e187c0 839 ip6_rt_put(rt);
70ceb4f5
YH
840 }
841 return 0;
842}
843#endif
844
a3c00e46
MKL
845static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
846 struct in6_addr *saddr)
847{
848 struct fib6_node *pn;
849 while (1) {
850 if (fn->fn_flags & RTN_TL_ROOT)
851 return NULL;
852 pn = fn->parent;
853 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
854 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
855 else
856 fn = pn;
857 if (fn->fn_flags & RTN_RTINFO)
858 return fn;
859 }
860}
c71099ac 861
8ed67789
DL
862static struct rt6_info *ip6_pol_route_lookup(struct net *net,
863 struct fib6_table *table,
4c9483b2 864 struct flowi6 *fl6, int flags)
1da177e4
LT
865{
866 struct fib6_node *fn;
867 struct rt6_info *rt;
868
c71099ac 869 read_lock_bh(&table->tb6_lock);
4c9483b2 870 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
871restart:
872 rt = fn->leaf;
4c9483b2 873 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 874 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 875 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
876 if (rt == net->ipv6.ip6_null_entry) {
877 fn = fib6_backtrack(fn, &fl6->saddr);
878 if (fn)
879 goto restart;
880 }
d8d1f30b 881 dst_use(&rt->dst, jiffies);
c71099ac 882 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
883 return rt;
884
885}
886
67ba4152 887struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
888 int flags)
889{
890 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
891}
892EXPORT_SYMBOL_GPL(ip6_route_lookup);
893
9acd9f3a
YH
894struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
895 const struct in6_addr *saddr, int oif, int strict)
c71099ac 896{
4c9483b2
DM
897 struct flowi6 fl6 = {
898 .flowi6_oif = oif,
899 .daddr = *daddr,
c71099ac
TG
900 };
901 struct dst_entry *dst;
77d16f45 902 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 903
adaa70bb 904 if (saddr) {
4c9483b2 905 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
906 flags |= RT6_LOOKUP_F_HAS_SADDR;
907 }
908
4c9483b2 909 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
910 if (dst->error == 0)
911 return (struct rt6_info *) dst;
912
913 dst_release(dst);
914
1da177e4
LT
915 return NULL;
916}
7159039a
YH
917EXPORT_SYMBOL(rt6_lookup);
918
c71099ac 919/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
920 It takes new route entry, the addition fails by any reason the
921 route is freed. In any case, if caller does not hold it, it may
922 be destroyed.
923 */
924
e5fd387a 925static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 926 struct mx6_config *mxc)
1da177e4
LT
927{
928 int err;
c71099ac 929 struct fib6_table *table;
1da177e4 930
c71099ac
TG
931 table = rt->rt6i_table;
932 write_lock_bh(&table->tb6_lock);
e715b6d3 933 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 934 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
935
936 return err;
937}
938
40e22e8f
TG
939int ip6_ins_rt(struct rt6_info *rt)
940{
e715b6d3
FW
941 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
942 struct mx6_config mxc = { .mx = NULL, };
943
944 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
945}
946
8b9df265
MKL
947static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
948 const struct in6_addr *daddr,
949 const struct in6_addr *saddr)
1da177e4 950{
1da177e4
LT
951 struct rt6_info *rt;
952
953 /*
954 * Clone the route.
955 */
956
d52d3997 957 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 958 ort = (struct rt6_info *)ort->dst.from;
1da177e4 959
ad706862 960 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
83a09abd
MKL
961
962 if (!rt)
963 return NULL;
964
965 ip6_rt_copy_init(rt, ort);
966 rt->rt6i_flags |= RTF_CACHE;
967 rt->rt6i_metric = 0;
968 rt->dst.flags |= DST_HOST;
969 rt->rt6i_dst.addr = *daddr;
970 rt->rt6i_dst.plen = 128;
1da177e4 971
83a09abd
MKL
972 if (!rt6_is_gw_or_nonexthop(ort)) {
973 if (ort->rt6i_dst.plen != 128 &&
974 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
975 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 976#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
977 if (rt->rt6i_src.plen && saddr) {
978 rt->rt6i_src.addr = *saddr;
979 rt->rt6i_src.plen = 128;
8b9df265 980 }
83a09abd 981#endif
95a9a5ba 982 }
1da177e4 983
95a9a5ba
YH
984 return rt;
985}
1da177e4 986
d52d3997
MKL
987static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
988{
989 struct rt6_info *pcpu_rt;
990
991 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
ad706862 992 rt->dst.dev, rt->dst.flags);
d52d3997
MKL
993
994 if (!pcpu_rt)
995 return NULL;
996 ip6_rt_copy_init(pcpu_rt, rt);
997 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
998 pcpu_rt->rt6i_flags |= RTF_PCPU;
999 return pcpu_rt;
1000}
1001
1002/* It should be called with read_lock_bh(&tb6_lock) acquired */
1003static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1004{
a73e4195 1005 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1006
1007 p = this_cpu_ptr(rt->rt6i_pcpu);
1008 pcpu_rt = *p;
1009
a73e4195
MKL
1010 if (pcpu_rt) {
1011 dst_hold(&pcpu_rt->dst);
1012 rt6_dst_from_metrics_check(pcpu_rt);
1013 }
1014 return pcpu_rt;
1015}
1016
1017static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1018{
9c7370a1 1019 struct fib6_table *table = rt->rt6i_table;
a73e4195 1020 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1021
1022 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1023 if (!pcpu_rt) {
1024 struct net *net = dev_net(rt->dst.dev);
1025
9c7370a1
MKL
1026 dst_hold(&net->ipv6.ip6_null_entry->dst);
1027 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1028 }
1029
9c7370a1
MKL
1030 read_lock_bh(&table->tb6_lock);
1031 if (rt->rt6i_pcpu) {
1032 p = this_cpu_ptr(rt->rt6i_pcpu);
1033 prev = cmpxchg(p, NULL, pcpu_rt);
1034 if (prev) {
1035 /* If someone did it before us, return prev instead */
1036 dst_destroy(&pcpu_rt->dst);
1037 pcpu_rt = prev;
1038 }
1039 } else {
1040 /* rt has been removed from the fib6 tree
1041 * before we have a chance to acquire the read_lock.
1042 * In this case, don't brother to create a pcpu rt
1043 * since rt is going away anyway. The next
1044 * dst_check() will trigger a re-lookup.
1045 */
d52d3997 1046 dst_destroy(&pcpu_rt->dst);
9c7370a1 1047 pcpu_rt = rt;
d52d3997 1048 }
d52d3997
MKL
1049 dst_hold(&pcpu_rt->dst);
1050 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1051 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1052 return pcpu_rt;
1053}
1054
8ed67789 1055static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 1056 struct flowi6 *fl6, int flags)
1da177e4 1057{
367efcb9 1058 struct fib6_node *fn, *saved_fn;
45e4fd26 1059 struct rt6_info *rt;
c71099ac 1060 int strict = 0;
1da177e4 1061
77d16f45 1062 strict |= flags & RT6_LOOKUP_F_IFACE;
367efcb9
MKL
1063 if (net->ipv6.devconf_all->forwarding == 0)
1064 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1065
c71099ac 1066 read_lock_bh(&table->tb6_lock);
1da177e4 1067
4c9483b2 1068 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1069 saved_fn = fn;
1da177e4 1070
a3c00e46 1071redo_rt6_select:
367efcb9 1072 rt = rt6_select(fn, oif, strict);
52bd4c0c 1073 if (rt->rt6i_nsiblings)
367efcb9 1074 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1075 if (rt == net->ipv6.ip6_null_entry) {
1076 fn = fib6_backtrack(fn, &fl6->saddr);
1077 if (fn)
1078 goto redo_rt6_select;
367efcb9
MKL
1079 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1080 /* also consider unreachable route */
1081 strict &= ~RT6_LOOKUP_F_REACHABLE;
1082 fn = saved_fn;
1083 goto redo_rt6_select;
367efcb9 1084 }
a3c00e46
MKL
1085 }
1086
fb9de91e 1087
3da59bd9 1088 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1089 dst_use(&rt->dst, jiffies);
1090 read_unlock_bh(&table->tb6_lock);
1091
1092 rt6_dst_from_metrics_check(rt);
1093 return rt;
3da59bd9
MKL
1094 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1095 !(rt->rt6i_flags & RTF_GATEWAY))) {
1096 /* Create a RTF_CACHE clone which will not be
1097 * owned by the fib6 tree. It is for the special case where
1098 * the daddr in the skb during the neighbor look-up is different
1099 * from the fl6->daddr used to look-up route here.
1100 */
1101
1102 struct rt6_info *uncached_rt;
1103
d52d3997
MKL
1104 dst_use(&rt->dst, jiffies);
1105 read_unlock_bh(&table->tb6_lock);
1106
3da59bd9
MKL
1107 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1108 dst_release(&rt->dst);
c71099ac 1109
3da59bd9 1110 if (uncached_rt)
8d0b94af 1111 rt6_uncached_list_add(uncached_rt);
3da59bd9
MKL
1112 else
1113 uncached_rt = net->ipv6.ip6_null_entry;
d52d3997 1114
3da59bd9
MKL
1115 dst_hold(&uncached_rt->dst);
1116 return uncached_rt;
3da59bd9 1117
d52d3997
MKL
1118 } else {
1119 /* Get a percpu copy */
1120
1121 struct rt6_info *pcpu_rt;
1122
1123 rt->dst.lastuse = jiffies;
1124 rt->dst.__use++;
1125 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1126
9c7370a1
MKL
1127 if (pcpu_rt) {
1128 read_unlock_bh(&table->tb6_lock);
1129 } else {
1130 /* We have to do the read_unlock first
1131 * because rt6_make_pcpu_route() may trigger
1132 * ip6_dst_gc() which will take the write_lock.
1133 */
1134 dst_hold(&rt->dst);
1135 read_unlock_bh(&table->tb6_lock);
a73e4195 1136 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1137 dst_release(&rt->dst);
1138 }
d52d3997
MKL
1139
1140 return pcpu_rt;
9c7370a1 1141
d52d3997 1142 }
1da177e4
LT
1143}
1144
8ed67789 1145static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1146 struct flowi6 *fl6, int flags)
4acad72d 1147{
4c9483b2 1148 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1149}
1150
72331bc0
SL
1151static struct dst_entry *ip6_route_input_lookup(struct net *net,
1152 struct net_device *dev,
1153 struct flowi6 *fl6, int flags)
1154{
1155 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1156 flags |= RT6_LOOKUP_F_IFACE;
1157
1158 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1159}
1160
c71099ac
TG
1161void ip6_route_input(struct sk_buff *skb)
1162{
b71d1d42 1163 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1164 struct net *net = dev_net(skb->dev);
adaa70bb 1165 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1166 struct ip_tunnel_info *tun_info;
4c9483b2
DM
1167 struct flowi6 fl6 = {
1168 .flowi6_iif = skb->dev->ifindex,
1169 .daddr = iph->daddr,
1170 .saddr = iph->saddr,
6502ca52 1171 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1172 .flowi6_mark = skb->mark,
1173 .flowi6_proto = iph->nexthdr,
c71099ac 1174 };
adaa70bb 1175
904af04d 1176 tun_info = skb_tunnel_info(skb);
46fa062a 1177 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1178 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
06e9d040 1179 skb_dst_drop(skb);
72331bc0 1180 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1181}
1182
8ed67789 1183static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1184 struct flowi6 *fl6, int flags)
1da177e4 1185{
4c9483b2 1186 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1187}
1188
67ba4152 1189struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 1190 struct flowi6 *fl6)
c71099ac
TG
1191{
1192 int flags = 0;
1193
1fb9489b 1194 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1195
4c9483b2 1196 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 1197 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1198
4c9483b2 1199 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 1200 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1201 else if (sk)
1202 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1203
4c9483b2 1204 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1205}
7159039a 1206EXPORT_SYMBOL(ip6_route_output);
1da177e4 1207
2774c131 1208struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1209{
5c1e6aa3 1210 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1211 struct dst_entry *new = NULL;
1212
f5b0a874 1213 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1214 if (rt) {
d8d1f30b 1215 new = &rt->dst;
14e50e57 1216
8104891b 1217 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
8104891b 1218
14e50e57 1219 new->__use = 1;
352e512c 1220 new->input = dst_discard;
aad88724 1221 new->output = dst_discard_sk;
14e50e57 1222
21efcfa0
ED
1223 if (dst_metrics_read_only(&ort->dst))
1224 new->_metrics = ort->dst._metrics;
1225 else
1226 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1227 rt->rt6i_idev = ort->rt6i_idev;
1228 if (rt->rt6i_idev)
1229 in6_dev_hold(rt->rt6i_idev);
14e50e57 1230
4e3fd7a0 1231 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961 1232 rt->rt6i_flags = ort->rt6i_flags;
14e50e57
DM
1233 rt->rt6i_metric = 0;
1234
1235 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1236#ifdef CONFIG_IPV6_SUBTREES
1237 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1238#endif
1239
1240 dst_free(new);
1241 }
1242
69ead7af
DM
1243 dst_release(dst_orig);
1244 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1245}
14e50e57 1246
1da177e4
LT
1247/*
1248 * Destination cache support functions
1249 */
1250
4b32b5ad
MKL
1251static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1252{
1253 if (rt->dst.from &&
1254 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1255 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1256}
1257
3da59bd9
MKL
1258static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1259{
1260 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1261 return NULL;
1262
1263 if (rt6_check_expired(rt))
1264 return NULL;
1265
1266 return &rt->dst;
1267}
1268
1269static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1270{
1271 if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1272 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1273 return &rt->dst;
1274 else
1275 return NULL;
1276}
1277
1da177e4
LT
1278static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1279{
1280 struct rt6_info *rt;
1281
1282 rt = (struct rt6_info *) dst;
1283
6f3118b5
ND
1284 /* All IPV6 dsts are created with ->obsolete set to the value
1285 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1286 * into this function always.
1287 */
e3bc10bd 1288
4b32b5ad
MKL
1289 rt6_dst_from_metrics_check(rt);
1290
d52d3997 1291 if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
3da59bd9
MKL
1292 return rt6_dst_from_check(rt, cookie);
1293 else
1294 return rt6_check(rt, cookie);
1da177e4
LT
1295}
1296
1297static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1298{
1299 struct rt6_info *rt = (struct rt6_info *) dst;
1300
1301 if (rt) {
54c1a859
YH
1302 if (rt->rt6i_flags & RTF_CACHE) {
1303 if (rt6_check_expired(rt)) {
1304 ip6_del_rt(rt);
1305 dst = NULL;
1306 }
1307 } else {
1da177e4 1308 dst_release(dst);
54c1a859
YH
1309 dst = NULL;
1310 }
1da177e4 1311 }
54c1a859 1312 return dst;
1da177e4
LT
1313}
1314
1315static void ip6_link_failure(struct sk_buff *skb)
1316{
1317 struct rt6_info *rt;
1318
3ffe533c 1319 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1320
adf30907 1321 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1322 if (rt) {
1eb4f758
HFS
1323 if (rt->rt6i_flags & RTF_CACHE) {
1324 dst_hold(&rt->dst);
1325 if (ip6_del_rt(rt))
1326 dst_free(&rt->dst);
1327 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1328 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1329 }
1da177e4
LT
1330 }
1331}
1332
45e4fd26
MKL
1333static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1334{
1335 struct net *net = dev_net(rt->dst.dev);
1336
1337 rt->rt6i_flags |= RTF_MODIFIED;
1338 rt->rt6i_pmtu = mtu;
1339 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1340}
1341
1342static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1343 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1344{
67ba4152 1345 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1346
45e4fd26
MKL
1347 if (rt6->rt6i_flags & RTF_LOCAL)
1348 return;
81aded24 1349
45e4fd26
MKL
1350 dst_confirm(dst);
1351 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1352 if (mtu >= dst_mtu(dst))
1353 return;
9d289715 1354
45e4fd26
MKL
1355 if (rt6->rt6i_flags & RTF_CACHE) {
1356 rt6_do_update_pmtu(rt6, mtu);
1357 } else {
1358 const struct in6_addr *daddr, *saddr;
1359 struct rt6_info *nrt6;
1360
1361 if (iph) {
1362 daddr = &iph->daddr;
1363 saddr = &iph->saddr;
1364 } else if (sk) {
1365 daddr = &sk->sk_v6_daddr;
1366 saddr = &inet6_sk(sk)->saddr;
1367 } else {
1368 return;
1369 }
1370 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1371 if (nrt6) {
1372 rt6_do_update_pmtu(nrt6, mtu);
1373
1374 /* ip6_ins_rt(nrt6) will bump the
1375 * rt6->rt6i_node->fn_sernum
1376 * which will fail the next rt6_check() and
1377 * invalidate the sk->sk_dst_cache.
1378 */
1379 ip6_ins_rt(nrt6);
1380 }
1da177e4
LT
1381 }
1382}
1383
45e4fd26
MKL
1384static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1385 struct sk_buff *skb, u32 mtu)
1386{
1387 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1388}
1389
42ae66c8
DM
1390void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1391 int oif, u32 mark)
81aded24
DM
1392{
1393 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1394 struct dst_entry *dst;
1395 struct flowi6 fl6;
1396
1397 memset(&fl6, 0, sizeof(fl6));
1398 fl6.flowi6_oif = oif;
1b3c61dc 1399 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1400 fl6.daddr = iph->daddr;
1401 fl6.saddr = iph->saddr;
6502ca52 1402 fl6.flowlabel = ip6_flowinfo(iph);
81aded24
DM
1403
1404 dst = ip6_route_output(net, NULL, &fl6);
1405 if (!dst->error)
45e4fd26 1406 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1407 dst_release(dst);
1408}
1409EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1410
1411void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1412{
1413 ip6_update_pmtu(skb, sock_net(sk), mtu,
1414 sk->sk_bound_dev_if, sk->sk_mark);
1415}
1416EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1417
b55b76b2
DJ
1418/* Handle redirects */
/* Lookup key used when validating a redirect: the flow plus the address of
 * the gateway the redirect came from.  __ip6_route_redirect() receives a
 * pointer to the fl6 member and casts it back to this structure, so fl6
 * has to remain the first member.
 */
1419struct ip6rd_flowi {
1420 struct flowi6 fl6;
1421 struct in6_addr gateway;
1422};
1423
1424static struct rt6_info *__ip6_route_redirect(struct net *net,
1425 struct fib6_table *table,
1426 struct flowi6 *fl6,
1427 int flags)
1428{
1429 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1430 struct rt6_info *rt;
1431 struct fib6_node *fn;
1432
1433 /* Get the "current" route for this destination and
1434 * check if the redirect has come from approriate router.
1435 *
1436 * RFC 4861 specifies that redirects should only be
1437 * accepted if they come from the nexthop to the target.
1438 * Due to the way the routes are chosen, this notion
1439 * is a bit fuzzy and one might need to check all possible
1440 * routes.
1441 */
1442
1443 read_lock_bh(&table->tb6_lock);
1444 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1445restart:
1446 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1447 if (rt6_check_expired(rt))
1448 continue;
1449 if (rt->dst.error)
1450 break;
1451 if (!(rt->rt6i_flags & RTF_GATEWAY))
1452 continue;
1453 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1454 continue;
1455 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1456 continue;
1457 break;
1458 }
1459
1460 if (!rt)
1461 rt = net->ipv6.ip6_null_entry;
1462 else if (rt->dst.error) {
1463 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1464 goto out;
1465 }
1466
1467 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1468 fn = fib6_backtrack(fn, &fl6->saddr);
1469 if (fn)
1470 goto restart;
b55b76b2 1471 }
a3c00e46 1472
b0a1ba59 1473out:
b55b76b2
DJ
1474 dst_hold(&rt->dst);
1475
1476 read_unlock_bh(&table->tb6_lock);
1477
1478 return rt;
1479};
1480
1481static struct dst_entry *ip6_route_redirect(struct net *net,
1482 const struct flowi6 *fl6,
1483 const struct in6_addr *gateway)
1484{
1485 int flags = RT6_LOOKUP_F_HAS_SADDR;
1486 struct ip6rd_flowi rdfl;
1487
1488 rdfl.fl6 = *fl6;
1489 rdfl.gateway = *gateway;
1490
1491 return fib6_rule_lookup(net, &rdfl.fl6,
1492 flags, __ip6_route_redirect);
1493}
1494
3a5ad2ee
DM
1495void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1496{
1497 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1498 struct dst_entry *dst;
1499 struct flowi6 fl6;
1500
1501 memset(&fl6, 0, sizeof(fl6));
e374c618 1502 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1503 fl6.flowi6_oif = oif;
1504 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1505 fl6.daddr = iph->daddr;
1506 fl6.saddr = iph->saddr;
6502ca52 1507 fl6.flowlabel = ip6_flowinfo(iph);
3a5ad2ee 1508
b55b76b2
DJ
1509 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1510 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1511 dst_release(dst);
1512}
1513EXPORT_SYMBOL_GPL(ip6_redirect);
1514
c92a59ec
DJ
1515void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1516 u32 mark)
1517{
1518 const struct ipv6hdr *iph = ipv6_hdr(skb);
1519 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1520 struct dst_entry *dst;
1521 struct flowi6 fl6;
1522
1523 memset(&fl6, 0, sizeof(fl6));
e374c618 1524 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1525 fl6.flowi6_oif = oif;
1526 fl6.flowi6_mark = mark;
c92a59ec
DJ
1527 fl6.daddr = msg->dest;
1528 fl6.saddr = iph->daddr;
1529
b55b76b2
DJ
1530 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1531 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1532 dst_release(dst);
1533}
1534
3a5ad2ee
DM
1535void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1536{
1537 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1538}
1539EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1540
0dbaee3b 1541static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1542{
0dbaee3b
DM
1543 struct net_device *dev = dst->dev;
1544 unsigned int mtu = dst_mtu(dst);
1545 struct net *net = dev_net(dev);
1546
1da177e4
LT
1547 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1548
5578689a
DL
1549 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1550 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1551
1552 /*
1ab1457c
YH
1553 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1554 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1555 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1556 * rely only on pmtu discovery"
1557 */
1558 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1559 mtu = IPV6_MAXPLEN;
1560 return mtu;
1561}
1562
ebb762f2 1563static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1564{
4b32b5ad
MKL
1565 const struct rt6_info *rt = (const struct rt6_info *)dst;
1566 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1567 struct inet6_dev *idev;
618f9bc7 1568
4b32b5ad
MKL
1569 if (mtu)
1570 goto out;
1571
1572 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1573 if (mtu)
30f78d8e 1574 goto out;
618f9bc7
SK
1575
1576 mtu = IPV6_MIN_MTU;
d33e4553
DM
1577
1578 rcu_read_lock();
1579 idev = __in6_dev_get(dst->dev);
1580 if (idev)
1581 mtu = idev->cnf.mtu6;
1582 rcu_read_unlock();
1583
30f78d8e
ED
1584out:
1585 return min_t(unsigned int, mtu, IP6_MAX_MTU);
d33e4553
DM
1586}
1587
3b00944c
YH
/* Head of a singly linked list (chained through dst->next) of the dsts
 * created by icmp6_dst_alloc().  icmp6_dst_alloc(), icmp6_dst_gc() and
 * icmp6_clean_all() all take icmp6_dst_lock around their list accesses.
 */
1588static struct dst_entry *icmp6_dst_gc_list;
1589static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1590
3b00944c 1591struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1592 struct flowi6 *fl6)
1da177e4 1593{
87a11578 1594 struct dst_entry *dst;
1da177e4
LT
1595 struct rt6_info *rt;
1596 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1597 struct net *net = dev_net(dev);
1da177e4 1598
38308473 1599 if (unlikely(!idev))
122bdf67 1600 return ERR_PTR(-ENODEV);
1da177e4 1601
ad706862 1602 rt = ip6_dst_alloc(net, dev, 0);
38308473 1603 if (unlikely(!rt)) {
1da177e4 1604 in6_dev_put(idev);
87a11578 1605 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1606 goto out;
1607 }
1608
8e2ec639
YZ
1609 rt->dst.flags |= DST_HOST;
1610 rt->dst.output = ip6_output;
d8d1f30b 1611 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1612 rt->rt6i_gateway = fl6->daddr;
87a11578 1613 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1614 rt->rt6i_dst.plen = 128;
1615 rt->rt6i_idev = idev;
14edd87d 1616 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1617
3b00944c 1618 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1619 rt->dst.next = icmp6_dst_gc_list;
1620 icmp6_dst_gc_list = &rt->dst;
3b00944c 1621 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1622
5578689a 1623 fib6_force_start_gc(net);
1da177e4 1624
87a11578
DM
1625 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1626
1da177e4 1627out:
87a11578 1628 return dst;
1da177e4
LT
1629}
1630
3d0f24a7 1631int icmp6_dst_gc(void)
1da177e4 1632{
e9476e95 1633 struct dst_entry *dst, **pprev;
3d0f24a7 1634 int more = 0;
1da177e4 1635
3b00944c
YH
1636 spin_lock_bh(&icmp6_dst_lock);
1637 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1638
1da177e4
LT
1639 while ((dst = *pprev) != NULL) {
1640 if (!atomic_read(&dst->__refcnt)) {
1641 *pprev = dst->next;
1642 dst_free(dst);
1da177e4
LT
1643 } else {
1644 pprev = &dst->next;
3d0f24a7 1645 ++more;
1da177e4
LT
1646 }
1647 }
1648
3b00944c 1649 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1650
3d0f24a7 1651 return more;
1da177e4
LT
1652}
1653
1e493d19
DM
1654static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1655 void *arg)
1656{
1657 struct dst_entry *dst, **pprev;
1658
1659 spin_lock_bh(&icmp6_dst_lock);
1660 pprev = &icmp6_dst_gc_list;
1661 while ((dst = *pprev) != NULL) {
1662 struct rt6_info *rt = (struct rt6_info *) dst;
1663 if (func(rt, arg)) {
1664 *pprev = dst->next;
1665 dst_free(dst);
1666 } else {
1667 pprev = &dst->next;
1668 }
1669 }
1670 spin_unlock_bh(&icmp6_dst_lock);
1671}
1672
569d3645 1673static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1674{
86393e52 1675 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1676 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1677 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1678 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1679 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1680 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1681 int entries;
7019b78e 1682
fc66f95c 1683 entries = dst_entries_get_fast(ops);
49a18d86 1684 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1685 entries <= rt_max_size)
1da177e4
LT
1686 goto out;
1687
6891a346 1688 net->ipv6.ip6_rt_gc_expire++;
14956643 1689 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1690 entries = dst_entries_get_slow(ops);
1691 if (entries < ops->gc_thresh)
7019b78e 1692 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1693out:
7019b78e 1694 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1695 return entries > rt_max_size;
1da177e4
LT
1696}
1697
e715b6d3
FW
1698static int ip6_convert_metrics(struct mx6_config *mxc,
1699 const struct fib6_config *cfg)
1700{
1701 struct nlattr *nla;
1702 int remaining;
1703 u32 *mp;
1704
63159f29 1705 if (!cfg->fc_mx)
e715b6d3
FW
1706 return 0;
1707
1708 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1709 if (unlikely(!mp))
1710 return -ENOMEM;
1711
1712 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1713 int type = nla_type(nla);
1714
1715 if (type) {
ea697639
DB
1716 u32 val;
1717
e715b6d3
FW
1718 if (unlikely(type > RTAX_MAX))
1719 goto err;
ea697639
DB
1720 if (type == RTAX_CC_ALGO) {
1721 char tmp[TCP_CA_NAME_MAX];
1722
1723 nla_strlcpy(tmp, nla, sizeof(tmp));
1724 val = tcp_ca_get_key_by_name(tmp);
1725 if (val == TCP_CA_UNSPEC)
1726 goto err;
1727 } else {
1728 val = nla_get_u32(nla);
1729 }
e715b6d3 1730
ea697639 1731 mp[type - 1] = val;
e715b6d3
FW
1732 __set_bit(type - 1, mxc->mx_valid);
1733 }
1734 }
1735
1736 mxc->mx = mp;
1737
1738 return 0;
1739 err:
1740 kfree(mp);
1741 return -EINVAL;
1742}
1da177e4 1743
86872cb5 1744int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1745{
1746 int err;
5578689a 1747 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1748 struct rt6_info *rt = NULL;
1749 struct net_device *dev = NULL;
1750 struct inet6_dev *idev = NULL;
c71099ac 1751 struct fib6_table *table;
e715b6d3 1752 struct mx6_config mxc = { .mx = NULL, };
1da177e4
LT
1753 int addr_type;
1754
86872cb5 1755 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1756 return -EINVAL;
1757#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1758 if (cfg->fc_src_len)
1da177e4
LT
1759 return -EINVAL;
1760#endif
86872cb5 1761 if (cfg->fc_ifindex) {
1da177e4 1762 err = -ENODEV;
5578689a 1763 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1764 if (!dev)
1765 goto out;
1766 idev = in6_dev_get(dev);
1767 if (!idev)
1768 goto out;
1769 }
1770
86872cb5
TG
1771 if (cfg->fc_metric == 0)
1772 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1773
d71314b4 1774 err = -ENOBUFS;
38308473
DM
1775 if (cfg->fc_nlinfo.nlh &&
1776 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1777 table = fib6_get_table(net, cfg->fc_table);
38308473 1778 if (!table) {
f3213831 1779 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1780 table = fib6_new_table(net, cfg->fc_table);
1781 }
1782 } else {
1783 table = fib6_new_table(net, cfg->fc_table);
1784 }
38308473
DM
1785
1786 if (!table)
c71099ac 1787 goto out;
c71099ac 1788
ad706862
MKL
1789 rt = ip6_dst_alloc(net, NULL,
1790 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 1791
38308473 1792 if (!rt) {
1da177e4
LT
1793 err = -ENOMEM;
1794 goto out;
1795 }
1796
1716a961
G
1797 if (cfg->fc_flags & RTF_EXPIRES)
1798 rt6_set_expires(rt, jiffies +
1799 clock_t_to_jiffies(cfg->fc_expires));
1800 else
1801 rt6_clean_expires(rt);
1da177e4 1802
86872cb5
TG
1803 if (cfg->fc_protocol == RTPROT_UNSPEC)
1804 cfg->fc_protocol = RTPROT_BOOT;
1805 rt->rt6i_protocol = cfg->fc_protocol;
1806
1807 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1808
1809 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1810 rt->dst.input = ip6_mc_input;
ab79ad14
1811 else if (cfg->fc_flags & RTF_LOCAL)
1812 rt->dst.input = ip6_input;
1da177e4 1813 else
d8d1f30b 1814 rt->dst.input = ip6_forward;
1da177e4 1815
d8d1f30b 1816 rt->dst.output = ip6_output;
1da177e4 1817
19e42e45
RP
1818 if (cfg->fc_encap) {
1819 struct lwtunnel_state *lwtstate;
1820
1821 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
127eb7cd
TH
1822 cfg->fc_encap, AF_INET6, cfg,
1823 &lwtstate);
19e42e45
RP
1824 if (err)
1825 goto out;
61adedf3
JB
1826 rt->dst.lwtstate = lwtstate_get(lwtstate);
1827 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1828 rt->dst.lwtstate->orig_output = rt->dst.output;
1829 rt->dst.output = lwtunnel_output;
25368623 1830 }
61adedf3
JB
1831 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1832 rt->dst.lwtstate->orig_input = rt->dst.input;
1833 rt->dst.input = lwtunnel_input;
25368623 1834 }
19e42e45
RP
1835 }
1836
86872cb5
TG
1837 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1838 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 1839 if (rt->rt6i_dst.plen == 128)
e5fd387a 1840 rt->dst.flags |= DST_HOST;
e5fd387a 1841
1da177e4 1842#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1843 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1844 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1845#endif
1846
86872cb5 1847 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1848
1849 /* We cannot add true routes via loopback here,
1850 they would result in kernel looping; promote them to reject routes
1851 */
86872cb5 1852 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1853 (dev && (dev->flags & IFF_LOOPBACK) &&
1854 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1855 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1856 /* hold loopback dev/idev if we haven't done so. */
5578689a 1857 if (dev != net->loopback_dev) {
1da177e4
LT
1858 if (dev) {
1859 dev_put(dev);
1860 in6_dev_put(idev);
1861 }
5578689a 1862 dev = net->loopback_dev;
1da177e4
LT
1863 dev_hold(dev);
1864 idev = in6_dev_get(dev);
1865 if (!idev) {
1866 err = -ENODEV;
1867 goto out;
1868 }
1869 }
1da177e4 1870 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1871 switch (cfg->fc_type) {
1872 case RTN_BLACKHOLE:
1873 rt->dst.error = -EINVAL;
aad88724 1874 rt->dst.output = dst_discard_sk;
7150aede 1875 rt->dst.input = dst_discard;
ef2c7d7b
ND
1876 break;
1877 case RTN_PROHIBIT:
1878 rt->dst.error = -EACCES;
7150aede
K
1879 rt->dst.output = ip6_pkt_prohibit_out;
1880 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 1881 break;
b4949ab2 1882 case RTN_THROW:
ef2c7d7b 1883 default:
7150aede
K
1884 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1885 : -ENETUNREACH;
1886 rt->dst.output = ip6_pkt_discard_out;
1887 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
1888 break;
1889 }
1da177e4
LT
1890 goto install_route;
1891 }
1892
86872cb5 1893 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1894 const struct in6_addr *gw_addr;
1da177e4
LT
1895 int gwa_type;
1896
86872cb5 1897 gw_addr = &cfg->fc_gateway;
330567b7 1898 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
1899
1900 /* if gw_addr is local we will fail to detect this in case
1901 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1902 * will return already-added prefix route via interface that
1903 * prefix route was assigned to, which might be non-loopback.
1904 */
1905 err = -EINVAL;
330567b7
FW
1906 if (ipv6_chk_addr_and_flags(net, gw_addr,
1907 gwa_type & IPV6_ADDR_LINKLOCAL ?
1908 dev : NULL, 0, 0))
48ed7b26
FW
1909 goto out;
1910
4e3fd7a0 1911 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1912
1913 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1914 struct rt6_info *grt;
1915
1916 /* IPv6 strictly inhibits using not link-local
1917 addresses as nexthop address.
1918 Otherwise, router will not able to send redirects.
1919 It is very good, but in some (rare!) circumstances
1920 (SIT, PtP, NBMA NOARP links) it is handy to allow
1921 some exceptions. --ANK
1922 */
38308473 1923 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1924 goto out;
1925
5578689a 1926 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1927
1928 err = -EHOSTUNREACH;
38308473 1929 if (!grt)
1da177e4
LT
1930 goto out;
1931 if (dev) {
d1918542 1932 if (dev != grt->dst.dev) {
94e187c0 1933 ip6_rt_put(grt);
1da177e4
LT
1934 goto out;
1935 }
1936 } else {
d1918542 1937 dev = grt->dst.dev;
1da177e4
LT
1938 idev = grt->rt6i_idev;
1939 dev_hold(dev);
1940 in6_dev_hold(grt->rt6i_idev);
1941 }
38308473 1942 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1943 err = 0;
94e187c0 1944 ip6_rt_put(grt);
1da177e4
LT
1945
1946 if (err)
1947 goto out;
1948 }
1949 err = -EINVAL;
38308473 1950 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1951 goto out;
1952 }
1953
1954 err = -ENODEV;
38308473 1955 if (!dev)
1da177e4
LT
1956 goto out;
1957
c3968a85
DW
1958 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1959 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1960 err = -EINVAL;
1961 goto out;
1962 }
4e3fd7a0 1963 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1964 rt->rt6i_prefsrc.plen = 128;
1965 } else
1966 rt->rt6i_prefsrc.plen = 0;
1967
86872cb5 1968 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1969
1970install_route:
d8d1f30b 1971 rt->dst.dev = dev;
1da177e4 1972 rt->rt6i_idev = idev;
c71099ac 1973 rt->rt6i_table = table;
63152fc0 1974
c346dca1 1975 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1976
e715b6d3
FW
1977 err = ip6_convert_metrics(&mxc, cfg);
1978 if (err)
1979 goto out;
1da177e4 1980
e715b6d3
FW
1981 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1982
1983 kfree(mxc.mx);
1984 return err;
1da177e4
LT
1985out:
1986 if (dev)
1987 dev_put(dev);
1988 if (idev)
1989 in6_dev_put(idev);
1990 if (rt)
d8d1f30b 1991 dst_free(&rt->dst);
1da177e4
LT
1992 return err;
1993}
1994
86872cb5 1995static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1996{
1997 int err;
c71099ac 1998 struct fib6_table *table;
d1918542 1999 struct net *net = dev_net(rt->dst.dev);
1da177e4 2000
6825a26c
G
2001 if (rt == net->ipv6.ip6_null_entry) {
2002 err = -ENOENT;
2003 goto out;
2004 }
6c813a72 2005
c71099ac
TG
2006 table = rt->rt6i_table;
2007 write_lock_bh(&table->tb6_lock);
86872cb5 2008 err = fib6_del(rt, info);
c71099ac 2009 write_unlock_bh(&table->tb6_lock);
1da177e4 2010
6825a26c 2011out:
94e187c0 2012 ip6_rt_put(rt);
1da177e4
LT
2013 return err;
2014}
2015
e0a1ad73
TG
2016int ip6_del_rt(struct rt6_info *rt)
2017{
4d1169c1 2018 struct nl_info info = {
d1918542 2019 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2020 };
528c4ceb 2021 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2022}
2023
86872cb5 2024static int ip6_route_del(struct fib6_config *cfg)
1da177e4 2025{
c71099ac 2026 struct fib6_table *table;
1da177e4
LT
2027 struct fib6_node *fn;
2028 struct rt6_info *rt;
2029 int err = -ESRCH;
2030
5578689a 2031 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 2032 if (!table)
c71099ac
TG
2033 return err;
2034
2035 read_lock_bh(&table->tb6_lock);
1da177e4 2036
c71099ac 2037 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2038 &cfg->fc_dst, cfg->fc_dst_len,
2039 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2040
1da177e4 2041 if (fn) {
d8d1f30b 2042 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2043 if ((rt->rt6i_flags & RTF_CACHE) &&
2044 !(cfg->fc_flags & RTF_CACHE))
2045 continue;
86872cb5 2046 if (cfg->fc_ifindex &&
d1918542
DM
2047 (!rt->dst.dev ||
2048 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2049 continue;
86872cb5
TG
2050 if (cfg->fc_flags & RTF_GATEWAY &&
2051 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2052 continue;
86872cb5 2053 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2054 continue;
d8d1f30b 2055 dst_hold(&rt->dst);
c71099ac 2056 read_unlock_bh(&table->tb6_lock);
1da177e4 2057
86872cb5 2058 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
2059 }
2060 }
c71099ac 2061 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2062
2063 return err;
2064}
2065
6700c270 2066static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2067{
e8599ff4 2068 struct net *net = dev_net(skb->dev);
a6279458 2069 struct netevent_redirect netevent;
e8599ff4 2070 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2071 struct ndisc_options ndopts;
2072 struct inet6_dev *in6_dev;
2073 struct neighbour *neigh;
71bcdba0 2074 struct rd_msg *msg;
6e157b6a
DM
2075 int optlen, on_link;
2076 u8 *lladdr;
e8599ff4 2077
29a3cad5 2078 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2079 optlen -= sizeof(*msg);
e8599ff4
DM
2080
2081 if (optlen < 0) {
6e157b6a 2082 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2083 return;
2084 }
2085
71bcdba0 2086 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2087
71bcdba0 2088 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2089 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2090 return;
2091 }
2092
6e157b6a 2093 on_link = 0;
71bcdba0 2094 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2095 on_link = 1;
71bcdba0 2096 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2097 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2098 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2099 return;
2100 }
2101
2102 in6_dev = __in6_dev_get(skb->dev);
2103 if (!in6_dev)
2104 return;
2105 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2106 return;
2107
2108 /* RFC2461 8.1:
2109 * The IP source address of the Redirect MUST be the same as the current
2110 * first-hop router for the specified ICMP Destination Address.
2111 */
2112
71bcdba0 2113 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2114 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2115 return;
2116 }
6e157b6a
DM
2117
2118 lladdr = NULL;
e8599ff4
DM
2119 if (ndopts.nd_opts_tgt_lladdr) {
2120 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2121 skb->dev);
2122 if (!lladdr) {
2123 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2124 return;
2125 }
2126 }
2127
6e157b6a
DM
2128 rt = (struct rt6_info *) dst;
2129 if (rt == net->ipv6.ip6_null_entry) {
2130 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2131 return;
6e157b6a 2132 }
e8599ff4 2133
6e157b6a
DM
2134 /* Redirect received -> path was valid.
2135 * Look, redirects are sent only in response to data packets,
2136 * so that this nexthop apparently is reachable. --ANK
2137 */
2138 dst_confirm(&rt->dst);
a6279458 2139
71bcdba0 2140 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2141 if (!neigh)
2142 return;
a6279458 2143
1da177e4
LT
2144 /*
2145 * We have finally decided to accept it.
2146 */
2147
1ab1457c 2148 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
2149 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2150 NEIGH_UPDATE_F_OVERRIDE|
2151 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2152 NEIGH_UPDATE_F_ISROUTER))
2153 );
2154
83a09abd 2155 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2156 if (!nrt)
1da177e4
LT
2157 goto out;
2158
2159 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2160 if (on_link)
2161 nrt->rt6i_flags &= ~RTF_GATEWAY;
2162
4e3fd7a0 2163 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2164
40e22e8f 2165 if (ip6_ins_rt(nrt))
1da177e4
LT
2166 goto out;
2167
d8d1f30b
CG
2168 netevent.old = &rt->dst;
2169 netevent.new = &nrt->dst;
71bcdba0 2170 netevent.daddr = &msg->dest;
60592833 2171 netevent.neigh = neigh;
8d71740c
TT
2172 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2173
38308473 2174 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2175 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2176 ip6_del_rt(rt);
1da177e4
LT
2177 }
2178
2179out:
e8599ff4 2180 neigh_release(neigh);
6e157b6a
DM
2181}
2182
1da177e4
LT
2183/*
2184 * Misc support functions
2185 */
2186
4b32b5ad
MKL
2187static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2188{
2189 BUG_ON(from->dst.from);
2190
2191 rt->rt6i_flags &= ~RTF_EXPIRES;
2192 dst_hold(&from->dst);
2193 rt->dst.from = &from->dst;
2194 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2195}
2196
83a09abd
MKL
2197static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2198{
2199 rt->dst.input = ort->dst.input;
2200 rt->dst.output = ort->dst.output;
2201 rt->rt6i_dst = ort->rt6i_dst;
2202 rt->dst.error = ort->dst.error;
2203 rt->rt6i_idev = ort->rt6i_idev;
2204 if (rt->rt6i_idev)
2205 in6_dev_hold(rt->rt6i_idev);
2206 rt->dst.lastuse = jiffies;
2207 rt->rt6i_gateway = ort->rt6i_gateway;
2208 rt->rt6i_flags = ort->rt6i_flags;
2209 rt6_set_from(rt, ort);
2210 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2211#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2212 rt->rt6i_src = ort->rt6i_src;
1da177e4 2213#endif
83a09abd
MKL
2214 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2215 rt->rt6i_table = ort->rt6i_table;
61adedf3 2216 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2217}
2218
70ceb4f5 2219#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2220static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
2221 const struct in6_addr *prefix, int prefixlen,
2222 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
2223{
2224 struct fib6_node *fn;
2225 struct rt6_info *rt = NULL;
c71099ac
TG
2226 struct fib6_table *table;
2227
efa2cea0 2228 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 2229 if (!table)
c71099ac 2230 return NULL;
70ceb4f5 2231
5744dd9b 2232 read_lock_bh(&table->tb6_lock);
67ba4152 2233 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2234 if (!fn)
2235 goto out;
2236
d8d1f30b 2237 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2238 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2239 continue;
2240 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2241 continue;
2242 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2243 continue;
d8d1f30b 2244 dst_hold(&rt->dst);
70ceb4f5
YH
2245 break;
2246 }
2247out:
5744dd9b 2248 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2249 return rt;
2250}
2251
efa2cea0 2252static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
2253 const struct in6_addr *prefix, int prefixlen,
2254 const struct in6_addr *gwaddr, int ifindex,
95c96174 2255 unsigned int pref)
70ceb4f5 2256{
86872cb5
TG
2257 struct fib6_config cfg = {
2258 .fc_table = RT6_TABLE_INFO,
238fc7ea 2259 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2260 .fc_ifindex = ifindex,
2261 .fc_dst_len = prefixlen,
2262 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2263 RTF_UP | RTF_PREF(pref),
15e47304 2264 .fc_nlinfo.portid = 0,
efa2cea0
DL
2265 .fc_nlinfo.nlh = NULL,
2266 .fc_nlinfo.nl_net = net,
86872cb5
TG
2267 };
2268
4e3fd7a0
AD
2269 cfg.fc_dst = *prefix;
2270 cfg.fc_gateway = *gwaddr;
70ceb4f5 2271
e317da96
YH
2272 /* We should treat it as a default route if prefix length is 0. */
2273 if (!prefixlen)
86872cb5 2274 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2275
86872cb5 2276 ip6_route_add(&cfg);
70ceb4f5 2277
efa2cea0 2278 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
2279}
2280#endif
2281
b71d1d42 2282struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2283{
1da177e4 2284 struct rt6_info *rt;
c71099ac 2285 struct fib6_table *table;
1da177e4 2286
c346dca1 2287 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 2288 if (!table)
c71099ac 2289 return NULL;
1da177e4 2290
5744dd9b 2291 read_lock_bh(&table->tb6_lock);
67ba4152 2292 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2293 if (dev == rt->dst.dev &&
045927ff 2294 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2295 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2296 break;
2297 }
2298 if (rt)
d8d1f30b 2299 dst_hold(&rt->dst);
5744dd9b 2300 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2301 return rt;
2302}
2303
b71d1d42 2304struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2305 struct net_device *dev,
2306 unsigned int pref)
1da177e4 2307{
86872cb5
TG
2308 struct fib6_config cfg = {
2309 .fc_table = RT6_TABLE_DFLT,
238fc7ea 2310 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2311 .fc_ifindex = dev->ifindex,
2312 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2313 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2314 .fc_nlinfo.portid = 0,
5578689a 2315 .fc_nlinfo.nlh = NULL,
c346dca1 2316 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2317 };
1da177e4 2318
4e3fd7a0 2319 cfg.fc_gateway = *gwaddr;
1da177e4 2320
86872cb5 2321 ip6_route_add(&cfg);
1da177e4 2322
1da177e4
LT
2323 return rt6_get_dflt_router(gwaddr, dev);
2324}
2325
7b4da532 2326void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
2327{
2328 struct rt6_info *rt;
c71099ac
TG
2329 struct fib6_table *table;
2330
2331 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 2332 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 2333 if (!table)
c71099ac 2334 return;
1da177e4
LT
2335
2336restart:
c71099ac 2337 read_lock_bh(&table->tb6_lock);
d8d1f30b 2338 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2339 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2340 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2341 dst_hold(&rt->dst);
c71099ac 2342 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2343 ip6_del_rt(rt);
1da177e4
LT
2344 goto restart;
2345 }
2346 }
c71099ac 2347 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2348}
2349
5578689a
DL
2350static void rtmsg_to_fib6_config(struct net *net,
2351 struct in6_rtmsg *rtmsg,
86872cb5
TG
2352 struct fib6_config *cfg)
2353{
2354 memset(cfg, 0, sizeof(*cfg));
2355
2356 cfg->fc_table = RT6_TABLE_MAIN;
2357 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2358 cfg->fc_metric = rtmsg->rtmsg_metric;
2359 cfg->fc_expires = rtmsg->rtmsg_info;
2360 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2361 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2362 cfg->fc_flags = rtmsg->rtmsg_flags;
2363
5578689a 2364 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2365
4e3fd7a0
AD
2366 cfg->fc_dst = rtmsg->rtmsg_dst;
2367 cfg->fc_src = rtmsg->rtmsg_src;
2368 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2369}
2370
5578689a 2371int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2372{
86872cb5 2373 struct fib6_config cfg;
1da177e4
LT
2374 struct in6_rtmsg rtmsg;
2375 int err;
2376
67ba4152 2377 switch (cmd) {
1da177e4
LT
2378 case SIOCADDRT: /* Add a route */
2379 case SIOCDELRT: /* Delete a route */
af31f412 2380 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2381 return -EPERM;
2382 err = copy_from_user(&rtmsg, arg,
2383 sizeof(struct in6_rtmsg));
2384 if (err)
2385 return -EFAULT;
86872cb5 2386
5578689a 2387 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2388
1da177e4
LT
2389 rtnl_lock();
2390 switch (cmd) {
2391 case SIOCADDRT:
86872cb5 2392 err = ip6_route_add(&cfg);
1da177e4
LT
2393 break;
2394 case SIOCDELRT:
86872cb5 2395 err = ip6_route_del(&cfg);
1da177e4
LT
2396 break;
2397 default:
2398 err = -EINVAL;
2399 }
2400 rtnl_unlock();
2401
2402 return err;
3ff50b79 2403 }
1da177e4
LT
2404
2405 return -EINVAL;
2406}
2407
2408/*
2409 * Drop the packet on the floor
2410 */
2411
d5fdd6ba 2412static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2413{
612f09e8 2414 int type;
adf30907 2415 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2416 switch (ipstats_mib_noroutes) {
2417 case IPSTATS_MIB_INNOROUTES:
0660e03f 2418 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2419 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2420 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2421 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2422 break;
2423 }
2424 /* FALLTHROUGH */
2425 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2426 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2427 ipstats_mib_noroutes);
612f09e8
YH
2428 break;
2429 }
3ffe533c 2430 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2431 kfree_skb(skb);
2432 return 0;
2433}
2434
9ce8ade0
TG
2435static int ip6_pkt_discard(struct sk_buff *skb)
2436{
612f09e8 2437 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2438}
2439
aad88724 2440static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
1da177e4 2441{
adf30907 2442 skb->dev = skb_dst(skb)->dev;
612f09e8 2443 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2444}
2445
9ce8ade0
TG
2446static int ip6_pkt_prohibit(struct sk_buff *skb)
2447{
612f09e8 2448 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2449}
2450
aad88724 2451static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
9ce8ade0 2452{
adf30907 2453 skb->dev = skb_dst(skb)->dev;
612f09e8 2454 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2455}
2456
1da177e4
LT
2457/*
2458 * Allocate a dst for local (unicast / anycast) address.
2459 */
2460
2461struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2462 const struct in6_addr *addr,
8f031519 2463 bool anycast)
1da177e4 2464{
c346dca1 2465 struct net *net = dev_net(idev->dev);
a3300ef4 2466 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
ad706862 2467 DST_NOCOUNT);
a3300ef4 2468 if (!rt)
1da177e4
LT
2469 return ERR_PTR(-ENOMEM);
2470
1da177e4
LT
2471 in6_dev_hold(idev);
2472
11d53b49 2473 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2474 rt->dst.input = ip6_input;
2475 rt->dst.output = ip6_output;
1da177e4 2476 rt->rt6i_idev = idev;
1da177e4
LT
2477
2478 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2479 if (anycast)
2480 rt->rt6i_flags |= RTF_ANYCAST;
2481 else
1da177e4 2482 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2483
550bab42 2484 rt->rt6i_gateway = *addr;
4e3fd7a0 2485 rt->rt6i_dst.addr = *addr;
1da177e4 2486 rt->rt6i_dst.plen = 128;
5578689a 2487 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2488
d8d1f30b 2489 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2490
2491 return rt;
2492}
2493
c3968a85
DW
2494int ip6_route_get_saddr(struct net *net,
2495 struct rt6_info *rt,
b71d1d42 2496 const struct in6_addr *daddr,
c3968a85
DW
2497 unsigned int prefs,
2498 struct in6_addr *saddr)
2499{
e16e888b
MS
2500 struct inet6_dev *idev =
2501 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
c3968a85 2502 int err = 0;
e16e888b 2503 if (rt && rt->rt6i_prefsrc.plen)
4e3fd7a0 2504 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2505 else
2506 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2507 daddr, prefs, saddr);
2508 return err;
2509}
2510
/*
 * Argument block for fib6_remove_prefsrc(): identifies the address whose
 * use as a preferred source must be dropped from the routing entries.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace; its null entry is skipped */
	struct in6_addr *addr;		/* preferred-source address to remove */
};
2517
2518static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2519{
2520 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2521 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2522 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2523
d1918542 2524 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2525 rt != net->ipv6.ip6_null_entry &&
2526 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2527 /* remove prefsrc entry */
2528 rt->rt6i_prefsrc.plen = 0;
2529 }
2530 return 0;
2531}
2532
2533void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2534{
2535 struct net *net = dev_net(ifp->idev->dev);
2536 struct arg_dev_net_ip adni = {
2537 .dev = ifp->idev->dev,
2538 .net = net,
2539 .addr = &ifp->addr,
2540 };
0c3584d5 2541 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2542}
2543
be7a010d
DJ
2544#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2545#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2546
2547/* Remove routers and update dst entries when gateway turn into host. */
2548static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2549{
2550 struct in6_addr *gateway = (struct in6_addr *)arg;
2551
2552 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2553 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2554 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2555 return -1;
2556 }
2557 return 0;
2558}
2559
2560void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2561{
2562 fib6_clean_all(net, fib6_clean_tohost, gateway);
2563}
2564
8ed67789
DL
/* Argument block for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace; its null entry is skipped */
};
2569
1da177e4
LT
2570static int fib6_ifdown(struct rt6_info *rt, void *arg)
2571{
bc3ef660 2572 const struct arg_dev_net *adn = arg;
2573 const struct net_device *dev = adn->dev;
8ed67789 2574
d1918542 2575 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2576 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2577 return -1;
c159d30c 2578
1da177e4
LT
2579 return 0;
2580}
2581
f3db4851 2582void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2583{
8ed67789
DL
2584 struct arg_dev_net adn = {
2585 .dev = dev,
2586 .net = net,
2587 };
2588
0c3584d5 2589 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2590 icmp6_clean_all(fib6_ifdown, &adn);
8d0b94af 2591 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2592}
2593
/* Argument block for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2598
2599static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2600{
2601 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2602 struct inet6_dev *idev;
2603
2604 /* In IPv6 pmtu discovery is not optional,
2605 so that RTAX_MTU lock cannot disable it.
2606 We still use this lock to block changes
2607 caused by addrconf/ndisc.
2608 */
2609
2610 idev = __in6_dev_get(arg->dev);
38308473 2611 if (!idev)
1da177e4
LT
2612 return 0;
2613
2614 /* For administrative MTU increase, there is no way to discover
2615 IPv6 PMTU increase, so PMTU increase should be updated here.
2616 Since RFC 1981 doesn't include administrative MTU increase
2617 update PMTU increase is a MUST. (i.e. jumbo frame)
2618 */
2619 /*
2620 If new MTU is less than route PMTU, this new MTU will be the
2621 lowest MTU in the path, update the route PMTU to reflect PMTU
2622 decreases; if new MTU is greater than route PMTU, and the
2623 old MTU is the lowest MTU in the path, update the route PMTU
2624 to reflect the increase. In this case if the other nodes' MTU
2625 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2626 PMTU discouvery.
2627 */
d1918542 2628 if (rt->dst.dev == arg->dev &&
4b32b5ad
MKL
2629 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2630 if (rt->rt6i_flags & RTF_CACHE) {
2631 /* For RTF_CACHE with rt6i_pmtu == 0
2632 * (i.e. a redirected route),
2633 * the metrics of its rt->dst.from has already
2634 * been updated.
2635 */
2636 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2637 rt->rt6i_pmtu = arg->mtu;
2638 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2639 (dst_mtu(&rt->dst) < arg->mtu &&
2640 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2641 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2642 }
566cfd8f 2643 }
1da177e4
LT
2644 return 0;
2645}
2646
95c96174 2647void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2648{
c71099ac
TG
2649 struct rt6_mtu_change_arg arg = {
2650 .dev = dev,
2651 .mtu = mtu,
2652 };
1da177e4 2653
0c3584d5 2654 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2655}
2656
ef7c79ed 2657static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2658 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2659 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2660 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2661 [RTA_PRIORITY] = { .type = NLA_U32 },
2662 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2663 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2664 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2665 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2666 [RTA_ENCAP] = { .type = NLA_NESTED },
86872cb5
TG
2667};
2668
2669static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2670 struct fib6_config *cfg)
1da177e4 2671{
86872cb5
TG
2672 struct rtmsg *rtm;
2673 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2674 unsigned int pref;
86872cb5 2675 int err;
1da177e4 2676
86872cb5
TG
2677 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2678 if (err < 0)
2679 goto errout;
1da177e4 2680
86872cb5
TG
2681 err = -EINVAL;
2682 rtm = nlmsg_data(nlh);
2683 memset(cfg, 0, sizeof(*cfg));
2684
2685 cfg->fc_table = rtm->rtm_table;
2686 cfg->fc_dst_len = rtm->rtm_dst_len;
2687 cfg->fc_src_len = rtm->rtm_src_len;
2688 cfg->fc_flags = RTF_UP;
2689 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2690 cfg->fc_type = rtm->rtm_type;
86872cb5 2691
ef2c7d7b
ND
2692 if (rtm->rtm_type == RTN_UNREACHABLE ||
2693 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2694 rtm->rtm_type == RTN_PROHIBIT ||
2695 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2696 cfg->fc_flags |= RTF_REJECT;
2697
ab79ad14
2698 if (rtm->rtm_type == RTN_LOCAL)
2699 cfg->fc_flags |= RTF_LOCAL;
2700
1f56a01f
MKL
2701 if (rtm->rtm_flags & RTM_F_CLONED)
2702 cfg->fc_flags |= RTF_CACHE;
2703
15e47304 2704 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2705 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2706 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2707
2708 if (tb[RTA_GATEWAY]) {
67b61f6c 2709 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2710 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2711 }
86872cb5
TG
2712
2713 if (tb[RTA_DST]) {
2714 int plen = (rtm->rtm_dst_len + 7) >> 3;
2715
2716 if (nla_len(tb[RTA_DST]) < plen)
2717 goto errout;
2718
2719 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2720 }
86872cb5
TG
2721
2722 if (tb[RTA_SRC]) {
2723 int plen = (rtm->rtm_src_len + 7) >> 3;
2724
2725 if (nla_len(tb[RTA_SRC]) < plen)
2726 goto errout;
2727
2728 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2729 }
86872cb5 2730
c3968a85 2731 if (tb[RTA_PREFSRC])
67b61f6c 2732 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2733
86872cb5
TG
2734 if (tb[RTA_OIF])
2735 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2736
2737 if (tb[RTA_PRIORITY])
2738 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2739
2740 if (tb[RTA_METRICS]) {
2741 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2742 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2743 }
86872cb5
TG
2744
2745 if (tb[RTA_TABLE])
2746 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2747
51ebd318
ND
2748 if (tb[RTA_MULTIPATH]) {
2749 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2750 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2751 }
2752
c78ba6d6
LR
2753 if (tb[RTA_PREF]) {
2754 pref = nla_get_u8(tb[RTA_PREF]);
2755 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2756 pref != ICMPV6_ROUTER_PREF_HIGH)
2757 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2758 cfg->fc_flags |= RTF_PREF(pref);
2759 }
2760
19e42e45
RP
2761 if (tb[RTA_ENCAP])
2762 cfg->fc_encap = tb[RTA_ENCAP];
2763
2764 if (tb[RTA_ENCAP_TYPE])
2765 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2766
86872cb5
TG
2767 err = 0;
2768errout:
2769 return err;
1da177e4
LT
2770}
2771
51ebd318
ND
2772static int ip6_route_multipath(struct fib6_config *cfg, int add)
2773{
2774 struct fib6_config r_cfg;
2775 struct rtnexthop *rtnh;
2776 int remaining;
2777 int attrlen;
2778 int err = 0, last_err = 0;
2779
35f1b4e9 2780 remaining = cfg->fc_mp_len;
51ebd318
ND
2781beginning:
2782 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318
ND
2783
2784 /* Parse a Multipath Entry */
2785 while (rtnh_ok(rtnh, remaining)) {
2786 memcpy(&r_cfg, cfg, sizeof(*cfg));
2787 if (rtnh->rtnh_ifindex)
2788 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2789
2790 attrlen = rtnh_attrlen(rtnh);
2791 if (attrlen > 0) {
2792 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2793
2794 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2795 if (nla) {
67b61f6c 2796 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
2797 r_cfg.fc_flags |= RTF_GATEWAY;
2798 }
19e42e45
RP
2799 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2800 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2801 if (nla)
2802 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318
ND
2803 }
2804 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2805 if (err) {
2806 last_err = err;
2807 /* If we are trying to remove a route, do not stop the
2808 * loop when ip6_route_del() fails (because next hop is
2809 * already gone), we should try to remove all next hops.
2810 */
2811 if (add) {
2812 /* If add fails, we should try to delete all
2813 * next hops that have been already added.
2814 */
2815 add = 0;
35f1b4e9 2816 remaining = cfg->fc_mp_len - remaining;
51ebd318
ND
2817 goto beginning;
2818 }
2819 }
1a72418b 2820 /* Because each route is added like a single route we remove
27596472
MK
2821 * these flags after the first nexthop: if there is a collision,
2822 * we have already failed to add the first nexthop:
2823 * fib6_add_rt2node() has rejected it; when replacing, old
2824 * nexthops have been replaced by first new, the rest should
2825 * be added to it.
1a72418b 2826 */
27596472
MK
2827 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2828 NLM_F_REPLACE);
51ebd318
ND
2829 rtnh = rtnh_next(rtnh, &remaining);
2830 }
2831
2832 return last_err;
2833}
2834
67ba4152 2835static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 2836{
86872cb5
TG
2837 struct fib6_config cfg;
2838 int err;
1da177e4 2839
86872cb5
TG
2840 err = rtm_to_fib6_config(skb, nlh, &cfg);
2841 if (err < 0)
2842 return err;
2843
51ebd318
ND
2844 if (cfg.fc_mp)
2845 return ip6_route_multipath(&cfg, 0);
2846 else
2847 return ip6_route_del(&cfg);
1da177e4
LT
2848}
2849
67ba4152 2850static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 2851{
86872cb5
TG
2852 struct fib6_config cfg;
2853 int err;
1da177e4 2854
86872cb5
TG
2855 err = rtm_to_fib6_config(skb, nlh, &cfg);
2856 if (err < 0)
2857 return err;
2858
51ebd318
ND
2859 if (cfg.fc_mp)
2860 return ip6_route_multipath(&cfg, 1);
2861 else
2862 return ip6_route_add(&cfg);
1da177e4
LT
2863}
2864
19e42e45 2865static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f
TG
2866{
2867 return NLMSG_ALIGN(sizeof(struct rtmsg))
2868 + nla_total_size(16) /* RTA_SRC */
2869 + nla_total_size(16) /* RTA_DST */
2870 + nla_total_size(16) /* RTA_GATEWAY */
2871 + nla_total_size(16) /* RTA_PREFSRC */
2872 + nla_total_size(4) /* RTA_TABLE */
2873 + nla_total_size(4) /* RTA_IIF */
2874 + nla_total_size(4) /* RTA_OIF */
2875 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2876 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 2877 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 2878 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 2879 + nla_total_size(1) /* RTA_PREF */
61adedf3 2880 + lwtunnel_get_encap_size(rt->dst.lwtstate);
339bf98f
TG
2881}
2882
191cd582
BH
2883static int rt6_fill_node(struct net *net,
2884 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 2885 struct in6_addr *dst, struct in6_addr *src,
15e47304 2886 int iif, int type, u32 portid, u32 seq,
7bc570c8 2887 int prefix, int nowait, unsigned int flags)
1da177e4 2888{
4b32b5ad 2889 u32 metrics[RTAX_MAX];
1da177e4 2890 struct rtmsg *rtm;
2d7202bf 2891 struct nlmsghdr *nlh;
e3703b3d 2892 long expires;
9e762a4a 2893 u32 table;
1da177e4
LT
2894
2895 if (prefix) { /* user wants prefix routes only */
2896 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2897 /* success since this is not a prefix route */
2898 return 1;
2899 }
2900 }
2901
15e47304 2902 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 2903 if (!nlh)
26932566 2904 return -EMSGSIZE;
2d7202bf
TG
2905
2906 rtm = nlmsg_data(nlh);
1da177e4
LT
2907 rtm->rtm_family = AF_INET6;
2908 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2909 rtm->rtm_src_len = rt->rt6i_src.plen;
2910 rtm->rtm_tos = 0;
c71099ac 2911 if (rt->rt6i_table)
9e762a4a 2912 table = rt->rt6i_table->tb6_id;
c71099ac 2913 else
9e762a4a
PM
2914 table = RT6_TABLE_UNSPEC;
2915 rtm->rtm_table = table;
c78679e8
DM
2916 if (nla_put_u32(skb, RTA_TABLE, table))
2917 goto nla_put_failure;
ef2c7d7b
ND
2918 if (rt->rt6i_flags & RTF_REJECT) {
2919 switch (rt->dst.error) {
2920 case -EINVAL:
2921 rtm->rtm_type = RTN_BLACKHOLE;
2922 break;
2923 case -EACCES:
2924 rtm->rtm_type = RTN_PROHIBIT;
2925 break;
b4949ab2
ND
2926 case -EAGAIN:
2927 rtm->rtm_type = RTN_THROW;
2928 break;
ef2c7d7b
ND
2929 default:
2930 rtm->rtm_type = RTN_UNREACHABLE;
2931 break;
2932 }
2933 }
38308473 2934 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2935 rtm->rtm_type = RTN_LOCAL;
d1918542 2936 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2937 rtm->rtm_type = RTN_LOCAL;
2938 else
2939 rtm->rtm_type = RTN_UNICAST;
2940 rtm->rtm_flags = 0;
35103d11 2941 if (!netif_carrier_ok(rt->dst.dev)) {
cea45e20 2942 rtm->rtm_flags |= RTNH_F_LINKDOWN;
35103d11
AG
2943 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
2944 rtm->rtm_flags |= RTNH_F_DEAD;
2945 }
1da177e4
LT
2946 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2947 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2948 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 2949 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
2950 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2951 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2952 rtm->rtm_protocol = RTPROT_RA;
2953 else
2954 rtm->rtm_protocol = RTPROT_KERNEL;
2955 }
1da177e4 2956
38308473 2957 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2958 rtm->rtm_flags |= RTM_F_CLONED;
2959
2960 if (dst) {
930345ea 2961 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 2962 goto nla_put_failure;
1ab1457c 2963 rtm->rtm_dst_len = 128;
1da177e4 2964 } else if (rtm->rtm_dst_len)
930345ea 2965 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 2966 goto nla_put_failure;
1da177e4
LT
2967#ifdef CONFIG_IPV6_SUBTREES
2968 if (src) {
930345ea 2969 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 2970 goto nla_put_failure;
1ab1457c 2971 rtm->rtm_src_len = 128;
c78679e8 2972 } else if (rtm->rtm_src_len &&
930345ea 2973 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 2974 goto nla_put_failure;
1da177e4 2975#endif
7bc570c8
YH
2976 if (iif) {
2977#ifdef CONFIG_IPV6_MROUTE
2978 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2979 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2980 if (err <= 0) {
2981 if (!nowait) {
2982 if (err == 0)
2983 return 0;
2984 goto nla_put_failure;
2985 } else {
2986 if (err == -EMSGSIZE)
2987 goto nla_put_failure;
2988 }
2989 }
2990 } else
2991#endif
c78679e8
DM
2992 if (nla_put_u32(skb, RTA_IIF, iif))
2993 goto nla_put_failure;
7bc570c8 2994 } else if (dst) {
1da177e4 2995 struct in6_addr saddr_buf;
c78679e8 2996 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 2997 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 2998 goto nla_put_failure;
1da177e4 2999 }
2d7202bf 3000
c3968a85
DW
3001 if (rt->rt6i_prefsrc.plen) {
3002 struct in6_addr saddr_buf;
4e3fd7a0 3003 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3004 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3005 goto nla_put_failure;
c3968a85
DW
3006 }
3007
4b32b5ad
MKL
3008 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3009 if (rt->rt6i_pmtu)
3010 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3011 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3012 goto nla_put_failure;
3013
dd0cbf29 3014 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 3015 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 3016 goto nla_put_failure;
94f826b8 3017 }
2d7202bf 3018
c78679e8
DM
3019 if (rt->dst.dev &&
3020 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3021 goto nla_put_failure;
3022 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3023 goto nla_put_failure;
8253947e
LW
3024
3025 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3026
87a50699 3027 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3028 goto nla_put_failure;
2d7202bf 3029
c78ba6d6
LR
3030 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3031 goto nla_put_failure;
3032
61adedf3 3033 lwtunnel_fill_encap(skb, rt->dst.lwtstate);
19e42e45 3034
053c095a
JB
3035 nlmsg_end(skb, nlh);
3036 return 0;
2d7202bf
TG
3037
3038nla_put_failure:
26932566
PM
3039 nlmsg_cancel(skb, nlh);
3040 return -EMSGSIZE;
1da177e4
LT
3041}
3042
1b43af54 3043int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3044{
3045 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3046 int prefix;
3047
2d7202bf
TG
3048 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3049 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
3050 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3051 } else
3052 prefix = 0;
3053
191cd582
BH
3054 return rt6_fill_node(arg->net,
3055 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3056 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 3057 prefix, 0, NLM_F_MULTI);
1da177e4
LT
3058}
3059
67ba4152 3060static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 3061{
3b1e0a65 3062 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
3063 struct nlattr *tb[RTA_MAX+1];
3064 struct rt6_info *rt;
1da177e4 3065 struct sk_buff *skb;
ab364a6f 3066 struct rtmsg *rtm;
4c9483b2 3067 struct flowi6 fl6;
72331bc0 3068 int err, iif = 0, oif = 0;
1da177e4 3069
ab364a6f
TG
3070 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3071 if (err < 0)
3072 goto errout;
1da177e4 3073
ab364a6f 3074 err = -EINVAL;
4c9483b2 3075 memset(&fl6, 0, sizeof(fl6));
1da177e4 3076
ab364a6f
TG
3077 if (tb[RTA_SRC]) {
3078 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3079 goto errout;
3080
4e3fd7a0 3081 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3082 }
3083
3084 if (tb[RTA_DST]) {
3085 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3086 goto errout;
3087
4e3fd7a0 3088 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3089 }
3090
3091 if (tb[RTA_IIF])
3092 iif = nla_get_u32(tb[RTA_IIF]);
3093
3094 if (tb[RTA_OIF])
72331bc0 3095 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3096
2e47b291
LC
3097 if (tb[RTA_MARK])
3098 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3099
1da177e4
LT
3100 if (iif) {
3101 struct net_device *dev;
72331bc0
SL
3102 int flags = 0;
3103
5578689a 3104 dev = __dev_get_by_index(net, iif);
1da177e4
LT
3105 if (!dev) {
3106 err = -ENODEV;
ab364a6f 3107 goto errout;
1da177e4 3108 }
72331bc0
SL
3109
3110 fl6.flowi6_iif = iif;
3111
3112 if (!ipv6_addr_any(&fl6.saddr))
3113 flags |= RT6_LOOKUP_F_HAS_SADDR;
3114
3115 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3116 flags);
3117 } else {
3118 fl6.flowi6_oif = oif;
3119
3120 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
3121 }
3122
ab364a6f 3123 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3124 if (!skb) {
94e187c0 3125 ip6_rt_put(rt);
ab364a6f
TG
3126 err = -ENOBUFS;
3127 goto errout;
3128 }
1da177e4 3129
ab364a6f
TG
3130 /* Reserve room for dummy headers, this skb can pass
3131 through good chunk of routing engine.
3132 */
459a98ed 3133 skb_reset_mac_header(skb);
ab364a6f 3134 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 3135
d8d1f30b 3136 skb_dst_set(skb, &rt->dst);
1da177e4 3137
4c9483b2 3138 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 3139 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 3140 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 3141 if (err < 0) {
ab364a6f
TG
3142 kfree_skb(skb);
3143 goto errout;
1da177e4
LT
3144 }
3145
15e47304 3146 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3147errout:
1da177e4 3148 return err;
1da177e4
LT
3149}
3150
86872cb5 3151void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
3152{
3153 struct sk_buff *skb;
5578689a 3154 struct net *net = info->nl_net;
528c4ceb
DL
3155 u32 seq;
3156 int err;
3157
3158 err = -ENOBUFS;
38308473 3159 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3160
19e42e45 3161 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3162 if (!skb)
21713ebc
TG
3163 goto errout;
3164
191cd582 3165 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
15e47304 3166 event, info->portid, seq, 0, 0, 0);
26932566
PM
3167 if (err < 0) {
3168 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3169 WARN_ON(err == -EMSGSIZE);
3170 kfree_skb(skb);
3171 goto errout;
3172 }
15e47304 3173 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3174 info->nlh, gfp_any());
3175 return;
21713ebc
TG
3176errout:
3177 if (err < 0)
5578689a 3178 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3179}
3180
8ed67789 3181static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3182 unsigned long event, void *ptr)
8ed67789 3183{
351638e7 3184 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3185 struct net *net = dev_net(dev);
8ed67789
DL
3186
3187 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 3188 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3189 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3190#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3191 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3192 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3193 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
3194 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3195#endif
3196 }
3197
3198 return NOTIFY_OK;
3199}
3200
1da177e4
LT
3201/*
3202 * /proc
3203 */
3204
3205#ifdef CONFIG_PROC_FS
3206
33120b30
AD
3207static const struct file_operations ipv6_route_proc_fops = {
3208 .owner = THIS_MODULE,
3209 .open = ipv6_route_open,
3210 .read = seq_read,
3211 .llseek = seq_lseek,
8d2ca1d7 3212 .release = seq_release_net,
33120b30
AD
3213};
3214
1da177e4
LT
3215static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3216{
69ddb805 3217 struct net *net = (struct net *)seq->private;
1da177e4 3218 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3219 net->ipv6.rt6_stats->fib_nodes,
3220 net->ipv6.rt6_stats->fib_route_nodes,
3221 net->ipv6.rt6_stats->fib_rt_alloc,
3222 net->ipv6.rt6_stats->fib_rt_entries,
3223 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3224 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3225 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3226
3227 return 0;
3228}
3229
3230static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3231{
de05c557 3232 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3233}
3234
9a32144e 3235static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3236 .owner = THIS_MODULE,
3237 .open = rt6_stats_seq_open,
3238 .read = seq_read,
3239 .llseek = seq_lseek,
b6fcbdb4 3240 .release = single_release_net,
1da177e4
LT
3241};
3242#endif /* CONFIG_PROC_FS */
3243
3244#ifdef CONFIG_SYSCTL
3245
1da177e4 3246static
fe2c6338 3247int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3248 void __user *buffer, size_t *lenp, loff_t *ppos)
3249{
c486da34
LAG
3250 struct net *net;
3251 int delay;
3252 if (!write)
1da177e4 3253 return -EINVAL;
c486da34
LAG
3254
3255 net = (struct net *)ctl->extra1;
3256 delay = net->ipv6.sysctl.flush_delay;
3257 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3258 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3259 return 0;
1da177e4
LT
3260}
3261
fe2c6338 3262struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3263 {
1da177e4 3264 .procname = "flush",
4990509f 3265 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3266 .maxlen = sizeof(int),
89c8b3a1 3267 .mode = 0200,
6d9f239a 3268 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3269 },
3270 {
1da177e4 3271 .procname = "gc_thresh",
9a7ec3a9 3272 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3273 .maxlen = sizeof(int),
3274 .mode = 0644,
6d9f239a 3275 .proc_handler = proc_dointvec,
1da177e4
LT
3276 },
3277 {
1da177e4 3278 .procname = "max_size",
4990509f 3279 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3280 .maxlen = sizeof(int),
3281 .mode = 0644,
6d9f239a 3282 .proc_handler = proc_dointvec,
1da177e4
LT
3283 },
3284 {
1da177e4 3285 .procname = "gc_min_interval",
4990509f 3286 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3287 .maxlen = sizeof(int),
3288 .mode = 0644,
6d9f239a 3289 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3290 },
3291 {
1da177e4 3292 .procname = "gc_timeout",
4990509f 3293 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3294 .maxlen = sizeof(int),
3295 .mode = 0644,
6d9f239a 3296 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3297 },
3298 {
1da177e4 3299 .procname = "gc_interval",
4990509f 3300 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3301 .maxlen = sizeof(int),
3302 .mode = 0644,
6d9f239a 3303 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3304 },
3305 {
1da177e4 3306 .procname = "gc_elasticity",
4990509f 3307 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3308 .maxlen = sizeof(int),
3309 .mode = 0644,
f3d3f616 3310 .proc_handler = proc_dointvec,
1da177e4
LT
3311 },
3312 {
1da177e4 3313 .procname = "mtu_expires",
4990509f 3314 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3315 .maxlen = sizeof(int),
3316 .mode = 0644,
6d9f239a 3317 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3318 },
3319 {
1da177e4 3320 .procname = "min_adv_mss",
4990509f 3321 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3322 .maxlen = sizeof(int),
3323 .mode = 0644,
f3d3f616 3324 .proc_handler = proc_dointvec,
1da177e4
LT
3325 },
3326 {
1da177e4 3327 .procname = "gc_min_interval_ms",
4990509f 3328 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3329 .maxlen = sizeof(int),
3330 .mode = 0644,
6d9f239a 3331 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3332 },
f8572d8f 3333 { }
1da177e4
LT
3334};
3335
2c8c1e72 3336struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3337{
3338 struct ctl_table *table;
3339
3340 table = kmemdup(ipv6_route_table_template,
3341 sizeof(ipv6_route_table_template),
3342 GFP_KERNEL);
5ee09105
YH
3343
3344 if (table) {
3345 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3346 table[0].extra1 = net;
86393e52 3347 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3348 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3349 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3350 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3351 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3352 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3353 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3354 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3355 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3356
3357 /* Don't export sysctls to unprivileged users */
3358 if (net->user_ns != &init_user_ns)
3359 table[0].procname = NULL;
5ee09105
YH
3360 }
3361
760f2d01
DL
3362 return table;
3363}
1da177e4
LT
3364#endif
3365
2c8c1e72 3366static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3367{
633d424b 3368 int ret = -ENOMEM;
8ed67789 3369
86393e52
AD
3370 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3371 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3372
fc66f95c
ED
3373 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3374 goto out_ip6_dst_ops;
3375
8ed67789
DL
3376 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3377 sizeof(*net->ipv6.ip6_null_entry),
3378 GFP_KERNEL);
3379 if (!net->ipv6.ip6_null_entry)
fc66f95c 3380 goto out_ip6_dst_entries;
d8d1f30b 3381 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3382 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3383 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3384 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3385 ip6_template_metrics, true);
8ed67789
DL
3386
3387#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3388 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3389 sizeof(*net->ipv6.ip6_prohibit_entry),
3390 GFP_KERNEL);
68fffc67
PZ
3391 if (!net->ipv6.ip6_prohibit_entry)
3392 goto out_ip6_null_entry;
d8d1f30b 3393 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3394 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3395 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3396 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3397 ip6_template_metrics, true);
8ed67789
DL
3398
3399 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3400 sizeof(*net->ipv6.ip6_blk_hole_entry),
3401 GFP_KERNEL);
68fffc67
PZ
3402 if (!net->ipv6.ip6_blk_hole_entry)
3403 goto out_ip6_prohibit_entry;
d8d1f30b 3404 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3405 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3406 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3407 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3408 ip6_template_metrics, true);
8ed67789
DL
3409#endif
3410
b339a47c
PZ
3411 net->ipv6.sysctl.flush_delay = 0;
3412 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3413 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3414 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3415 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3416 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3417 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3418 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3419
6891a346
BT
3420 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3421
8ed67789
DL
3422 ret = 0;
3423out:
3424 return ret;
f2fc6a54 3425
68fffc67
PZ
3426#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3427out_ip6_prohibit_entry:
3428 kfree(net->ipv6.ip6_prohibit_entry);
3429out_ip6_null_entry:
3430 kfree(net->ipv6.ip6_null_entry);
3431#endif
fc66f95c
ED
3432out_ip6_dst_entries:
3433 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3434out_ip6_dst_ops:
f2fc6a54 3435 goto out;
cdb18761
DL
3436}
3437
2c8c1e72 3438static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3439{
8ed67789
DL
3440 kfree(net->ipv6.ip6_null_entry);
3441#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3442 kfree(net->ipv6.ip6_prohibit_entry);
3443 kfree(net->ipv6.ip6_blk_hole_entry);
3444#endif
41bb78b4 3445 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3446}
3447
d189634e
TG
3448static int __net_init ip6_route_net_init_late(struct net *net)
3449{
3450#ifdef CONFIG_PROC_FS
d4beaa66
G
3451 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3452 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3453#endif
3454 return 0;
3455}
3456
3457static void __net_exit ip6_route_net_exit_late(struct net *net)
3458{
3459#ifdef CONFIG_PROC_FS
ece31ffd
G
3460 remove_proc_entry("ipv6_route", net->proc_net);
3461 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3462#endif
3463}
3464
cdb18761
DL
3465static struct pernet_operations ip6_route_net_ops = {
3466 .init = ip6_route_net_init,
3467 .exit = ip6_route_net_exit,
3468};
3469
c3426b47
DM
3470static int __net_init ipv6_inetpeer_init(struct net *net)
3471{
3472 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3473
3474 if (!bp)
3475 return -ENOMEM;
3476 inet_peer_base_init(bp);
3477 net->ipv6.peers = bp;
3478 return 0;
3479}
3480
3481static void __net_exit ipv6_inetpeer_exit(struct net *net)
3482{
3483 struct inet_peer_base *bp = net->ipv6.peers;
3484
3485 net->ipv6.peers = NULL;
56a6b248 3486 inetpeer_invalidate_tree(bp);
c3426b47
DM
3487 kfree(bp);
3488}
3489
2b823f72 3490static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3491 .init = ipv6_inetpeer_init,
3492 .exit = ipv6_inetpeer_exit,
3493};
3494
d189634e
TG
3495static struct pernet_operations ip6_route_net_late_ops = {
3496 .init = ip6_route_net_init_late,
3497 .exit = ip6_route_net_exit_late,
3498};
3499
8ed67789
DL
3500static struct notifier_block ip6_route_dev_notifier = {
3501 .notifier_call = ip6_route_dev_notify,
3502 .priority = 0,
3503};
3504
433d49c3 3505int __init ip6_route_init(void)
1da177e4 3506{
433d49c3 3507 int ret;
8d0b94af 3508 int cpu;
433d49c3 3509
9a7ec3a9
DL
3510 ret = -ENOMEM;
3511 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3512 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3513 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3514 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3515 goto out;
14e50e57 3516
fc66f95c 3517 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3518 if (ret)
bdb3289f 3519 goto out_kmem_cache;
bdb3289f 3520
c3426b47
DM
3521 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3522 if (ret)
e8803b6c 3523 goto out_dst_entries;
2a0c451a 3524
7e52b33b
DM
3525 ret = register_pernet_subsys(&ip6_route_net_ops);
3526 if (ret)
3527 goto out_register_inetpeer;
c3426b47 3528
5dc121e9
AE
3529 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3530
8ed67789
DL
3531 /* Registering of the loopback is done before this portion of code,
3532 * the loopback reference in rt6_info will not be taken, do it
3533 * manually for init_net */
d8d1f30b 3534 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3535 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3536 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3537 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3538 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3539 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3540 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3541 #endif
e8803b6c 3542 ret = fib6_init();
433d49c3 3543 if (ret)
8ed67789 3544 goto out_register_subsys;
433d49c3 3545
433d49c3
DL
3546 ret = xfrm6_init();
3547 if (ret)
e8803b6c 3548 goto out_fib6_init;
c35b7e72 3549
433d49c3
DL
3550 ret = fib6_rules_init();
3551 if (ret)
3552 goto xfrm6_init;
7e5449c2 3553
d189634e
TG
3554 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3555 if (ret)
3556 goto fib6_rules_init;
3557
433d49c3 3558 ret = -ENOBUFS;
c7ac8679
GR
3559 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3560 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3561 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3562 goto out_register_late_subsys;
c127ea2c 3563
8ed67789 3564 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3565 if (ret)
d189634e 3566 goto out_register_late_subsys;
8ed67789 3567
8d0b94af
MKL
3568 for_each_possible_cpu(cpu) {
3569 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3570
3571 INIT_LIST_HEAD(&ul->head);
3572 spin_lock_init(&ul->lock);
3573 }
3574
433d49c3
DL
3575out:
3576 return ret;
3577
d189634e
TG
3578out_register_late_subsys:
3579 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3580fib6_rules_init:
433d49c3
DL
3581 fib6_rules_cleanup();
3582xfrm6_init:
433d49c3 3583 xfrm6_fini();
2a0c451a
TG
3584out_fib6_init:
3585 fib6_gc_cleanup();
8ed67789
DL
3586out_register_subsys:
3587 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3588out_register_inetpeer:
3589 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3590out_dst_entries:
3591 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3592out_kmem_cache:
f2fc6a54 3593 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3594 goto out;
1da177e4
LT
3595}
3596
3597void ip6_route_cleanup(void)
3598{
8ed67789 3599 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3600 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3601 fib6_rules_cleanup();
1da177e4 3602 xfrm6_fini();
1da177e4 3603 fib6_gc_cleanup();
c3426b47 3604 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3605 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3606 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3607 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3608}
This page took 1.190296 seconds and 5 git commands to generate.