bonding: convert num_grat_arp to the new bonding option API
[deliverable/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
51ebd318 60#include <net/nexthop.h>
19e42e45 61#include <net/lwtunnel.h>
1da177e4
LT
62
63#include <asm/uaccess.h>
64
65#ifdef CONFIG_SYSCTL
66#include <linux/sysctl.h>
67#endif
68
/*
 * Result codes of the neighbour reachability check on a route.
 * All failures are negative so callers can test them with "< 0".
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* find_match() drops the route */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour is in NUD_FAILED state */
	RT6_NUD_FAIL_DO_RR = -1,	/* find_match() scores it 0 and asks for round-robin */
	RT6_NUD_SUCCEED = 1
};
75
83a09abd 76static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 78static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 79static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81static void ip6_dst_destroy(struct dst_entry *);
82static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
569d3645 84static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
85
86static int ip6_pkt_discard(struct sk_buff *skb);
aad88724 87static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
7150aede 88static int ip6_pkt_prohibit(struct sk_buff *skb);
aad88724 89static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
1da177e4 90static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
91static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 struct sk_buff *skb, u32 mtu);
93static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 struct sk_buff *skb);
4b32b5ad 95static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 96static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 97
70ceb4f5 98#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 99static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
100 const struct in6_addr *prefix, int prefixlen,
101 const struct in6_addr *gwaddr, int ifindex,
95c96174 102 unsigned int pref);
efa2cea0 103static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
104 const struct in6_addr *prefix, int prefixlen,
105 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
106#endif
107
8d0b94af
MKL
/* A lock-protected list of rt6_info entries linked through rt6i_uncached. */
struct uncached_list {
	spinlock_t lock;		/* guards @head; taken with spin_lock_bh() */
	struct list_head head;
};

/* One uncached list per CPU. */
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
114
/*
 * Flag @rt as DST_NOCACHE and append it to the current CPU's uncached list.
 * The chosen list is recorded in rt->rt6i_uncached_list so that
 * rt6_uncached_list_del() can later take the matching lock.
 */
static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->dst.flags |= DST_NOCACHE;
	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
126
127static void rt6_uncached_list_del(struct rt6_info *rt)
128{
129 if (!list_empty(&rt->rt6i_uncached)) {
130 struct uncached_list *ul = rt->rt6i_uncached_list;
131
132 spin_lock_bh(&ul->lock);
133 list_del(&rt->rt6i_uncached);
134 spin_unlock_bh(&ul->lock);
135 }
136}
137
/*
 * Walk the uncached list of every possible CPU and detach its routes from
 * @dev. A NULL @dev matches every device. Routes already bound to the
 * namespace's loopback device are skipped.
 *
 * For each matching route both rt6i_idev and dst.dev are switched to the
 * loopback device, taking the new reference before dropping the old one.
 */
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			/* move the inet6_dev reference over to loopback */
			if (rt_idev && (rt_idev->dev == dev || !dev) &&
			    rt_idev->dev != loopback_dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			/* move the net_device reference over to loopback */
			if (rt_dev && (rt_dev == dev || !dev) &&
			    rt_dev != loopback_dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
168
d52d3997
MKL
/*
 * Metrics for a per-cpu route are taken from the dst it was derived from
 * (rt->dst.from), not from @rt itself.
 */
static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}
173
06582540
DM
174static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
175{
4b32b5ad 176 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 177
d52d3997
MKL
178 if (rt->rt6i_flags & RTF_PCPU)
179 return rt6_pcpu_cow_metrics(rt);
180 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
181 return NULL;
182 else
3b471175 183 return dst_cow_metrics_generic(dst, old);
06582540
DM
184}
185
f894cbf8
DM
186static inline const void *choose_neigh_daddr(struct rt6_info *rt,
187 struct sk_buff *skb,
188 const void *daddr)
39232973
DM
189{
190 struct in6_addr *p = &rt->rt6i_gateway;
191
a7563f34 192 if (!ipv6_addr_any(p))
39232973 193 return (const void *) p;
f894cbf8
DM
194 else if (skb)
195 return &ipv6_hdr(skb)->daddr;
39232973
DM
196 return daddr;
197}
198
f894cbf8
DM
199static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
200 struct sk_buff *skb,
201 const void *daddr)
d3aaeb38 202{
39232973
DM
203 struct rt6_info *rt = (struct rt6_info *) dst;
204 struct neighbour *n;
205
f894cbf8 206 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 207 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
208 if (n)
209 return n;
210 return neigh_create(&nd_tbl, daddr, dst->dev);
211}
212
/*
 * dst_ops callbacks for regular IPv6 routes.
 * NOTE(review): presumably copied into each namespace's
 * net->ipv6.ip6_dst_ops, which __ip6_dst_alloc() allocates from - confirm.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family = AF_INET6,
	.gc = ip6_dst_gc,
	.gc_thresh = 1024,
	.check = ip6_dst_check,
	.default_advmss = ip6_default_advmss,
	.mtu = ip6_mtu,
	.cow_metrics = ipv6_cow_metrics,
	.destroy = ip6_dst_destroy,
	.ifdown = ip6_dst_ifdown,
	.negative_advice = ip6_negative_advice,
	.link_failure = ip6_link_failure,
	.update_pmtu = ip6_rt_update_pmtu,
	.redirect = rt6_do_redirect,
	.local_out = __ip6_local_out,
	.neigh_lookup = ip6_neigh_lookup,
};
230
ebb762f2 231static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 232{
618f9bc7
SK
233 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
234
235 return mtu ? : dst->dev->mtu;
ec831ea7
RD
236}
237
6700c270
DM
/* Blackhole dsts ignore path-MTU updates: intentionally empty. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
242
6700c270
DM
/* Blackhole dsts ignore redirects: intentionally empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
247
0972ddb2
HB
/* Blackhole dsts never hand out writable metrics: always returns NULL. */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}
253
14e50e57
DM
/*
 * dst_ops for blackhole dsts. It shares destroy/check/advmss/neigh_lookup
 * with the regular IPv6 ops, while the pmtu and redirect handlers are
 * empty stubs and cow_metrics always returns NULL.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family = AF_INET6,
	.destroy = ip6_dst_destroy,
	.check = ip6_dst_check,
	.mtu = ip6_blackhole_mtu,
	.default_advmss = ip6_default_advmss,
	.update_pmtu = ip6_rt_blackhole_update_pmtu,
	.redirect = ip6_rt_blackhole_redirect,
	.cow_metrics = ip6_rt_blackhole_cow_metrics,
	.neigh_lookup = ip6_neigh_lookup,
};
265
/* All-zero metrics array; the hop-limit slot is spelled out explicitly. */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
269
/*
 * Template for the "null" route: a reject route with error -ENETUNREACH
 * whose input/output handlers discard the packet. It is returned (as
 * net->ipv6.ip6_null_entry) when a lookup finds nothing usable.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -ENETUNREACH,
		.input = ip6_pkt_discard,
		.output = ip6_pkt_discard_out,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,	/* largest possible metric */
	.rt6i_ref = ATOMIC_INIT(1),
};
284
101367c2
TG
285#ifdef CONFIG_IPV6_MULTIPLE_TABLES
286
/*
 * Template for the "prohibit" route: a reject route with error -EACCES
 * whose input/output handlers are the ip6_pkt_prohibit pair.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -EACCES,
		.input = ip6_pkt_prohibit,
		.output = ip6_pkt_prohibit_out,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,	/* largest possible metric */
	.rt6i_ref = ATOMIC_INIT(1),
};
301
/*
 * Template for the "blackhole" route: a reject route with error -EINVAL
 * that hands packets to the generic dst_discard handlers.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -EINVAL,
		.input = dst_discard,
		.output = dst_discard_sk,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,	/* largest possible metric */
	.rt6i_ref = ATOMIC_INIT(1),
};
316
317#endif
318
1da177e4 319/* allocate dst with ip6_dst_ops */
d52d3997
MKL
320static struct rt6_info *__ip6_dst_alloc(struct net *net,
321 struct net_device *dev,
322 int flags,
323 struct fib6_table *table)
1da177e4 324{
97bab73f 325 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 326 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 327
97bab73f 328 if (rt) {
8104891b
SK
329 struct dst_entry *dst = &rt->dst;
330
331 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
51ebd318 332 INIT_LIST_HEAD(&rt->rt6i_siblings);
8d0b94af 333 INIT_LIST_HEAD(&rt->rt6i_uncached);
97bab73f 334 }
cf911662 335 return rt;
1da177e4
LT
336}
337
d52d3997
MKL
338static struct rt6_info *ip6_dst_alloc(struct net *net,
339 struct net_device *dev,
340 int flags,
341 struct fib6_table *table)
342{
343 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table);
344
345 if (rt) {
346 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347 if (rt->rt6i_pcpu) {
348 int cpu;
349
350 for_each_possible_cpu(cpu) {
351 struct rt6_info **p;
352
353 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354 /* no one shares rt */
355 *p = NULL;
356 }
357 } else {
358 dst_destroy((struct dst_entry *)rt);
359 return NULL;
360 }
361 }
362
363 return rt;
364}
365
1da177e4
LT
/*
 * dst_ops->destroy handler: release everything a route owns - its metrics,
 * the per-cpu slot array, its uncached-list membership, the inet6_dev
 * reference and finally the reference on the dst it was derived from.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		/* clear the pointer before dropping the reference */
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	dst->from = NULL;
	dst_release(from);
}
385
1da177e4
LT
386static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387 int how)
388{
389 struct rt6_info *rt = (struct rt6_info *)dst;
390 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 391 struct net_device *loopback_dev =
c346dca1 392 dev_net(dev)->loopback_dev;
1da177e4 393
97cac082
DM
394 if (dev != loopback_dev) {
395 if (idev && idev->dev == dev) {
396 struct inet6_dev *loopback_idev =
397 in6_dev_get(loopback_dev);
398 if (loopback_idev) {
399 rt->rt6i_idev = loopback_idev;
400 in6_dev_put(idev);
401 }
402 }
1da177e4
LT
403 }
404}
405
a50feda5 406static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 407{
1716a961
G
408 if (rt->rt6i_flags & RTF_EXPIRES) {
409 if (time_after(jiffies, rt->dst.expires))
a50feda5 410 return true;
1716a961 411 } else if (rt->dst.from) {
3fd91fb3 412 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 413 }
a50feda5 414 return false;
1da177e4
LT
415}
416
51ebd318
ND
417/* Multipath route selection:
418 * Hash based function using packet header and flowlabel.
419 * Adapted from fib_info_hashfn()
420 */
421static int rt6_info_hash_nhsfn(unsigned int candidate_count,
422 const struct flowi6 *fl6)
423{
424 unsigned int val = fl6->flowi6_proto;
425
c08977bb
YH
426 val ^= ipv6_addr_hash(&fl6->daddr);
427 val ^= ipv6_addr_hash(&fl6->saddr);
51ebd318
ND
428
429 /* Work only if this not encapsulated */
430 switch (fl6->flowi6_proto) {
431 case IPPROTO_UDP:
432 case IPPROTO_TCP:
433 case IPPROTO_SCTP:
b3ce5ae1
ND
434 val ^= (__force u16)fl6->fl6_sport;
435 val ^= (__force u16)fl6->fl6_dport;
51ebd318
ND
436 break;
437
438 case IPPROTO_ICMPV6:
b3ce5ae1
ND
439 val ^= (__force u16)fl6->fl6_icmp_type;
440 val ^= (__force u16)fl6->fl6_icmp_code;
51ebd318
ND
441 break;
442 }
443 /* RFC6438 recommands to use flowlabel */
b3ce5ae1 444 val ^= (__force u32)fl6->flowlabel;
51ebd318
ND
445
446 /* Perhaps, we need to tune, this function? */
447 val = val ^ (val >> 7) ^ (val >> 12);
448 return val % candidate_count;
449}
450
451static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
452 struct flowi6 *fl6, int oif,
453 int strict)
51ebd318
ND
454{
455 struct rt6_info *sibling, *next_sibling;
456 int route_choosen;
457
458 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
459 /* Don't change the route, if route_choosen == 0
460 * (siblings does not include ourself)
461 */
462 if (route_choosen)
463 list_for_each_entry_safe(sibling, next_sibling,
464 &match->rt6i_siblings, rt6i_siblings) {
465 route_choosen--;
466 if (route_choosen == 0) {
52bd4c0c
ND
467 if (rt6_score_route(sibling, oif, strict) < 0)
468 break;
51ebd318
ND
469 match = sibling;
470 break;
471 }
472 }
473 return match;
474}
475
1da177e4 476/*
c71099ac 477 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
478 */
479
8ed67789
DL
/*
 * Pick, from the list starting at @rt (linked through dst.rt6_next), the
 * entry that fits the requested output interface or source address.
 *
 *  - No @oif and an unspecified @saddr: @rt is returned untouched.
 *  - @oif given: the first route whose device has that ifindex wins.
 *    Failing that, a loopback route remembered in "local" is returned.
 *    Failing that, ip6_null_entry when RT6_LOOKUP_F_IFACE is set.
 *  - No @oif but a @saddr: the first route whose device passes
 *    ipv6_chk_addr() for @saddr wins.
 *  - Otherwise @rt is returned.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				/* loopback route whose idev is not on @oif */
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					/* keep an earlier candidate that is on @oif */
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
526
27097255 527#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
/* Deferred router probe: everything rt6_probe_deferred() needs to send one NS. */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;		/* gateway address to solicit */
	struct net_device *dev;		/* held by rt6_probe(), released by the worker */
};
533
/*
 * Work handler queued by rt6_probe(): send a neighbour solicitation for
 * the stored target to its solicited-node multicast address, then drop
 * the device reference and free the work item.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
	dev_put(work->dev);
	kfree(work);
}
545
27097255
YH
/*
 * Router Reachability Probing for a gateway route.
 *
 * Nothing is done for a NULL route, a route without RTF_GATEWAY, or a
 * gateway whose neighbour entry is already NUD_VALID. Otherwise, if there
 * is no neighbour entry, or the entry was last updated more than
 * rtr_probe_interval ago, a work item is scheduled to send the
 * solicitation from process context.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		/* from here on neigh->lock is held until one of the unlocks below */
		write_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			goto out;
	}

	if (!neigh ||
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct __rt6_probe_work *work;

		/* GFP_ATOMIC: called inside the rcu_read_lock_bh() section */
		work = kmalloc(sizeof(*work), GFP_ATOMIC);

		if (neigh && work)
			__neigh_set_probe_once(neigh);

		if (neigh)
			write_unlock(&neigh->lock);

		if (work) {
			INIT_WORK(&work->work, rt6_probe_deferred);
			work->target = rt->rt6i_gateway;
			/* reference is dropped by rt6_probe_deferred() */
			dev_hold(rt->dst.dev);
			work->dev = rt->dst.dev;
			schedule_work(&work->work);
		}
	} else {
		/* neigh is non-NULL on every path that reaches this unlock */
out:
		write_unlock(&neigh->lock);
	}
	rcu_read_unlock_bh();
}
592#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF: no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
596#endif
597
1da177e4 598/*
554cfb7e 599 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 600 */
b6f99a21 601static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 602{
d1918542 603 struct net_device *dev = rt->dst.dev;
161980f4 604 if (!oif || dev->ifindex == oif)
554cfb7e 605 return 2;
161980f4
DM
606 if ((dev->flags & IFF_LOOPBACK) &&
607 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
608 return 1;
609 return 0;
554cfb7e 610}
1da177e4 611
/*
 * Classify the reachability of the route's gateway.
 *
 * Routes flagged RTF_NONEXTHOP, or without RTF_GATEWAY, always succeed.
 * Otherwise the gateway's neighbour entry decides:
 *  - NUD_VALID                          -> RT6_NUD_SUCCEED
 *  - with CONFIG_IPV6_ROUTER_PREF:
 *      any state other than NUD_FAILED  -> RT6_NUD_SUCCEED
 *      NUD_FAILED                       -> RT6_NUD_FAIL_PROBE
 *  - without it, a non-valid entry      -> RT6_NUD_FAIL_HARD
 *  - no neighbour entry at all          -> RT6_NUD_SUCCEED with
 *      CONFIG_IPV6_ROUTER_PREF, RT6_NUD_FAIL_DO_RR without it
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
642
554cfb7e
YH
643static int rt6_score_route(struct rt6_info *rt, int oif,
644 int strict)
1da177e4 645{
a5a81f0b 646 int m;
1ab1457c 647
4d0c5911 648 m = rt6_check_dev(rt, oif);
77d16f45 649 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 650 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
651#ifdef CONFIG_IPV6_ROUTER_PREF
652 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
653#endif
afc154e9
HFS
654 if (strict & RT6_LOOKUP_F_REACHABLE) {
655 int n = rt6_check_neigh(rt);
656 if (n < 0)
657 return n;
658 }
554cfb7e
YH
659 return m;
660}
661
f11e6659 662static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
663 int *mpri, struct rt6_info *match,
664 bool *do_rr)
554cfb7e 665{
f11e6659 666 int m;
afc154e9 667 bool match_do_rr = false;
f11e6659
DM
668
669 if (rt6_check_expired(rt))
670 goto out;
671
672 m = rt6_score_route(rt, oif, strict);
7e980569 673 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
674 match_do_rr = true;
675 m = 0; /* lowest valid score */
7e980569 676 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 677 goto out;
afc154e9
HFS
678 }
679
680 if (strict & RT6_LOOKUP_F_REACHABLE)
681 rt6_probe(rt);
f11e6659 682
7e980569 683 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 684 if (m > *mpri) {
afc154e9 685 *do_rr = match_do_rr;
f11e6659
DM
686 *mpri = m;
687 match = rt;
f11e6659 688 }
f11e6659
DM
689out:
690 return match;
691}
692
/*
 * Find the best route among those sharing @metric, starting the scan at
 * the round-robin head and wrapping around to the front of the leaf list.
 *
 * Pass 1: from @rr_head to the end of the list or the first other metric.
 * Pass 2: from fn->leaf up to (not including) @rr_head.
 * Pass 3: only if nothing matched and a route with a different metric was
 *         seen ("cont"): scan from that route to the end of the list.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	/* fall back to the routes that carry a different metric */
	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
1da177e4 729
f11e6659
DM
730static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
731{
732 struct rt6_info *match, *rt0;
8ed67789 733 struct net *net;
afc154e9 734 bool do_rr = false;
1da177e4 735
f11e6659
DM
736 rt0 = fn->rr_ptr;
737 if (!rt0)
738 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 739
afc154e9
HFS
740 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
741 &do_rr);
1da177e4 742
afc154e9 743 if (do_rr) {
d8d1f30b 744 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 745
554cfb7e 746 /* no entries matched; do round-robin */
f11e6659
DM
747 if (!next || next->rt6i_metric != rt0->rt6i_metric)
748 next = fn->leaf;
749
750 if (next != rt0)
751 fn->rr_ptr = next;
1da177e4 752 }
1da177e4 753
d1918542 754 net = dev_net(rt0->dst.dev);
a02cec21 755 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
756}
757
8b9df265
MKL
758static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
759{
760 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
761}
762
70ceb4f5
YH
763#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev from @gwaddr.
 *
 * @opt/@len: the raw option and its length in bytes.
 *
 * A zero-length prefix addresses the default router entry for @gwaddr;
 * any other prefix addresses a route-info route. A zero lifetime removes
 * an existing route, a non-zero lifetime adds a missing one or refreshes
 * the preference of an existing one. The expiry is then (re)armed.
 *
 * Returns 0 on success, -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	/* length 3 uses the option's prefix field directly */
	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	/* zero lifetime: withdraw the route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		/* existing route: replace only the preference bits */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
837#endif
838
a3c00e46
MKL
839static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
840 struct in6_addr *saddr)
841{
842 struct fib6_node *pn;
843 while (1) {
844 if (fn->fn_flags & RTN_TL_ROOT)
845 return NULL;
846 pn = fn->parent;
847 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
848 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
849 else
850 fn = pn;
851 if (fn->fn_flags & RTN_RTINFO)
852 return fn;
853 }
854}
c71099ac 855
8ed67789
DL
/*
 * Look up a route for @fl6 in @table, under the table's read lock.
 *
 * The leaf list of the matching fib node is filtered by device or source
 * address; multipath selection is applied only when no output interface
 * was requested. If the result is the null entry the search backtracks
 * up the tree and retries. dst_use() is called on the returned route
 * before the lock is dropped.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		/* nothing usable here: try a less specific node */
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
880
/* Policy-rule aware wrapper: run ip6_pol_route_lookup() via fib6_rule_lookup(). */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
887
9acd9f3a
YH
888struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
889 const struct in6_addr *saddr, int oif, int strict)
c71099ac 890{
4c9483b2
DM
891 struct flowi6 fl6 = {
892 .flowi6_oif = oif,
893 .daddr = *daddr,
c71099ac
TG
894 };
895 struct dst_entry *dst;
77d16f45 896 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 897
adaa70bb 898 if (saddr) {
4c9483b2 899 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
900 flags |= RT6_LOOKUP_F_HAS_SADDR;
901 }
902
4c9483b2 903 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
904 if (dst->error == 0)
905 return (struct rt6_info *) dst;
906
907 dst_release(dst);
908
1da177e4
LT
909 return NULL;
910}
7159039a
YH
911EXPORT_SYMBOL(rt6_lookup);
912
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason the
   route is freed. In any case, if the caller does not hold it, it may
   be destroyed.
 */

/*
 * Insert @rt into its own table (rt->rt6i_table) under the table's write
 * lock and return fib6_add()'s result.
 */
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc);
	write_unlock_bh(&table->tb6_lock);

	return err;
}
932
40e22e8f
TG
/*
 * Insert @rt with default netlink info (only the namespace filled in,
 * taken from the route's device) and no extra metrics.
 */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	return __ip6_ins_rt(rt, &info, &mxc);
}
940
8b9df265
MKL
/*
 * Create an RTF_CACHE host-route clone of @ort for destination @daddr.
 *
 * If @ort is itself a cache or per-cpu clone, the route it was derived
 * from (dst.from) is cloned instead. The clone gets metric 0, DST_HOST
 * and a /128 destination. Returns NULL when allocation fails.
 */
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev,
			     0, ort->rt6i_table);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		/* destination equals the address of a non-host prefix route */
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		/* narrow a source-specific route down to the exact source */
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
1da177e4 981
d52d3997
MKL
982static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
983{
984 struct rt6_info *pcpu_rt;
985
986 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
987 rt->dst.dev, rt->dst.flags,
988 rt->rt6i_table);
989
990 if (!pcpu_rt)
991 return NULL;
992 ip6_rt_copy_init(pcpu_rt, rt);
993 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
994 pcpu_rt->rt6i_flags |= RTF_PCPU;
995 return pcpu_rt;
996}
997
998/* It should be called with read_lock_bh(&tb6_lock) acquired */
999static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1000{
1001 struct rt6_info *pcpu_rt, *prev, **p;
1002
1003 p = this_cpu_ptr(rt->rt6i_pcpu);
1004 pcpu_rt = *p;
1005
1006 if (pcpu_rt)
1007 goto done;
1008
1009 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1010 if (!pcpu_rt) {
1011 struct net *net = dev_net(rt->dst.dev);
1012
1013 pcpu_rt = net->ipv6.ip6_null_entry;
1014 goto done;
1015 }
1016
1017 prev = cmpxchg(p, NULL, pcpu_rt);
1018 if (prev) {
1019 /* If someone did it before us, return prev instead */
1020 dst_destroy(&pcpu_rt->dst);
1021 pcpu_rt = prev;
1022 }
1023
1024done:
1025 dst_hold(&pcpu_rt->dst);
1026 rt6_dst_from_metrics_check(pcpu_rt);
1027 return pcpu_rt;
1028}
1029
/*
 * Main policy-routing lookup for @fl6 in @table with output/input
 * interface @oif.
 *
 * Selection runs under the table read lock. When forwarding is disabled
 * only reachable routers are considered first; if the whole backtrack
 * chain yields nothing, the search restarts from the original node
 * without the reachability requirement.
 *
 * The route handed back, always with a reference taken, is one of:
 *  - the null entry or an RTF_CACHE route, returned as is;
 *  - an uncached RTF_CACHE clone, for FLOWI_FLAG_KNOWN_NH lookups on a
 *    non-gateway route;
 *  - this CPU's per-cpu copy of the selected route.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		/* hold rt across the unlock; released once the clone exists */
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		/* update usage stats of rt; the reference is taken on the copy */
		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);
		read_unlock_bh(&table->tb6_lock);

		return pcpu_rt;
	}
}
1106
8ed67789 1107static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1108 struct flowi6 *fl6, int flags)
4acad72d 1109{
4c9483b2 1110 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1111}
1112
72331bc0
SL
1113static struct dst_entry *ip6_route_input_lookup(struct net *net,
1114 struct net_device *dev,
1115 struct flowi6 *fl6, int flags)
1116{
1117 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1118 flags |= RT6_LOOKUP_F_IFACE;
1119
1120 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1121}
1122
c71099ac
TG
1123void ip6_route_input(struct sk_buff *skb)
1124{
b71d1d42 1125 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1126 struct net *net = dev_net(skb->dev);
adaa70bb 1127 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
1128 struct flowi6 fl6 = {
1129 .flowi6_iif = skb->dev->ifindex,
1130 .daddr = iph->daddr,
1131 .saddr = iph->saddr,
6502ca52 1132 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1133 .flowi6_mark = skb->mark,
1134 .flowi6_proto = iph->nexthdr,
c71099ac 1135 };
adaa70bb 1136
72331bc0 1137 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1138}
1139
8ed67789 1140static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1141 struct flowi6 *fl6, int flags)
1da177e4 1142{
4c9483b2 1143 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1144}
1145
67ba4152 1146struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 1147 struct flowi6 *fl6)
c71099ac
TG
1148{
1149 int flags = 0;
1150
1fb9489b 1151 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1152
4c9483b2 1153 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 1154 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1155
4c9483b2 1156 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 1157 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1158 else if (sk)
1159 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1160
4c9483b2 1161 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1162}
7159039a 1163EXPORT_SYMBOL(ip6_route_output);
1da177e4 1164
2774c131 1165struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1166{
5c1e6aa3 1167 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1168 struct dst_entry *new = NULL;
1169
f5b0a874 1170 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1171 if (rt) {
d8d1f30b 1172 new = &rt->dst;
14e50e57 1173
8104891b 1174 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
8104891b 1175
14e50e57 1176 new->__use = 1;
352e512c 1177 new->input = dst_discard;
aad88724 1178 new->output = dst_discard_sk;
14e50e57 1179
21efcfa0
ED
1180 if (dst_metrics_read_only(&ort->dst))
1181 new->_metrics = ort->dst._metrics;
1182 else
1183 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1184 rt->rt6i_idev = ort->rt6i_idev;
1185 if (rt->rt6i_idev)
1186 in6_dev_hold(rt->rt6i_idev);
14e50e57 1187
4e3fd7a0 1188 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961 1189 rt->rt6i_flags = ort->rt6i_flags;
14e50e57
DM
1190 rt->rt6i_metric = 0;
1191
1192 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1193#ifdef CONFIG_IPV6_SUBTREES
1194 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1195#endif
1196
1197 dst_free(new);
1198 }
1199
69ead7af
DM
1200 dst_release(dst_orig);
1201 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1202}
14e50e57 1203
1da177e4
LT
1204/*
1205 * Destination cache support functions
1206 */
1207
4b32b5ad
MKL
1208static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1209{
1210 if (rt->dst.from &&
1211 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1212 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1213}
1214
3da59bd9
MKL
1215static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1216{
1217 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1218 return NULL;
1219
1220 if (rt6_check_expired(rt))
1221 return NULL;
1222
1223 return &rt->dst;
1224}
1225
1226static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1227{
1228 if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1229 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1230 return &rt->dst;
1231 else
1232 return NULL;
1233}
1234
1da177e4
LT
1235static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1236{
1237 struct rt6_info *rt;
1238
1239 rt = (struct rt6_info *) dst;
1240
6f3118b5
ND
1241 /* All IPV6 dsts are created with ->obsolete set to the value
1242 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1243 * into this function always.
1244 */
e3bc10bd 1245
4b32b5ad
MKL
1246 rt6_dst_from_metrics_check(rt);
1247
d52d3997 1248 if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
3da59bd9
MKL
1249 return rt6_dst_from_check(rt, cookie);
1250 else
1251 return rt6_check(rt, cookie);
1da177e4
LT
1252}
1253
1254static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1255{
1256 struct rt6_info *rt = (struct rt6_info *) dst;
1257
1258 if (rt) {
54c1a859
YH
1259 if (rt->rt6i_flags & RTF_CACHE) {
1260 if (rt6_check_expired(rt)) {
1261 ip6_del_rt(rt);
1262 dst = NULL;
1263 }
1264 } else {
1da177e4 1265 dst_release(dst);
54c1a859
YH
1266 dst = NULL;
1267 }
1da177e4 1268 }
54c1a859 1269 return dst;
1da177e4
LT
1270}
1271
1272static void ip6_link_failure(struct sk_buff *skb)
1273{
1274 struct rt6_info *rt;
1275
3ffe533c 1276 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1277
adf30907 1278 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1279 if (rt) {
1eb4f758
HFS
1280 if (rt->rt6i_flags & RTF_CACHE) {
1281 dst_hold(&rt->dst);
1282 if (ip6_del_rt(rt))
1283 dst_free(&rt->dst);
1284 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1285 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1286 }
1da177e4
LT
1287 }
1288}
1289
45e4fd26
MKL
1290static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1291{
1292 struct net *net = dev_net(rt->dst.dev);
1293
1294 rt->rt6i_flags |= RTF_MODIFIED;
1295 rt->rt6i_pmtu = mtu;
1296 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1297}
1298
1299static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1300 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1301{
67ba4152 1302 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1303
45e4fd26
MKL
1304 if (rt6->rt6i_flags & RTF_LOCAL)
1305 return;
81aded24 1306
45e4fd26
MKL
1307 dst_confirm(dst);
1308 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1309 if (mtu >= dst_mtu(dst))
1310 return;
9d289715 1311
45e4fd26
MKL
1312 if (rt6->rt6i_flags & RTF_CACHE) {
1313 rt6_do_update_pmtu(rt6, mtu);
1314 } else {
1315 const struct in6_addr *daddr, *saddr;
1316 struct rt6_info *nrt6;
1317
1318 if (iph) {
1319 daddr = &iph->daddr;
1320 saddr = &iph->saddr;
1321 } else if (sk) {
1322 daddr = &sk->sk_v6_daddr;
1323 saddr = &inet6_sk(sk)->saddr;
1324 } else {
1325 return;
1326 }
1327 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1328 if (nrt6) {
1329 rt6_do_update_pmtu(nrt6, mtu);
1330
1331 /* ip6_ins_rt(nrt6) will bump the
1332 * rt6->rt6i_node->fn_sernum
1333 * which will fail the next rt6_check() and
1334 * invalidate the sk->sk_dst_cache.
1335 */
1336 ip6_ins_rt(nrt6);
1337 }
1da177e4
LT
1338 }
1339}
1340
45e4fd26
MKL
1341static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1342 struct sk_buff *skb, u32 mtu)
1343{
1344 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1345}
1346
42ae66c8
DM
1347void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1348 int oif, u32 mark)
81aded24
DM
1349{
1350 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1351 struct dst_entry *dst;
1352 struct flowi6 fl6;
1353
1354 memset(&fl6, 0, sizeof(fl6));
1355 fl6.flowi6_oif = oif;
1b3c61dc 1356 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1357 fl6.daddr = iph->daddr;
1358 fl6.saddr = iph->saddr;
6502ca52 1359 fl6.flowlabel = ip6_flowinfo(iph);
81aded24
DM
1360
1361 dst = ip6_route_output(net, NULL, &fl6);
1362 if (!dst->error)
45e4fd26 1363 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1364 dst_release(dst);
1365}
1366EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1367
1368void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1369{
1370 ip6_update_pmtu(skb, sock_net(sk), mtu,
1371 sk->sk_bound_dev_if, sk->sk_mark);
1372}
1373EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1374
b55b76b2
DJ
1375/* Handle redirects */
1376struct ip6rd_flowi {
1377 struct flowi6 fl6;
1378 struct in6_addr gateway;
1379};
1380
1381static struct rt6_info *__ip6_route_redirect(struct net *net,
1382 struct fib6_table *table,
1383 struct flowi6 *fl6,
1384 int flags)
1385{
1386 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1387 struct rt6_info *rt;
1388 struct fib6_node *fn;
1389
1390 /* Get the "current" route for this destination and
1391 * check if the redirect has come from approriate router.
1392 *
1393 * RFC 4861 specifies that redirects should only be
1394 * accepted if they come from the nexthop to the target.
1395 * Due to the way the routes are chosen, this notion
1396 * is a bit fuzzy and one might need to check all possible
1397 * routes.
1398 */
1399
1400 read_lock_bh(&table->tb6_lock);
1401 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1402restart:
1403 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1404 if (rt6_check_expired(rt))
1405 continue;
1406 if (rt->dst.error)
1407 break;
1408 if (!(rt->rt6i_flags & RTF_GATEWAY))
1409 continue;
1410 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1411 continue;
1412 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1413 continue;
1414 break;
1415 }
1416
1417 if (!rt)
1418 rt = net->ipv6.ip6_null_entry;
1419 else if (rt->dst.error) {
1420 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1421 goto out;
1422 }
1423
1424 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1425 fn = fib6_backtrack(fn, &fl6->saddr);
1426 if (fn)
1427 goto restart;
b55b76b2 1428 }
a3c00e46 1429
b0a1ba59 1430out:
b55b76b2
DJ
1431 dst_hold(&rt->dst);
1432
1433 read_unlock_bh(&table->tb6_lock);
1434
1435 return rt;
1436};
1437
1438static struct dst_entry *ip6_route_redirect(struct net *net,
1439 const struct flowi6 *fl6,
1440 const struct in6_addr *gateway)
1441{
1442 int flags = RT6_LOOKUP_F_HAS_SADDR;
1443 struct ip6rd_flowi rdfl;
1444
1445 rdfl.fl6 = *fl6;
1446 rdfl.gateway = *gateway;
1447
1448 return fib6_rule_lookup(net, &rdfl.fl6,
1449 flags, __ip6_route_redirect);
1450}
1451
3a5ad2ee
DM
1452void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1453{
1454 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1455 struct dst_entry *dst;
1456 struct flowi6 fl6;
1457
1458 memset(&fl6, 0, sizeof(fl6));
e374c618 1459 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1460 fl6.flowi6_oif = oif;
1461 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1462 fl6.daddr = iph->daddr;
1463 fl6.saddr = iph->saddr;
6502ca52 1464 fl6.flowlabel = ip6_flowinfo(iph);
3a5ad2ee 1465
b55b76b2
DJ
1466 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1467 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1468 dst_release(dst);
1469}
1470EXPORT_SYMBOL_GPL(ip6_redirect);
1471
c92a59ec
DJ
1472void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1473 u32 mark)
1474{
1475 const struct ipv6hdr *iph = ipv6_hdr(skb);
1476 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1477 struct dst_entry *dst;
1478 struct flowi6 fl6;
1479
1480 memset(&fl6, 0, sizeof(fl6));
e374c618 1481 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1482 fl6.flowi6_oif = oif;
1483 fl6.flowi6_mark = mark;
c92a59ec
DJ
1484 fl6.daddr = msg->dest;
1485 fl6.saddr = iph->daddr;
1486
b55b76b2
DJ
1487 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1488 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1489 dst_release(dst);
1490}
1491
3a5ad2ee
DM
1492void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1493{
1494 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1495}
1496EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1497
0dbaee3b 1498static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1499{
0dbaee3b
DM
1500 struct net_device *dev = dst->dev;
1501 unsigned int mtu = dst_mtu(dst);
1502 struct net *net = dev_net(dev);
1503
1da177e4
LT
1504 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1505
5578689a
DL
1506 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1507 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1508
1509 /*
1ab1457c
YH
1510 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1511 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1512 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1513 * rely only on pmtu discovery"
1514 */
1515 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1516 mtu = IPV6_MAXPLEN;
1517 return mtu;
1518}
1519
ebb762f2 1520static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1521{
4b32b5ad
MKL
1522 const struct rt6_info *rt = (const struct rt6_info *)dst;
1523 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1524 struct inet6_dev *idev;
618f9bc7 1525
4b32b5ad
MKL
1526 if (mtu)
1527 goto out;
1528
1529 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1530 if (mtu)
30f78d8e 1531 goto out;
618f9bc7
SK
1532
1533 mtu = IPV6_MIN_MTU;
d33e4553
DM
1534
1535 rcu_read_lock();
1536 idev = __in6_dev_get(dst->dev);
1537 if (idev)
1538 mtu = idev->cnf.mtu6;
1539 rcu_read_unlock();
1540
30f78d8e
ED
1541out:
1542 return min_t(unsigned int, mtu, IP6_MAX_MTU);
d33e4553
DM
1543}
1544
/*
 * dsts handed out by icmp6_dst_alloc(), singly linked through
 * dst->next.  Entries are removed by icmp6_dst_gc() and
 * icmp6_clean_all().
 */
static struct dst_entry *icmp6_dst_gc_list;
/* Serialises every access to icmp6_dst_gc_list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1547
3b00944c 1548struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1549 struct flowi6 *fl6)
1da177e4 1550{
87a11578 1551 struct dst_entry *dst;
1da177e4
LT
1552 struct rt6_info *rt;
1553 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1554 struct net *net = dev_net(dev);
1da177e4 1555
38308473 1556 if (unlikely(!idev))
122bdf67 1557 return ERR_PTR(-ENODEV);
1da177e4 1558
8b96d22d 1559 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1560 if (unlikely(!rt)) {
1da177e4 1561 in6_dev_put(idev);
87a11578 1562 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1563 goto out;
1564 }
1565
8e2ec639
YZ
1566 rt->dst.flags |= DST_HOST;
1567 rt->dst.output = ip6_output;
d8d1f30b 1568 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1569 rt->rt6i_gateway = fl6->daddr;
87a11578 1570 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1571 rt->rt6i_dst.plen = 128;
1572 rt->rt6i_idev = idev;
14edd87d 1573 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1574
3b00944c 1575 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1576 rt->dst.next = icmp6_dst_gc_list;
1577 icmp6_dst_gc_list = &rt->dst;
3b00944c 1578 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1579
5578689a 1580 fib6_force_start_gc(net);
1da177e4 1581
87a11578
DM
1582 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1583
1da177e4 1584out:
87a11578 1585 return dst;
1da177e4
LT
1586}
1587
3d0f24a7 1588int icmp6_dst_gc(void)
1da177e4 1589{
e9476e95 1590 struct dst_entry *dst, **pprev;
3d0f24a7 1591 int more = 0;
1da177e4 1592
3b00944c
YH
1593 spin_lock_bh(&icmp6_dst_lock);
1594 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1595
1da177e4
LT
1596 while ((dst = *pprev) != NULL) {
1597 if (!atomic_read(&dst->__refcnt)) {
1598 *pprev = dst->next;
1599 dst_free(dst);
1da177e4
LT
1600 } else {
1601 pprev = &dst->next;
3d0f24a7 1602 ++more;
1da177e4
LT
1603 }
1604 }
1605
3b00944c 1606 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1607
3d0f24a7 1608 return more;
1da177e4
LT
1609}
1610
1e493d19
DM
1611static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1612 void *arg)
1613{
1614 struct dst_entry *dst, **pprev;
1615
1616 spin_lock_bh(&icmp6_dst_lock);
1617 pprev = &icmp6_dst_gc_list;
1618 while ((dst = *pprev) != NULL) {
1619 struct rt6_info *rt = (struct rt6_info *) dst;
1620 if (func(rt, arg)) {
1621 *pprev = dst->next;
1622 dst_free(dst);
1623 } else {
1624 pprev = &dst->next;
1625 }
1626 }
1627 spin_unlock_bh(&icmp6_dst_lock);
1628}
1629
569d3645 1630static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1631{
86393e52 1632 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1633 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1634 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1635 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1636 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1637 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1638 int entries;
7019b78e 1639
fc66f95c 1640 entries = dst_entries_get_fast(ops);
49a18d86 1641 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1642 entries <= rt_max_size)
1da177e4
LT
1643 goto out;
1644
6891a346 1645 net->ipv6.ip6_rt_gc_expire++;
14956643 1646 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1647 entries = dst_entries_get_slow(ops);
1648 if (entries < ops->gc_thresh)
7019b78e 1649 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1650out:
7019b78e 1651 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1652 return entries > rt_max_size;
1da177e4
LT
1653}
1654
e715b6d3
FW
1655static int ip6_convert_metrics(struct mx6_config *mxc,
1656 const struct fib6_config *cfg)
1657{
1658 struct nlattr *nla;
1659 int remaining;
1660 u32 *mp;
1661
63159f29 1662 if (!cfg->fc_mx)
e715b6d3
FW
1663 return 0;
1664
1665 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1666 if (unlikely(!mp))
1667 return -ENOMEM;
1668
1669 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1670 int type = nla_type(nla);
1671
1672 if (type) {
ea697639
DB
1673 u32 val;
1674
e715b6d3
FW
1675 if (unlikely(type > RTAX_MAX))
1676 goto err;
ea697639
DB
1677 if (type == RTAX_CC_ALGO) {
1678 char tmp[TCP_CA_NAME_MAX];
1679
1680 nla_strlcpy(tmp, nla, sizeof(tmp));
1681 val = tcp_ca_get_key_by_name(tmp);
1682 if (val == TCP_CA_UNSPEC)
1683 goto err;
1684 } else {
1685 val = nla_get_u32(nla);
1686 }
e715b6d3 1687
ea697639 1688 mp[type - 1] = val;
e715b6d3
FW
1689 __set_bit(type - 1, mxc->mx_valid);
1690 }
1691 }
1692
1693 mxc->mx = mp;
1694
1695 return 0;
1696 err:
1697 kfree(mp);
1698 return -EINVAL;
1699}
1da177e4 1700
86872cb5 1701int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1702{
1703 int err;
5578689a 1704 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1705 struct rt6_info *rt = NULL;
1706 struct net_device *dev = NULL;
1707 struct inet6_dev *idev = NULL;
c71099ac 1708 struct fib6_table *table;
e715b6d3 1709 struct mx6_config mxc = { .mx = NULL, };
1da177e4
LT
1710 int addr_type;
1711
86872cb5 1712 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1713 return -EINVAL;
1714#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1715 if (cfg->fc_src_len)
1da177e4
LT
1716 return -EINVAL;
1717#endif
86872cb5 1718 if (cfg->fc_ifindex) {
1da177e4 1719 err = -ENODEV;
5578689a 1720 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1721 if (!dev)
1722 goto out;
1723 idev = in6_dev_get(dev);
1724 if (!idev)
1725 goto out;
1726 }
1727
86872cb5
TG
1728 if (cfg->fc_metric == 0)
1729 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1730
d71314b4 1731 err = -ENOBUFS;
38308473
DM
1732 if (cfg->fc_nlinfo.nlh &&
1733 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1734 table = fib6_get_table(net, cfg->fc_table);
38308473 1735 if (!table) {
f3213831 1736 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1737 table = fib6_new_table(net, cfg->fc_table);
1738 }
1739 } else {
1740 table = fib6_new_table(net, cfg->fc_table);
1741 }
38308473
DM
1742
1743 if (!table)
c71099ac 1744 goto out;
c71099ac 1745
c88507fb 1746 rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1da177e4 1747
38308473 1748 if (!rt) {
1da177e4
LT
1749 err = -ENOMEM;
1750 goto out;
1751 }
1752
1716a961
G
1753 if (cfg->fc_flags & RTF_EXPIRES)
1754 rt6_set_expires(rt, jiffies +
1755 clock_t_to_jiffies(cfg->fc_expires));
1756 else
1757 rt6_clean_expires(rt);
1da177e4 1758
86872cb5
TG
1759 if (cfg->fc_protocol == RTPROT_UNSPEC)
1760 cfg->fc_protocol = RTPROT_BOOT;
1761 rt->rt6i_protocol = cfg->fc_protocol;
1762
1763 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1764
1765 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1766 rt->dst.input = ip6_mc_input;
ab79ad14
1767 else if (cfg->fc_flags & RTF_LOCAL)
1768 rt->dst.input = ip6_input;
1da177e4 1769 else
d8d1f30b 1770 rt->dst.input = ip6_forward;
1da177e4 1771
d8d1f30b 1772 rt->dst.output = ip6_output;
1da177e4 1773
19e42e45
RP
1774 if (cfg->fc_encap) {
1775 struct lwtunnel_state *lwtstate;
1776
1777 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1778 cfg->fc_encap, &lwtstate);
1779 if (err)
1780 goto out;
5a6228a0 1781 rt->rt6i_lwtstate = lwtstate_get(lwtstate);
6673a9f4
ND
1782 if (lwtunnel_output_redirect(rt->rt6i_lwtstate))
1783 rt->dst.output = lwtunnel_output6;
19e42e45
RP
1784 }
1785
86872cb5
TG
1786 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1787 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 1788 if (rt->rt6i_dst.plen == 128)
e5fd387a 1789 rt->dst.flags |= DST_HOST;
e5fd387a 1790
1da177e4 1791#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1792 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1793 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1794#endif
1795
86872cb5 1796 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1797
1798 /* We cannot add true routes via loopback here,
1799 they would result in kernel looping; promote them to reject routes
1800 */
86872cb5 1801 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1802 (dev && (dev->flags & IFF_LOOPBACK) &&
1803 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1804 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1805 /* hold loopback dev/idev if we haven't done so. */
5578689a 1806 if (dev != net->loopback_dev) {
1da177e4
LT
1807 if (dev) {
1808 dev_put(dev);
1809 in6_dev_put(idev);
1810 }
5578689a 1811 dev = net->loopback_dev;
1da177e4
LT
1812 dev_hold(dev);
1813 idev = in6_dev_get(dev);
1814 if (!idev) {
1815 err = -ENODEV;
1816 goto out;
1817 }
1818 }
1da177e4 1819 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1820 switch (cfg->fc_type) {
1821 case RTN_BLACKHOLE:
1822 rt->dst.error = -EINVAL;
aad88724 1823 rt->dst.output = dst_discard_sk;
7150aede 1824 rt->dst.input = dst_discard;
ef2c7d7b
ND
1825 break;
1826 case RTN_PROHIBIT:
1827 rt->dst.error = -EACCES;
7150aede
K
1828 rt->dst.output = ip6_pkt_prohibit_out;
1829 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 1830 break;
b4949ab2 1831 case RTN_THROW:
ef2c7d7b 1832 default:
7150aede
K
1833 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1834 : -ENETUNREACH;
1835 rt->dst.output = ip6_pkt_discard_out;
1836 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
1837 break;
1838 }
1da177e4
LT
1839 goto install_route;
1840 }
1841
86872cb5 1842 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1843 const struct in6_addr *gw_addr;
1da177e4
LT
1844 int gwa_type;
1845
86872cb5 1846 gw_addr = &cfg->fc_gateway;
48ed7b26
FW
1847
1848 /* if gw_addr is local we will fail to detect this in case
1849 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1850 * will return already-added prefix route via interface that
1851 * prefix route was assigned to, which might be non-loopback.
1852 */
1853 err = -EINVAL;
1854 if (ipv6_chk_addr_and_flags(net, gw_addr, NULL, 0, 0))
1855 goto out;
1856
4e3fd7a0 1857 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1858 gwa_type = ipv6_addr_type(gw_addr);
1859
1860 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1861 struct rt6_info *grt;
1862
1863 /* IPv6 strictly inhibits using not link-local
1864 addresses as nexthop address.
1865 Otherwise, router will not able to send redirects.
1866 It is very good, but in some (rare!) circumstances
1867 (SIT, PtP, NBMA NOARP links) it is handy to allow
1868 some exceptions. --ANK
1869 */
38308473 1870 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1871 goto out;
1872
5578689a 1873 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1874
1875 err = -EHOSTUNREACH;
38308473 1876 if (!grt)
1da177e4
LT
1877 goto out;
1878 if (dev) {
d1918542 1879 if (dev != grt->dst.dev) {
94e187c0 1880 ip6_rt_put(grt);
1da177e4
LT
1881 goto out;
1882 }
1883 } else {
d1918542 1884 dev = grt->dst.dev;
1da177e4
LT
1885 idev = grt->rt6i_idev;
1886 dev_hold(dev);
1887 in6_dev_hold(grt->rt6i_idev);
1888 }
38308473 1889 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1890 err = 0;
94e187c0 1891 ip6_rt_put(grt);
1da177e4
LT
1892
1893 if (err)
1894 goto out;
1895 }
1896 err = -EINVAL;
38308473 1897 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1898 goto out;
1899 }
1900
1901 err = -ENODEV;
38308473 1902 if (!dev)
1da177e4
LT
1903 goto out;
1904
c3968a85
DW
1905 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1906 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1907 err = -EINVAL;
1908 goto out;
1909 }
4e3fd7a0 1910 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1911 rt->rt6i_prefsrc.plen = 128;
1912 } else
1913 rt->rt6i_prefsrc.plen = 0;
1914
86872cb5 1915 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1916
1917install_route:
d8d1f30b 1918 rt->dst.dev = dev;
1da177e4 1919 rt->rt6i_idev = idev;
c71099ac 1920 rt->rt6i_table = table;
63152fc0 1921
c346dca1 1922 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1923
e715b6d3
FW
1924 err = ip6_convert_metrics(&mxc, cfg);
1925 if (err)
1926 goto out;
1da177e4 1927
e715b6d3
FW
1928 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1929
1930 kfree(mxc.mx);
1931 return err;
1da177e4
LT
1932out:
1933 if (dev)
1934 dev_put(dev);
1935 if (idev)
1936 in6_dev_put(idev);
1937 if (rt)
d8d1f30b 1938 dst_free(&rt->dst);
1da177e4
LT
1939 return err;
1940}
1941
86872cb5 1942static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1943{
1944 int err;
c71099ac 1945 struct fib6_table *table;
d1918542 1946 struct net *net = dev_net(rt->dst.dev);
1da177e4 1947
6825a26c
G
1948 if (rt == net->ipv6.ip6_null_entry) {
1949 err = -ENOENT;
1950 goto out;
1951 }
6c813a72 1952
c71099ac
TG
1953 table = rt->rt6i_table;
1954 write_lock_bh(&table->tb6_lock);
86872cb5 1955 err = fib6_del(rt, info);
c71099ac 1956 write_unlock_bh(&table->tb6_lock);
1da177e4 1957
6825a26c 1958out:
94e187c0 1959 ip6_rt_put(rt);
1da177e4
LT
1960 return err;
1961}
1962
e0a1ad73
TG
1963int ip6_del_rt(struct rt6_info *rt)
1964{
4d1169c1 1965 struct nl_info info = {
d1918542 1966 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1967 };
528c4ceb 1968 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1969}
1970
86872cb5 1971static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1972{
c71099ac 1973 struct fib6_table *table;
1da177e4
LT
1974 struct fib6_node *fn;
1975 struct rt6_info *rt;
1976 int err = -ESRCH;
1977
5578689a 1978 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1979 if (!table)
c71099ac
TG
1980 return err;
1981
1982 read_lock_bh(&table->tb6_lock);
1da177e4 1983
c71099ac 1984 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1985 &cfg->fc_dst, cfg->fc_dst_len,
1986 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1987
1da177e4 1988 if (fn) {
d8d1f30b 1989 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
1990 if ((rt->rt6i_flags & RTF_CACHE) &&
1991 !(cfg->fc_flags & RTF_CACHE))
1992 continue;
86872cb5 1993 if (cfg->fc_ifindex &&
d1918542
DM
1994 (!rt->dst.dev ||
1995 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1996 continue;
86872cb5
TG
1997 if (cfg->fc_flags & RTF_GATEWAY &&
1998 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1999 continue;
86872cb5 2000 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2001 continue;
d8d1f30b 2002 dst_hold(&rt->dst);
c71099ac 2003 read_unlock_bh(&table->tb6_lock);
1da177e4 2004
86872cb5 2005 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
2006 }
2007 }
c71099ac 2008 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2009
2010 return err;
2011}
2012
6700c270 2013static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2014{
e8599ff4 2015 struct net *net = dev_net(skb->dev);
a6279458 2016 struct netevent_redirect netevent;
e8599ff4 2017 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2018 struct ndisc_options ndopts;
2019 struct inet6_dev *in6_dev;
2020 struct neighbour *neigh;
71bcdba0 2021 struct rd_msg *msg;
6e157b6a
DM
2022 int optlen, on_link;
2023 u8 *lladdr;
e8599ff4 2024
29a3cad5 2025 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2026 optlen -= sizeof(*msg);
e8599ff4
DM
2027
2028 if (optlen < 0) {
6e157b6a 2029 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2030 return;
2031 }
2032
71bcdba0 2033 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2034
71bcdba0 2035 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2036 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2037 return;
2038 }
2039
6e157b6a 2040 on_link = 0;
71bcdba0 2041 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2042 on_link = 1;
71bcdba0 2043 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2044 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2045 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2046 return;
2047 }
2048
2049 in6_dev = __in6_dev_get(skb->dev);
2050 if (!in6_dev)
2051 return;
2052 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2053 return;
2054
2055 /* RFC2461 8.1:
2056 * The IP source address of the Redirect MUST be the same as the current
2057 * first-hop router for the specified ICMP Destination Address.
2058 */
2059
71bcdba0 2060 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2061 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2062 return;
2063 }
6e157b6a
DM
2064
2065 lladdr = NULL;
e8599ff4
DM
2066 if (ndopts.nd_opts_tgt_lladdr) {
2067 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2068 skb->dev);
2069 if (!lladdr) {
2070 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2071 return;
2072 }
2073 }
2074
6e157b6a
DM
2075 rt = (struct rt6_info *) dst;
2076 if (rt == net->ipv6.ip6_null_entry) {
2077 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2078 return;
6e157b6a 2079 }
e8599ff4 2080
6e157b6a
DM
2081 /* Redirect received -> path was valid.
2082 * Look, redirects are sent only in response to data packets,
2083 * so that this nexthop apparently is reachable. --ANK
2084 */
2085 dst_confirm(&rt->dst);
a6279458 2086
71bcdba0 2087 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2088 if (!neigh)
2089 return;
a6279458 2090
1da177e4
LT
2091 /*
2092 * We have finally decided to accept it.
2093 */
2094
1ab1457c 2095 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
2096 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2097 NEIGH_UPDATE_F_OVERRIDE|
2098 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2099 NEIGH_UPDATE_F_ISROUTER))
2100 );
2101
83a09abd 2102 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2103 if (!nrt)
1da177e4
LT
2104 goto out;
2105
2106 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2107 if (on_link)
2108 nrt->rt6i_flags &= ~RTF_GATEWAY;
2109
4e3fd7a0 2110 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2111
40e22e8f 2112 if (ip6_ins_rt(nrt))
1da177e4
LT
2113 goto out;
2114
d8d1f30b
CG
2115 netevent.old = &rt->dst;
2116 netevent.new = &nrt->dst;
71bcdba0 2117 netevent.daddr = &msg->dest;
60592833 2118 netevent.neigh = neigh;
8d71740c
TT
2119 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2120
38308473 2121 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2122 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2123 ip6_del_rt(rt);
1da177e4
LT
2124 }
2125
2126out:
e8599ff4 2127 neigh_release(neigh);
6e157b6a
DM
2128}
2129
1da177e4
LT
2130/*
2131 * Misc support functions
2132 */
2133
4b32b5ad
MKL
2134static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2135{
2136 BUG_ON(from->dst.from);
2137
2138 rt->rt6i_flags &= ~RTF_EXPIRES;
2139 dst_hold(&from->dst);
2140 rt->dst.from = &from->dst;
2141 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2142}
2143
83a09abd
MKL
2144static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2145{
2146 rt->dst.input = ort->dst.input;
2147 rt->dst.output = ort->dst.output;
2148 rt->rt6i_dst = ort->rt6i_dst;
2149 rt->dst.error = ort->dst.error;
2150 rt->rt6i_idev = ort->rt6i_idev;
2151 if (rt->rt6i_idev)
2152 in6_dev_hold(rt->rt6i_idev);
2153 rt->dst.lastuse = jiffies;
2154 rt->rt6i_gateway = ort->rt6i_gateway;
2155 rt->rt6i_flags = ort->rt6i_flags;
2156 rt6_set_from(rt, ort);
2157 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2158#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2159 rt->rt6i_src = ort->rt6i_src;
1da177e4 2160#endif
83a09abd
MKL
2161 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2162 rt->rt6i_table = ort->rt6i_table;
5a6228a0 2163 rt->rt6i_lwtstate = lwtstate_get(ort->rt6i_lwtstate);
1da177e4
LT
2164}
2165
70ceb4f5 2166#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2167static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
2168 const struct in6_addr *prefix, int prefixlen,
2169 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
2170{
2171 struct fib6_node *fn;
2172 struct rt6_info *rt = NULL;
c71099ac
TG
2173 struct fib6_table *table;
2174
efa2cea0 2175 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 2176 if (!table)
c71099ac 2177 return NULL;
70ceb4f5 2178
5744dd9b 2179 read_lock_bh(&table->tb6_lock);
67ba4152 2180 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2181 if (!fn)
2182 goto out;
2183
d8d1f30b 2184 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2185 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2186 continue;
2187 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2188 continue;
2189 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2190 continue;
d8d1f30b 2191 dst_hold(&rt->dst);
70ceb4f5
YH
2192 break;
2193 }
2194out:
5744dd9b 2195 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2196 return rt;
2197}
2198
efa2cea0 2199static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
2200 const struct in6_addr *prefix, int prefixlen,
2201 const struct in6_addr *gwaddr, int ifindex,
95c96174 2202 unsigned int pref)
70ceb4f5 2203{
86872cb5
TG
2204 struct fib6_config cfg = {
2205 .fc_table = RT6_TABLE_INFO,
238fc7ea 2206 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2207 .fc_ifindex = ifindex,
2208 .fc_dst_len = prefixlen,
2209 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2210 RTF_UP | RTF_PREF(pref),
15e47304 2211 .fc_nlinfo.portid = 0,
efa2cea0
DL
2212 .fc_nlinfo.nlh = NULL,
2213 .fc_nlinfo.nl_net = net,
86872cb5
TG
2214 };
2215
4e3fd7a0
AD
2216 cfg.fc_dst = *prefix;
2217 cfg.fc_gateway = *gwaddr;
70ceb4f5 2218
e317da96
YH
2219 /* We should treat it as a default route if prefix length is 0. */
2220 if (!prefixlen)
86872cb5 2221 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2222
86872cb5 2223 ip6_route_add(&cfg);
70ceb4f5 2224
efa2cea0 2225 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
2226}
2227#endif
2228
b71d1d42 2229struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2230{
1da177e4 2231 struct rt6_info *rt;
c71099ac 2232 struct fib6_table *table;
1da177e4 2233
c346dca1 2234 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 2235 if (!table)
c71099ac 2236 return NULL;
1da177e4 2237
5744dd9b 2238 read_lock_bh(&table->tb6_lock);
67ba4152 2239 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2240 if (dev == rt->dst.dev &&
045927ff 2241 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2242 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2243 break;
2244 }
2245 if (rt)
d8d1f30b 2246 dst_hold(&rt->dst);
5744dd9b 2247 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2248 return rt;
2249}
2250
b71d1d42 2251struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2252 struct net_device *dev,
2253 unsigned int pref)
1da177e4 2254{
86872cb5
TG
2255 struct fib6_config cfg = {
2256 .fc_table = RT6_TABLE_DFLT,
238fc7ea 2257 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2258 .fc_ifindex = dev->ifindex,
2259 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2260 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2261 .fc_nlinfo.portid = 0,
5578689a 2262 .fc_nlinfo.nlh = NULL,
c346dca1 2263 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2264 };
1da177e4 2265
4e3fd7a0 2266 cfg.fc_gateway = *gwaddr;
1da177e4 2267
86872cb5 2268 ip6_route_add(&cfg);
1da177e4 2269
1da177e4
LT
2270 return rt6_get_dflt_router(gwaddr, dev);
2271}
2272
7b4da532 2273void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
2274{
2275 struct rt6_info *rt;
c71099ac
TG
2276 struct fib6_table *table;
2277
2278 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 2279 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 2280 if (!table)
c71099ac 2281 return;
1da177e4
LT
2282
2283restart:
c71099ac 2284 read_lock_bh(&table->tb6_lock);
d8d1f30b 2285 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2286 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2287 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2288 dst_hold(&rt->dst);
c71099ac 2289 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2290 ip6_del_rt(rt);
1da177e4
LT
2291 goto restart;
2292 }
2293 }
c71099ac 2294 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2295}
2296
5578689a
DL
2297static void rtmsg_to_fib6_config(struct net *net,
2298 struct in6_rtmsg *rtmsg,
86872cb5
TG
2299 struct fib6_config *cfg)
2300{
2301 memset(cfg, 0, sizeof(*cfg));
2302
2303 cfg->fc_table = RT6_TABLE_MAIN;
2304 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2305 cfg->fc_metric = rtmsg->rtmsg_metric;
2306 cfg->fc_expires = rtmsg->rtmsg_info;
2307 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2308 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2309 cfg->fc_flags = rtmsg->rtmsg_flags;
2310
5578689a 2311 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2312
4e3fd7a0
AD
2313 cfg->fc_dst = rtmsg->rtmsg_dst;
2314 cfg->fc_src = rtmsg->rtmsg_src;
2315 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2316}
2317
5578689a 2318int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2319{
86872cb5 2320 struct fib6_config cfg;
1da177e4
LT
2321 struct in6_rtmsg rtmsg;
2322 int err;
2323
67ba4152 2324 switch (cmd) {
1da177e4
LT
2325 case SIOCADDRT: /* Add a route */
2326 case SIOCDELRT: /* Delete a route */
af31f412 2327 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2328 return -EPERM;
2329 err = copy_from_user(&rtmsg, arg,
2330 sizeof(struct in6_rtmsg));
2331 if (err)
2332 return -EFAULT;
86872cb5 2333
5578689a 2334 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2335
1da177e4
LT
2336 rtnl_lock();
2337 switch (cmd) {
2338 case SIOCADDRT:
86872cb5 2339 err = ip6_route_add(&cfg);
1da177e4
LT
2340 break;
2341 case SIOCDELRT:
86872cb5 2342 err = ip6_route_del(&cfg);
1da177e4
LT
2343 break;
2344 default:
2345 err = -EINVAL;
2346 }
2347 rtnl_unlock();
2348
2349 return err;
3ff50b79 2350 }
1da177e4
LT
2351
2352 return -EINVAL;
2353}
2354
2355/*
2356 * Drop the packet on the floor
2357 */
2358
d5fdd6ba 2359static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2360{
612f09e8 2361 int type;
adf30907 2362 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2363 switch (ipstats_mib_noroutes) {
2364 case IPSTATS_MIB_INNOROUTES:
0660e03f 2365 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2366 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2367 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2368 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2369 break;
2370 }
2371 /* FALLTHROUGH */
2372 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2373 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2374 ipstats_mib_noroutes);
612f09e8
YH
2375 break;
2376 }
3ffe533c 2377 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2378 kfree_skb(skb);
2379 return 0;
2380}
2381
9ce8ade0
TG
2382static int ip6_pkt_discard(struct sk_buff *skb)
2383{
612f09e8 2384 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2385}
2386
aad88724 2387static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
1da177e4 2388{
adf30907 2389 skb->dev = skb_dst(skb)->dev;
612f09e8 2390 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2391}
2392
9ce8ade0
TG
2393static int ip6_pkt_prohibit(struct sk_buff *skb)
2394{
612f09e8 2395 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2396}
2397
aad88724 2398static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
9ce8ade0 2399{
adf30907 2400 skb->dev = skb_dst(skb)->dev;
612f09e8 2401 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2402}
2403
1da177e4
LT
2404/*
2405 * Allocate a dst for local (unicast / anycast) address.
2406 */
2407
2408struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2409 const struct in6_addr *addr,
8f031519 2410 bool anycast)
1da177e4 2411{
c346dca1 2412 struct net *net = dev_net(idev->dev);
a3300ef4
HFS
2413 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2414 DST_NOCOUNT, NULL);
2415 if (!rt)
1da177e4
LT
2416 return ERR_PTR(-ENOMEM);
2417
1da177e4
LT
2418 in6_dev_hold(idev);
2419
11d53b49 2420 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2421 rt->dst.input = ip6_input;
2422 rt->dst.output = ip6_output;
1da177e4 2423 rt->rt6i_idev = idev;
1da177e4
LT
2424
2425 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2426 if (anycast)
2427 rt->rt6i_flags |= RTF_ANYCAST;
2428 else
1da177e4 2429 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2430
550bab42 2431 rt->rt6i_gateway = *addr;
4e3fd7a0 2432 rt->rt6i_dst.addr = *addr;
1da177e4 2433 rt->rt6i_dst.plen = 128;
5578689a 2434 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2435
d8d1f30b 2436 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2437
2438 return rt;
2439}
2440
c3968a85
DW
2441int ip6_route_get_saddr(struct net *net,
2442 struct rt6_info *rt,
b71d1d42 2443 const struct in6_addr *daddr,
c3968a85
DW
2444 unsigned int prefs,
2445 struct in6_addr *saddr)
2446{
e16e888b
MS
2447 struct inet6_dev *idev =
2448 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
c3968a85 2449 int err = 0;
e16e888b 2450 if (rt && rt->rt6i_prefsrc.plen)
4e3fd7a0 2451 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2452 else
2453 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2454 daddr, prefs, saddr);
2455 return err;
2456}
2457
2458/* remove deleted ip from prefsrc entries */
2459struct arg_dev_net_ip {
2460 struct net_device *dev;
2461 struct net *net;
2462 struct in6_addr *addr;
2463};
2464
2465static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2466{
2467 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2468 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2469 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2470
d1918542 2471 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2472 rt != net->ipv6.ip6_null_entry &&
2473 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2474 /* remove prefsrc entry */
2475 rt->rt6i_prefsrc.plen = 0;
2476 }
2477 return 0;
2478}
2479
2480void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2481{
2482 struct net *net = dev_net(ifp->idev->dev);
2483 struct arg_dev_net_ip adni = {
2484 .dev = ifp->idev->dev,
2485 .net = net,
2486 .addr = &ifp->addr,
2487 };
0c3584d5 2488 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2489}
2490
be7a010d
DJ
2491#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2492#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2493
2494/* Remove routers and update dst entries when gateway turn into host. */
2495static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2496{
2497 struct in6_addr *gateway = (struct in6_addr *)arg;
2498
2499 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2500 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2501 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2502 return -1;
2503 }
2504 return 0;
2505}
2506
2507void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2508{
2509 fib6_clean_all(net, fib6_clean_tohost, gateway);
2510}
2511
8ed67789
DL
2512struct arg_dev_net {
2513 struct net_device *dev;
2514 struct net *net;
2515};
2516
1da177e4
LT
2517static int fib6_ifdown(struct rt6_info *rt, void *arg)
2518{
bc3ef660 2519 const struct arg_dev_net *adn = arg;
2520 const struct net_device *dev = adn->dev;
8ed67789 2521
d1918542 2522 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2523 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2524 return -1;
c159d30c 2525
1da177e4
LT
2526 return 0;
2527}
2528
f3db4851 2529void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2530{
8ed67789
DL
2531 struct arg_dev_net adn = {
2532 .dev = dev,
2533 .net = net,
2534 };
2535
0c3584d5 2536 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2537 icmp6_clean_all(fib6_ifdown, &adn);
8d0b94af 2538 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2539}
2540
95c96174 2541struct rt6_mtu_change_arg {
1da177e4 2542 struct net_device *dev;
95c96174 2543 unsigned int mtu;
1da177e4
LT
2544};
2545
2546static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2547{
2548 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2549 struct inet6_dev *idev;
2550
2551 /* In IPv6 pmtu discovery is not optional,
2552 so that RTAX_MTU lock cannot disable it.
2553 We still use this lock to block changes
2554 caused by addrconf/ndisc.
2555 */
2556
2557 idev = __in6_dev_get(arg->dev);
38308473 2558 if (!idev)
1da177e4
LT
2559 return 0;
2560
2561 /* For administrative MTU increase, there is no way to discover
2562 IPv6 PMTU increase, so PMTU increase should be updated here.
2563 Since RFC 1981 doesn't include administrative MTU increase
2564 update PMTU increase is a MUST. (i.e. jumbo frame)
2565 */
2566 /*
2567 If new MTU is less than route PMTU, this new MTU will be the
2568 lowest MTU in the path, update the route PMTU to reflect PMTU
2569 decreases; if new MTU is greater than route PMTU, and the
2570 old MTU is the lowest MTU in the path, update the route PMTU
2571 to reflect the increase. In this case if the other nodes' MTU
2572 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2573 PMTU discouvery.
2574 */
d1918542 2575 if (rt->dst.dev == arg->dev &&
4b32b5ad
MKL
2576 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2577 if (rt->rt6i_flags & RTF_CACHE) {
2578 /* For RTF_CACHE with rt6i_pmtu == 0
2579 * (i.e. a redirected route),
2580 * the metrics of its rt->dst.from has already
2581 * been updated.
2582 */
2583 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2584 rt->rt6i_pmtu = arg->mtu;
2585 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2586 (dst_mtu(&rt->dst) < arg->mtu &&
2587 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2588 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2589 }
566cfd8f 2590 }
1da177e4
LT
2591 return 0;
2592}
2593
95c96174 2594void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2595{
c71099ac
TG
2596 struct rt6_mtu_change_arg arg = {
2597 .dev = dev,
2598 .mtu = mtu,
2599 };
1da177e4 2600
0c3584d5 2601 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2602}
2603
ef7c79ed 2604static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2605 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2606 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2607 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2608 [RTA_PRIORITY] = { .type = NLA_U32 },
2609 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2610 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2611 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2612 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2613 [RTA_ENCAP] = { .type = NLA_NESTED },
86872cb5
TG
2614};
2615
2616static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2617 struct fib6_config *cfg)
1da177e4 2618{
86872cb5
TG
2619 struct rtmsg *rtm;
2620 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2621 unsigned int pref;
86872cb5 2622 int err;
1da177e4 2623
86872cb5
TG
2624 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2625 if (err < 0)
2626 goto errout;
1da177e4 2627
86872cb5
TG
2628 err = -EINVAL;
2629 rtm = nlmsg_data(nlh);
2630 memset(cfg, 0, sizeof(*cfg));
2631
2632 cfg->fc_table = rtm->rtm_table;
2633 cfg->fc_dst_len = rtm->rtm_dst_len;
2634 cfg->fc_src_len = rtm->rtm_src_len;
2635 cfg->fc_flags = RTF_UP;
2636 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2637 cfg->fc_type = rtm->rtm_type;
86872cb5 2638
ef2c7d7b
ND
2639 if (rtm->rtm_type == RTN_UNREACHABLE ||
2640 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2641 rtm->rtm_type == RTN_PROHIBIT ||
2642 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2643 cfg->fc_flags |= RTF_REJECT;
2644
ab79ad14
2645 if (rtm->rtm_type == RTN_LOCAL)
2646 cfg->fc_flags |= RTF_LOCAL;
2647
1f56a01f
MKL
2648 if (rtm->rtm_flags & RTM_F_CLONED)
2649 cfg->fc_flags |= RTF_CACHE;
2650
15e47304 2651 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2652 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2653 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2654
2655 if (tb[RTA_GATEWAY]) {
67b61f6c 2656 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2657 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2658 }
86872cb5
TG
2659
2660 if (tb[RTA_DST]) {
2661 int plen = (rtm->rtm_dst_len + 7) >> 3;
2662
2663 if (nla_len(tb[RTA_DST]) < plen)
2664 goto errout;
2665
2666 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2667 }
86872cb5
TG
2668
2669 if (tb[RTA_SRC]) {
2670 int plen = (rtm->rtm_src_len + 7) >> 3;
2671
2672 if (nla_len(tb[RTA_SRC]) < plen)
2673 goto errout;
2674
2675 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2676 }
86872cb5 2677
c3968a85 2678 if (tb[RTA_PREFSRC])
67b61f6c 2679 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2680
86872cb5
TG
2681 if (tb[RTA_OIF])
2682 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2683
2684 if (tb[RTA_PRIORITY])
2685 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2686
2687 if (tb[RTA_METRICS]) {
2688 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2689 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2690 }
86872cb5
TG
2691
2692 if (tb[RTA_TABLE])
2693 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2694
51ebd318
ND
2695 if (tb[RTA_MULTIPATH]) {
2696 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2697 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2698 }
2699
c78ba6d6
LR
2700 if (tb[RTA_PREF]) {
2701 pref = nla_get_u8(tb[RTA_PREF]);
2702 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2703 pref != ICMPV6_ROUTER_PREF_HIGH)
2704 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2705 cfg->fc_flags |= RTF_PREF(pref);
2706 }
2707
19e42e45
RP
2708 if (tb[RTA_ENCAP])
2709 cfg->fc_encap = tb[RTA_ENCAP];
2710
2711 if (tb[RTA_ENCAP_TYPE])
2712 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2713
86872cb5
TG
2714 err = 0;
2715errout:
2716 return err;
1da177e4
LT
2717}
2718
51ebd318
ND
2719static int ip6_route_multipath(struct fib6_config *cfg, int add)
2720{
2721 struct fib6_config r_cfg;
2722 struct rtnexthop *rtnh;
2723 int remaining;
2724 int attrlen;
2725 int err = 0, last_err = 0;
2726
35f1b4e9 2727 remaining = cfg->fc_mp_len;
51ebd318
ND
2728beginning:
2729 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318
ND
2730
2731 /* Parse a Multipath Entry */
2732 while (rtnh_ok(rtnh, remaining)) {
2733 memcpy(&r_cfg, cfg, sizeof(*cfg));
2734 if (rtnh->rtnh_ifindex)
2735 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2736
2737 attrlen = rtnh_attrlen(rtnh);
2738 if (attrlen > 0) {
2739 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2740
2741 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2742 if (nla) {
67b61f6c 2743 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
2744 r_cfg.fc_flags |= RTF_GATEWAY;
2745 }
19e42e45
RP
2746 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2747 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2748 if (nla)
2749 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318
ND
2750 }
2751 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2752 if (err) {
2753 last_err = err;
2754 /* If we are trying to remove a route, do not stop the
2755 * loop when ip6_route_del() fails (because next hop is
2756 * already gone), we should try to remove all next hops.
2757 */
2758 if (add) {
2759 /* If add fails, we should try to delete all
2760 * next hops that have been already added.
2761 */
2762 add = 0;
35f1b4e9 2763 remaining = cfg->fc_mp_len - remaining;
51ebd318
ND
2764 goto beginning;
2765 }
2766 }
1a72418b 2767 /* Because each route is added like a single route we remove
27596472
MK
2768 * these flags after the first nexthop: if there is a collision,
2769 * we have already failed to add the first nexthop:
2770 * fib6_add_rt2node() has rejected it; when replacing, old
2771 * nexthops have been replaced by first new, the rest should
2772 * be added to it.
1a72418b 2773 */
27596472
MK
2774 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2775 NLM_F_REPLACE);
51ebd318
ND
2776 rtnh = rtnh_next(rtnh, &remaining);
2777 }
2778
2779 return last_err;
2780}
2781
67ba4152 2782static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 2783{
86872cb5
TG
2784 struct fib6_config cfg;
2785 int err;
1da177e4 2786
86872cb5
TG
2787 err = rtm_to_fib6_config(skb, nlh, &cfg);
2788 if (err < 0)
2789 return err;
2790
51ebd318
ND
2791 if (cfg.fc_mp)
2792 return ip6_route_multipath(&cfg, 0);
2793 else
2794 return ip6_route_del(&cfg);
1da177e4
LT
2795}
2796
67ba4152 2797static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 2798{
86872cb5
TG
2799 struct fib6_config cfg;
2800 int err;
1da177e4 2801
86872cb5
TG
2802 err = rtm_to_fib6_config(skb, nlh, &cfg);
2803 if (err < 0)
2804 return err;
2805
51ebd318
ND
2806 if (cfg.fc_mp)
2807 return ip6_route_multipath(&cfg, 1);
2808 else
2809 return ip6_route_add(&cfg);
1da177e4
LT
2810}
2811
19e42e45 2812static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f
TG
2813{
2814 return NLMSG_ALIGN(sizeof(struct rtmsg))
2815 + nla_total_size(16) /* RTA_SRC */
2816 + nla_total_size(16) /* RTA_DST */
2817 + nla_total_size(16) /* RTA_GATEWAY */
2818 + nla_total_size(16) /* RTA_PREFSRC */
2819 + nla_total_size(4) /* RTA_TABLE */
2820 + nla_total_size(4) /* RTA_IIF */
2821 + nla_total_size(4) /* RTA_OIF */
2822 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2823 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 2824 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 2825 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45
RP
2826 + nla_total_size(1) /* RTA_PREF */
2827 + lwtunnel_get_encap_size(rt->rt6i_lwtstate);
339bf98f
TG
2828}
2829
191cd582
BH
2830static int rt6_fill_node(struct net *net,
2831 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 2832 struct in6_addr *dst, struct in6_addr *src,
15e47304 2833 int iif, int type, u32 portid, u32 seq,
7bc570c8 2834 int prefix, int nowait, unsigned int flags)
1da177e4 2835{
4b32b5ad 2836 u32 metrics[RTAX_MAX];
1da177e4 2837 struct rtmsg *rtm;
2d7202bf 2838 struct nlmsghdr *nlh;
e3703b3d 2839 long expires;
9e762a4a 2840 u32 table;
1da177e4
LT
2841
2842 if (prefix) { /* user wants prefix routes only */
2843 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2844 /* success since this is not a prefix route */
2845 return 1;
2846 }
2847 }
2848
15e47304 2849 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 2850 if (!nlh)
26932566 2851 return -EMSGSIZE;
2d7202bf
TG
2852
2853 rtm = nlmsg_data(nlh);
1da177e4
LT
2854 rtm->rtm_family = AF_INET6;
2855 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2856 rtm->rtm_src_len = rt->rt6i_src.plen;
2857 rtm->rtm_tos = 0;
c71099ac 2858 if (rt->rt6i_table)
9e762a4a 2859 table = rt->rt6i_table->tb6_id;
c71099ac 2860 else
9e762a4a
PM
2861 table = RT6_TABLE_UNSPEC;
2862 rtm->rtm_table = table;
c78679e8
DM
2863 if (nla_put_u32(skb, RTA_TABLE, table))
2864 goto nla_put_failure;
ef2c7d7b
ND
2865 if (rt->rt6i_flags & RTF_REJECT) {
2866 switch (rt->dst.error) {
2867 case -EINVAL:
2868 rtm->rtm_type = RTN_BLACKHOLE;
2869 break;
2870 case -EACCES:
2871 rtm->rtm_type = RTN_PROHIBIT;
2872 break;
b4949ab2
ND
2873 case -EAGAIN:
2874 rtm->rtm_type = RTN_THROW;
2875 break;
ef2c7d7b
ND
2876 default:
2877 rtm->rtm_type = RTN_UNREACHABLE;
2878 break;
2879 }
2880 }
38308473 2881 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2882 rtm->rtm_type = RTN_LOCAL;
d1918542 2883 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2884 rtm->rtm_type = RTN_LOCAL;
2885 else
2886 rtm->rtm_type = RTN_UNICAST;
2887 rtm->rtm_flags = 0;
2888 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2889 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2890 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 2891 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
2892 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2893 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2894 rtm->rtm_protocol = RTPROT_RA;
2895 else
2896 rtm->rtm_protocol = RTPROT_KERNEL;
2897 }
1da177e4 2898
38308473 2899 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2900 rtm->rtm_flags |= RTM_F_CLONED;
2901
2902 if (dst) {
930345ea 2903 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 2904 goto nla_put_failure;
1ab1457c 2905 rtm->rtm_dst_len = 128;
1da177e4 2906 } else if (rtm->rtm_dst_len)
930345ea 2907 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 2908 goto nla_put_failure;
1da177e4
LT
2909#ifdef CONFIG_IPV6_SUBTREES
2910 if (src) {
930345ea 2911 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 2912 goto nla_put_failure;
1ab1457c 2913 rtm->rtm_src_len = 128;
c78679e8 2914 } else if (rtm->rtm_src_len &&
930345ea 2915 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 2916 goto nla_put_failure;
1da177e4 2917#endif
7bc570c8
YH
2918 if (iif) {
2919#ifdef CONFIG_IPV6_MROUTE
2920 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2921 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2922 if (err <= 0) {
2923 if (!nowait) {
2924 if (err == 0)
2925 return 0;
2926 goto nla_put_failure;
2927 } else {
2928 if (err == -EMSGSIZE)
2929 goto nla_put_failure;
2930 }
2931 }
2932 } else
2933#endif
c78679e8
DM
2934 if (nla_put_u32(skb, RTA_IIF, iif))
2935 goto nla_put_failure;
7bc570c8 2936 } else if (dst) {
1da177e4 2937 struct in6_addr saddr_buf;
c78679e8 2938 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 2939 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 2940 goto nla_put_failure;
1da177e4 2941 }
2d7202bf 2942
c3968a85
DW
2943 if (rt->rt6i_prefsrc.plen) {
2944 struct in6_addr saddr_buf;
4e3fd7a0 2945 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 2946 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 2947 goto nla_put_failure;
c3968a85
DW
2948 }
2949
4b32b5ad
MKL
2950 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2951 if (rt->rt6i_pmtu)
2952 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
2953 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
2954 goto nla_put_failure;
2955
dd0cbf29 2956 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 2957 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 2958 goto nla_put_failure;
94f826b8 2959 }
2d7202bf 2960
c78679e8
DM
2961 if (rt->dst.dev &&
2962 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2963 goto nla_put_failure;
2964 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2965 goto nla_put_failure;
8253947e
LW
2966
2967 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 2968
87a50699 2969 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2970 goto nla_put_failure;
2d7202bf 2971
c78ba6d6
LR
2972 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2973 goto nla_put_failure;
2974
19e42e45
RP
2975 lwtunnel_fill_encap(skb, rt->rt6i_lwtstate);
2976
053c095a
JB
2977 nlmsg_end(skb, nlh);
2978 return 0;
2d7202bf
TG
2979
2980nla_put_failure:
26932566
PM
2981 nlmsg_cancel(skb, nlh);
2982 return -EMSGSIZE;
1da177e4
LT
2983}
2984
1b43af54 2985int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2986{
2987 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2988 int prefix;
2989
2d7202bf
TG
2990 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2991 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2992 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2993 } else
2994 prefix = 0;
2995
191cd582
BH
2996 return rt6_fill_node(arg->net,
2997 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 2998 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2999 prefix, 0, NLM_F_MULTI);
1da177e4
LT
3000}
3001
67ba4152 3002static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 3003{
3b1e0a65 3004 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
3005 struct nlattr *tb[RTA_MAX+1];
3006 struct rt6_info *rt;
1da177e4 3007 struct sk_buff *skb;
ab364a6f 3008 struct rtmsg *rtm;
4c9483b2 3009 struct flowi6 fl6;
72331bc0 3010 int err, iif = 0, oif = 0;
1da177e4 3011
ab364a6f
TG
3012 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3013 if (err < 0)
3014 goto errout;
1da177e4 3015
ab364a6f 3016 err = -EINVAL;
4c9483b2 3017 memset(&fl6, 0, sizeof(fl6));
1da177e4 3018
ab364a6f
TG
3019 if (tb[RTA_SRC]) {
3020 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3021 goto errout;
3022
4e3fd7a0 3023 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3024 }
3025
3026 if (tb[RTA_DST]) {
3027 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3028 goto errout;
3029
4e3fd7a0 3030 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3031 }
3032
3033 if (tb[RTA_IIF])
3034 iif = nla_get_u32(tb[RTA_IIF]);
3035
3036 if (tb[RTA_OIF])
72331bc0 3037 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3038
2e47b291
LC
3039 if (tb[RTA_MARK])
3040 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3041
1da177e4
LT
3042 if (iif) {
3043 struct net_device *dev;
72331bc0
SL
3044 int flags = 0;
3045
5578689a 3046 dev = __dev_get_by_index(net, iif);
1da177e4
LT
3047 if (!dev) {
3048 err = -ENODEV;
ab364a6f 3049 goto errout;
1da177e4 3050 }
72331bc0
SL
3051
3052 fl6.flowi6_iif = iif;
3053
3054 if (!ipv6_addr_any(&fl6.saddr))
3055 flags |= RT6_LOOKUP_F_HAS_SADDR;
3056
3057 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3058 flags);
3059 } else {
3060 fl6.flowi6_oif = oif;
3061
3062 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
3063 }
3064
ab364a6f 3065 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3066 if (!skb) {
94e187c0 3067 ip6_rt_put(rt);
ab364a6f
TG
3068 err = -ENOBUFS;
3069 goto errout;
3070 }
1da177e4 3071
ab364a6f
TG
3072 /* Reserve room for dummy headers, this skb can pass
3073 through good chunk of routing engine.
3074 */
459a98ed 3075 skb_reset_mac_header(skb);
ab364a6f 3076 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 3077
d8d1f30b 3078 skb_dst_set(skb, &rt->dst);
1da177e4 3079
4c9483b2 3080 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 3081 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 3082 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 3083 if (err < 0) {
ab364a6f
TG
3084 kfree_skb(skb);
3085 goto errout;
1da177e4
LT
3086 }
3087
15e47304 3088 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3089errout:
1da177e4 3090 return err;
1da177e4
LT
3091}
3092
86872cb5 3093void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
3094{
3095 struct sk_buff *skb;
5578689a 3096 struct net *net = info->nl_net;
528c4ceb
DL
3097 u32 seq;
3098 int err;
3099
3100 err = -ENOBUFS;
38308473 3101 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3102
19e42e45 3103 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3104 if (!skb)
21713ebc
TG
3105 goto errout;
3106
191cd582 3107 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
15e47304 3108 event, info->portid, seq, 0, 0, 0);
26932566
PM
3109 if (err < 0) {
3110 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3111 WARN_ON(err == -EMSGSIZE);
3112 kfree_skb(skb);
3113 goto errout;
3114 }
15e47304 3115 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3116 info->nlh, gfp_any());
3117 return;
21713ebc
TG
3118errout:
3119 if (err < 0)
5578689a 3120 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3121}
3122
8ed67789 3123static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3124 unsigned long event, void *ptr)
8ed67789 3125{
351638e7 3126 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3127 struct net *net = dev_net(dev);
8ed67789
DL
3128
3129 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 3130 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3131 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3132#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3133 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3134 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3135 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
3136 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3137#endif
3138 }
3139
3140 return NOTIFY_OK;
3141}
3142
1da177e4
LT
3143/*
3144 * /proc
3145 */
3146
3147#ifdef CONFIG_PROC_FS
3148
33120b30
AD
3149static const struct file_operations ipv6_route_proc_fops = {
3150 .owner = THIS_MODULE,
3151 .open = ipv6_route_open,
3152 .read = seq_read,
3153 .llseek = seq_lseek,
8d2ca1d7 3154 .release = seq_release_net,
33120b30
AD
3155};
3156
1da177e4
LT
3157static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3158{
69ddb805 3159 struct net *net = (struct net *)seq->private;
1da177e4 3160 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3161 net->ipv6.rt6_stats->fib_nodes,
3162 net->ipv6.rt6_stats->fib_route_nodes,
3163 net->ipv6.rt6_stats->fib_rt_alloc,
3164 net->ipv6.rt6_stats->fib_rt_entries,
3165 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3166 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3167 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3168
3169 return 0;
3170}
3171
3172static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3173{
de05c557 3174 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3175}
3176
9a32144e 3177static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3178 .owner = THIS_MODULE,
3179 .open = rt6_stats_seq_open,
3180 .read = seq_read,
3181 .llseek = seq_lseek,
b6fcbdb4 3182 .release = single_release_net,
1da177e4
LT
3183};
3184#endif /* CONFIG_PROC_FS */
3185
3186#ifdef CONFIG_SYSCTL
3187
1da177e4 3188static
fe2c6338 3189int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3190 void __user *buffer, size_t *lenp, loff_t *ppos)
3191{
c486da34
LAG
3192 struct net *net;
3193 int delay;
3194 if (!write)
1da177e4 3195 return -EINVAL;
c486da34
LAG
3196
3197 net = (struct net *)ctl->extra1;
3198 delay = net->ipv6.sysctl.flush_delay;
3199 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3200 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3201 return 0;
1da177e4
LT
3202}
3203
fe2c6338 3204struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3205 {
1da177e4 3206 .procname = "flush",
4990509f 3207 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3208 .maxlen = sizeof(int),
89c8b3a1 3209 .mode = 0200,
6d9f239a 3210 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3211 },
3212 {
1da177e4 3213 .procname = "gc_thresh",
9a7ec3a9 3214 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3215 .maxlen = sizeof(int),
3216 .mode = 0644,
6d9f239a 3217 .proc_handler = proc_dointvec,
1da177e4
LT
3218 },
3219 {
1da177e4 3220 .procname = "max_size",
4990509f 3221 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3222 .maxlen = sizeof(int),
3223 .mode = 0644,
6d9f239a 3224 .proc_handler = proc_dointvec,
1da177e4
LT
3225 },
3226 {
1da177e4 3227 .procname = "gc_min_interval",
4990509f 3228 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3229 .maxlen = sizeof(int),
3230 .mode = 0644,
6d9f239a 3231 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3232 },
3233 {
1da177e4 3234 .procname = "gc_timeout",
4990509f 3235 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3236 .maxlen = sizeof(int),
3237 .mode = 0644,
6d9f239a 3238 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3239 },
3240 {
1da177e4 3241 .procname = "gc_interval",
4990509f 3242 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3243 .maxlen = sizeof(int),
3244 .mode = 0644,
6d9f239a 3245 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3246 },
3247 {
1da177e4 3248 .procname = "gc_elasticity",
4990509f 3249 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3250 .maxlen = sizeof(int),
3251 .mode = 0644,
f3d3f616 3252 .proc_handler = proc_dointvec,
1da177e4
LT
3253 },
3254 {
1da177e4 3255 .procname = "mtu_expires",
4990509f 3256 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3257 .maxlen = sizeof(int),
3258 .mode = 0644,
6d9f239a 3259 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3260 },
3261 {
1da177e4 3262 .procname = "min_adv_mss",
4990509f 3263 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3264 .maxlen = sizeof(int),
3265 .mode = 0644,
f3d3f616 3266 .proc_handler = proc_dointvec,
1da177e4
LT
3267 },
3268 {
1da177e4 3269 .procname = "gc_min_interval_ms",
4990509f 3270 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3271 .maxlen = sizeof(int),
3272 .mode = 0644,
6d9f239a 3273 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3274 },
f8572d8f 3275 { }
1da177e4
LT
3276};
3277
2c8c1e72 3278struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3279{
3280 struct ctl_table *table;
3281
3282 table = kmemdup(ipv6_route_table_template,
3283 sizeof(ipv6_route_table_template),
3284 GFP_KERNEL);
5ee09105
YH
3285
3286 if (table) {
3287 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3288 table[0].extra1 = net;
86393e52 3289 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3290 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3291 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3292 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3293 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3294 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3295 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3296 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3297 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3298
3299 /* Don't export sysctls to unprivileged users */
3300 if (net->user_ns != &init_user_ns)
3301 table[0].procname = NULL;
5ee09105
YH
3302 }
3303
760f2d01
DL
3304 return table;
3305}
1da177e4
LT
3306#endif
3307
2c8c1e72 3308static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3309{
633d424b 3310 int ret = -ENOMEM;
8ed67789 3311
86393e52
AD
3312 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3313 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3314
fc66f95c
ED
3315 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3316 goto out_ip6_dst_ops;
3317
8ed67789
DL
3318 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3319 sizeof(*net->ipv6.ip6_null_entry),
3320 GFP_KERNEL);
3321 if (!net->ipv6.ip6_null_entry)
fc66f95c 3322 goto out_ip6_dst_entries;
d8d1f30b 3323 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3324 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3325 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3326 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3327 ip6_template_metrics, true);
8ed67789
DL
3328
3329#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3330 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3331 sizeof(*net->ipv6.ip6_prohibit_entry),
3332 GFP_KERNEL);
68fffc67
PZ
3333 if (!net->ipv6.ip6_prohibit_entry)
3334 goto out_ip6_null_entry;
d8d1f30b 3335 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3336 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3337 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3338 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3339 ip6_template_metrics, true);
8ed67789
DL
3340
3341 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3342 sizeof(*net->ipv6.ip6_blk_hole_entry),
3343 GFP_KERNEL);
68fffc67
PZ
3344 if (!net->ipv6.ip6_blk_hole_entry)
3345 goto out_ip6_prohibit_entry;
d8d1f30b 3346 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3347 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3348 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3349 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3350 ip6_template_metrics, true);
8ed67789
DL
3351#endif
3352
b339a47c
PZ
3353 net->ipv6.sysctl.flush_delay = 0;
3354 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3355 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3356 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3357 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3358 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3359 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3360 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3361
6891a346
BT
3362 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3363
8ed67789
DL
3364 ret = 0;
3365out:
3366 return ret;
f2fc6a54 3367
68fffc67
PZ
3368#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3369out_ip6_prohibit_entry:
3370 kfree(net->ipv6.ip6_prohibit_entry);
3371out_ip6_null_entry:
3372 kfree(net->ipv6.ip6_null_entry);
3373#endif
fc66f95c
ED
3374out_ip6_dst_entries:
3375 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3376out_ip6_dst_ops:
f2fc6a54 3377 goto out;
cdb18761
DL
3378}
3379
2c8c1e72 3380static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3381{
8ed67789
DL
3382 kfree(net->ipv6.ip6_null_entry);
3383#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3384 kfree(net->ipv6.ip6_prohibit_entry);
3385 kfree(net->ipv6.ip6_blk_hole_entry);
3386#endif
41bb78b4 3387 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3388}
3389
d189634e
TG
3390static int __net_init ip6_route_net_init_late(struct net *net)
3391{
3392#ifdef CONFIG_PROC_FS
d4beaa66
G
3393 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3394 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3395#endif
3396 return 0;
3397}
3398
3399static void __net_exit ip6_route_net_exit_late(struct net *net)
3400{
3401#ifdef CONFIG_PROC_FS
ece31ffd
G
3402 remove_proc_entry("ipv6_route", net->proc_net);
3403 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3404#endif
3405}
3406
cdb18761
DL
3407static struct pernet_operations ip6_route_net_ops = {
3408 .init = ip6_route_net_init,
3409 .exit = ip6_route_net_exit,
3410};
3411
c3426b47
DM
3412static int __net_init ipv6_inetpeer_init(struct net *net)
3413{
3414 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3415
3416 if (!bp)
3417 return -ENOMEM;
3418 inet_peer_base_init(bp);
3419 net->ipv6.peers = bp;
3420 return 0;
3421}
3422
3423static void __net_exit ipv6_inetpeer_exit(struct net *net)
3424{
3425 struct inet_peer_base *bp = net->ipv6.peers;
3426
3427 net->ipv6.peers = NULL;
56a6b248 3428 inetpeer_invalidate_tree(bp);
c3426b47
DM
3429 kfree(bp);
3430}
3431
2b823f72 3432static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3433 .init = ipv6_inetpeer_init,
3434 .exit = ipv6_inetpeer_exit,
3435};
3436
d189634e
TG
3437static struct pernet_operations ip6_route_net_late_ops = {
3438 .init = ip6_route_net_init_late,
3439 .exit = ip6_route_net_exit_late,
3440};
3441
8ed67789
DL
3442static struct notifier_block ip6_route_dev_notifier = {
3443 .notifier_call = ip6_route_dev_notify,
3444 .priority = 0,
3445};
3446
433d49c3 3447int __init ip6_route_init(void)
1da177e4 3448{
433d49c3 3449 int ret;
8d0b94af 3450 int cpu;
433d49c3 3451
9a7ec3a9
DL
3452 ret = -ENOMEM;
3453 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3454 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3455 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3456 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3457 goto out;
14e50e57 3458
fc66f95c 3459 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3460 if (ret)
bdb3289f 3461 goto out_kmem_cache;
bdb3289f 3462
c3426b47
DM
3463 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3464 if (ret)
e8803b6c 3465 goto out_dst_entries;
2a0c451a 3466
7e52b33b
DM
3467 ret = register_pernet_subsys(&ip6_route_net_ops);
3468 if (ret)
3469 goto out_register_inetpeer;
c3426b47 3470
5dc121e9
AE
3471 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3472
8ed67789
DL
3473 /* Registering of the loopback is done before this portion of code,
3474 * the loopback reference in rt6_info will not be taken, do it
3475 * manually for init_net */
d8d1f30b 3476 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3477 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3478 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3479 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3480 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3481 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3482 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3483 #endif
e8803b6c 3484 ret = fib6_init();
433d49c3 3485 if (ret)
8ed67789 3486 goto out_register_subsys;
433d49c3 3487
433d49c3
DL
3488 ret = xfrm6_init();
3489 if (ret)
e8803b6c 3490 goto out_fib6_init;
c35b7e72 3491
433d49c3
DL
3492 ret = fib6_rules_init();
3493 if (ret)
3494 goto xfrm6_init;
7e5449c2 3495
d189634e
TG
3496 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3497 if (ret)
3498 goto fib6_rules_init;
3499
433d49c3 3500 ret = -ENOBUFS;
c7ac8679
GR
3501 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3502 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3503 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3504 goto out_register_late_subsys;
c127ea2c 3505
8ed67789 3506 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3507 if (ret)
d189634e 3508 goto out_register_late_subsys;
8ed67789 3509
8d0b94af
MKL
3510 for_each_possible_cpu(cpu) {
3511 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3512
3513 INIT_LIST_HEAD(&ul->head);
3514 spin_lock_init(&ul->lock);
3515 }
3516
433d49c3
DL
3517out:
3518 return ret;
3519
d189634e
TG
3520out_register_late_subsys:
3521 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3522fib6_rules_init:
433d49c3
DL
3523 fib6_rules_cleanup();
3524xfrm6_init:
433d49c3 3525 xfrm6_fini();
2a0c451a
TG
3526out_fib6_init:
3527 fib6_gc_cleanup();
8ed67789
DL
3528out_register_subsys:
3529 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3530out_register_inetpeer:
3531 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3532out_dst_entries:
3533 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3534out_kmem_cache:
f2fc6a54 3535 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3536 goto out;
1da177e4
LT
3537}
3538
3539void ip6_route_cleanup(void)
3540{
8ed67789 3541 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3542 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3543 fib6_rules_cleanup();
1da177e4 3544 xfrm6_fini();
1da177e4 3545 fib6_gc_cleanup();
c3426b47 3546 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3547 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3548 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3549 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3550}
This page took 1.188823 seconds and 5 git commands to generate.