Merge tag 'gpio-v4.6-4' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux...
[deliverable/linux.git] / net / ipv4 / ip_tunnel.c
CommitLineData
c5441932
PS
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
c5441932
PS
33#include <linux/init.h>
34#include <linux/in6.h>
35#include <linux/inetdevice.h>
36#include <linux/igmp.h>
37#include <linux/netfilter_ipv4.h>
38#include <linux/etherdevice.h>
39#include <linux/if_ether.h>
40#include <linux/if_vlan.h>
41#include <linux/rculist.h>
27d79f3b 42#include <linux/err.h>
c5441932
PS
43
44#include <net/sock.h>
45#include <net/ip.h>
46#include <net/icmp.h>
47#include <net/protocol.h>
48#include <net/ip_tunnels.h>
49#include <net/arp.h>
50#include <net/checksum.h>
51#include <net/dsfield.h>
52#include <net/inet_ecn.h>
53#include <net/xfrm.h>
54#include <net/net_namespace.h>
55#include <net/netns/generic.h>
56#include <net/rtnetlink.h>
56328486 57#include <net/udp.h>
63487bab 58
c5441932
PS
59#if IS_ENABLED(CONFIG_IPV6)
60#include <net/ipv6.h>
61#include <net/ip6_fib.h>
62#include <net/ip6_route.h>
63#endif
64
967680e0 65static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
c5441932
PS
66{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
70
c5441932
PS
71static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
72 __be16 flags, __be32 key)
73{
74 if (p->i_flags & TUNNEL_KEY) {
75 if (flags & TUNNEL_KEY)
76 return key == p->i_key;
77 else
78 /* key expected, none present */
79 return false;
80 } else
81 return !(flags & TUNNEL_KEY);
82}
83
84/* Fallback tunnel: no source, no destination, no key, no options
85
86 Tunnel hash table:
87 We require exact key match i.e. if a key is present in packet
88 it will match only tunnel with the same key; if it is not present,
89 it will match only keyless tunnel.
90
91 All keysless packets, if not matched configured keyless tunnels
92 will match fallback tunnel.
93 Given src, dst and key, find appropriate for input tunnel.
94*/
95struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
96 int link, __be16 flags,
97 __be32 remote, __be32 local,
98 __be32 key)
99{
100 unsigned int hash;
101 struct ip_tunnel *t, *cand = NULL;
102 struct hlist_head *head;
103
967680e0 104 hash = ip_tunnel_hash(key, remote);
c5441932
PS
105 head = &itn->tunnels[hash];
106
107 hlist_for_each_entry_rcu(t, head, hash_node) {
108 if (local != t->parms.iph.saddr ||
109 remote != t->parms.iph.daddr ||
110 !(t->dev->flags & IFF_UP))
111 continue;
112
113 if (!ip_tunnel_key_match(&t->parms, flags, key))
114 continue;
115
116 if (t->parms.link == link)
117 return t;
118 else
119 cand = t;
120 }
121
122 hlist_for_each_entry_rcu(t, head, hash_node) {
123 if (remote != t->parms.iph.daddr ||
e0056593 124 t->parms.iph.saddr != 0 ||
c5441932
PS
125 !(t->dev->flags & IFF_UP))
126 continue;
127
128 if (!ip_tunnel_key_match(&t->parms, flags, key))
129 continue;
130
131 if (t->parms.link == link)
132 return t;
133 else if (!cand)
134 cand = t;
135 }
136
967680e0 137 hash = ip_tunnel_hash(key, 0);
c5441932
PS
138 head = &itn->tunnels[hash];
139
140 hlist_for_each_entry_rcu(t, head, hash_node) {
e0056593
DP
141 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
142 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
143 continue;
144
145 if (!(t->dev->flags & IFF_UP))
c5441932
PS
146 continue;
147
148 if (!ip_tunnel_key_match(&t->parms, flags, key))
149 continue;
150
151 if (t->parms.link == link)
152 return t;
153 else if (!cand)
154 cand = t;
155 }
156
157 if (flags & TUNNEL_NO_KEY)
158 goto skip_key_lookup;
159
160 hlist_for_each_entry_rcu(t, head, hash_node) {
161 if (t->parms.i_key != key ||
e0056593
DP
162 t->parms.iph.saddr != 0 ||
163 t->parms.iph.daddr != 0 ||
c5441932
PS
164 !(t->dev->flags & IFF_UP))
165 continue;
166
167 if (t->parms.link == link)
168 return t;
169 else if (!cand)
170 cand = t;
171 }
172
173skip_key_lookup:
174 if (cand)
175 return cand;
176
2e15ea39
PS
177 t = rcu_dereference(itn->collect_md_tun);
178 if (t)
179 return t;
180
c5441932
PS
181 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
182 return netdev_priv(itn->fb_tunnel_dev);
183
c5441932
PS
184 return NULL;
185}
186EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
187
188static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
189 struct ip_tunnel_parm *parms)
190{
191 unsigned int h;
192 __be32 remote;
6d608f06 193 __be32 i_key = parms->i_key;
c5441932
PS
194
195 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
196 remote = parms->iph.daddr;
197 else
198 remote = 0;
199
6d608f06
SK
200 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
201 i_key = 0;
202
203 h = ip_tunnel_hash(i_key, remote);
c5441932
PS
204 return &itn->tunnels[h];
205}
206
207static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
208{
209 struct hlist_head *head = ip_bucket(itn, &t->parms);
210
2e15ea39
PS
211 if (t->collect_md)
212 rcu_assign_pointer(itn->collect_md_tun, t);
c5441932
PS
213 hlist_add_head_rcu(&t->hash_node, head);
214}
215
2e15ea39 216static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
c5441932 217{
2e15ea39
PS
218 if (t->collect_md)
219 rcu_assign_pointer(itn->collect_md_tun, NULL);
c5441932
PS
220 hlist_del_init_rcu(&t->hash_node);
221}
222
223static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
224 struct ip_tunnel_parm *parms,
225 int type)
226{
227 __be32 remote = parms->iph.daddr;
228 __be32 local = parms->iph.saddr;
229 __be32 key = parms->i_key;
5ce54af1 230 __be16 flags = parms->i_flags;
c5441932
PS
231 int link = parms->link;
232 struct ip_tunnel *t = NULL;
233 struct hlist_head *head = ip_bucket(itn, parms);
234
235 hlist_for_each_entry_rcu(t, head, hash_node) {
236 if (local == t->parms.iph.saddr &&
237 remote == t->parms.iph.daddr &&
c5441932 238 link == t->parms.link &&
5ce54af1
DP
239 type == t->dev->type &&
240 ip_tunnel_key_match(&t->parms, flags, key))
c5441932
PS
241 break;
242 }
243 return t;
244}
245
246static struct net_device *__ip_tunnel_create(struct net *net,
247 const struct rtnl_link_ops *ops,
248 struct ip_tunnel_parm *parms)
249{
250 int err;
251 struct ip_tunnel *tunnel;
252 struct net_device *dev;
253 char name[IFNAMSIZ];
254
255 if (parms->name[0])
256 strlcpy(name, parms->name, IFNAMSIZ);
257 else {
54a5d382 258 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
c5441932
PS
259 err = -E2BIG;
260 goto failed;
261 }
262 strlcpy(name, ops->kind, IFNAMSIZ);
263 strncat(name, "%d", 2);
264 }
265
266 ASSERT_RTNL();
c835a677 267 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
c5441932
PS
268 if (!dev) {
269 err = -ENOMEM;
270 goto failed;
271 }
272 dev_net_set(dev, net);
273
274 dev->rtnl_link_ops = ops;
275
276 tunnel = netdev_priv(dev);
277 tunnel->parms = *parms;
5e6700b3 278 tunnel->net = net;
c5441932
PS
279
280 err = register_netdevice(dev);
281 if (err)
282 goto failed_free;
283
284 return dev;
285
286failed_free:
287 free_netdev(dev);
288failed:
289 return ERR_PTR(err);
290}
291
7d442fab
TH
292static inline void init_tunnel_flow(struct flowi4 *fl4,
293 int proto,
294 __be32 daddr, __be32 saddr,
295 __be32 key, __u8 tos, int oif)
c5441932
PS
296{
297 memset(fl4, 0, sizeof(*fl4));
298 fl4->flowi4_oif = oif;
299 fl4->daddr = daddr;
300 fl4->saddr = saddr;
301 fl4->flowi4_tos = tos;
302 fl4->flowi4_proto = proto;
303 fl4->fl4_gre_key = key;
c5441932
PS
304}
305
306static int ip_tunnel_bind_dev(struct net_device *dev)
307{
308 struct net_device *tdev = NULL;
309 struct ip_tunnel *tunnel = netdev_priv(dev);
310 const struct iphdr *iph;
311 int hlen = LL_MAX_HEADER;
312 int mtu = ETH_DATA_LEN;
313 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
314
315 iph = &tunnel->parms.iph;
316
317 /* Guess output device to choose reasonable mtu and needed_headroom */
318 if (iph->daddr) {
319 struct flowi4 fl4;
320 struct rtable *rt;
321
7d442fab
TH
322 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
323 iph->saddr, tunnel->parms.o_key,
324 RT_TOS(iph->tos), tunnel->parms.link);
325 rt = ip_route_output_key(tunnel->net, &fl4);
326
c5441932
PS
327 if (!IS_ERR(rt)) {
328 tdev = rt->dst.dev;
329 ip_rt_put(rt);
330 }
331 if (dev->type != ARPHRD_ETHER)
332 dev->flags |= IFF_POINTOPOINT;
f27337e1
PA
333
334 dst_cache_reset(&tunnel->dst_cache);
c5441932
PS
335 }
336
337 if (!tdev && tunnel->parms.link)
6c742e71 338 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
c5441932
PS
339
340 if (tdev) {
341 hlen = tdev->hard_header_len + tdev->needed_headroom;
342 mtu = tdev->mtu;
343 }
c5441932
PS
344
345 dev->needed_headroom = t_hlen + hlen;
346 mtu -= (dev->hard_header_len + t_hlen);
347
348 if (mtu < 68)
349 mtu = 68;
350
351 return mtu;
352}
353
354static struct ip_tunnel *ip_tunnel_create(struct net *net,
355 struct ip_tunnel_net *itn,
356 struct ip_tunnel_parm *parms)
357{
4929fd8c 358 struct ip_tunnel *nt;
c5441932
PS
359 struct net_device *dev;
360
361 BUG_ON(!itn->fb_tunnel_dev);
c5441932
PS
362 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
363 if (IS_ERR(dev))
6dd3c9ec 364 return ERR_CAST(dev);
c5441932
PS
365
366 dev->mtu = ip_tunnel_bind_dev(dev);
367
368 nt = netdev_priv(dev);
369 ip_tunnel_add(itn, nt);
370 return nt;
371}
372
373int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
2e15ea39
PS
374 const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
375 bool log_ecn_error)
c5441932 376{
8f84985f 377 struct pcpu_sw_netstats *tstats;
c5441932
PS
378 const struct iphdr *iph = ip_hdr(skb);
379 int err;
380
c5441932
PS
381#ifdef CONFIG_NET_IPGRE_BROADCAST
382 if (ipv4_is_multicast(iph->daddr)) {
c5441932
PS
383 tunnel->dev->stats.multicast++;
384 skb->pkt_type = PACKET_BROADCAST;
385 }
386#endif
387
388 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
389 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
390 tunnel->dev->stats.rx_crc_errors++;
391 tunnel->dev->stats.rx_errors++;
392 goto drop;
393 }
394
395 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
396 if (!(tpi->flags&TUNNEL_SEQ) ||
397 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
398 tunnel->dev->stats.rx_fifo_errors++;
399 tunnel->dev->stats.rx_errors++;
400 goto drop;
401 }
402 tunnel->i_seqno = ntohl(tpi->seq) + 1;
403 }
404
e96f2e7c
YC
405 skb_reset_network_header(skb);
406
c5441932
PS
407 err = IP_ECN_decapsulate(iph, skb);
408 if (unlikely(err)) {
409 if (log_ecn_error)
410 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
411 &iph->saddr, iph->tos);
412 if (err > 1) {
413 ++tunnel->dev->stats.rx_frame_errors;
414 ++tunnel->dev->stats.rx_errors;
415 goto drop;
416 }
417 }
418
419 tstats = this_cpu_ptr(tunnel->dev->tstats);
420 u64_stats_update_begin(&tstats->syncp);
421 tstats->rx_packets++;
422 tstats->rx_bytes += skb->len;
423 u64_stats_update_end(&tstats->syncp);
424
81b9eab5
AS
425 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
426
3d7b46cd
PS
427 if (tunnel->dev->type == ARPHRD_ETHER) {
428 skb->protocol = eth_type_trans(skb, tunnel->dev);
429 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
430 } else {
431 skb->dev = tunnel->dev;
432 }
64261f23 433
2e15ea39
PS
434 if (tun_dst)
435 skb_dst_set(skb, (struct dst_entry *)tun_dst);
436
c5441932
PS
437 gro_cells_receive(&tunnel->gro_cells, skb);
438 return 0;
439
440drop:
441 kfree_skb(skb);
442 return 0;
443}
444EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
445
56328486
TH
446static int ip_encap_hlen(struct ip_tunnel_encap *e)
447{
a8c5f90f
TH
448 const struct ip_tunnel_encap_ops *ops;
449 int hlen = -EINVAL;
450
451 if (e->type == TUNNEL_ENCAP_NONE)
56328486 452 return 0;
a8c5f90f
TH
453
454 if (e->type >= MAX_IPTUN_ENCAP_OPS)
56328486 455 return -EINVAL;
a8c5f90f
TH
456
457 rcu_read_lock();
458 ops = rcu_dereference(iptun_encaps[e->type]);
459 if (likely(ops && ops->encap_hlen))
460 hlen = ops->encap_hlen(e);
461 rcu_read_unlock();
462
463 return hlen;
464}
465
466const struct ip_tunnel_encap_ops __rcu *
467 iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
468
469int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
470 unsigned int num)
471{
bb1553c8
TG
472 if (num >= MAX_IPTUN_ENCAP_OPS)
473 return -ERANGE;
474
a8c5f90f
TH
475 return !cmpxchg((const struct ip_tunnel_encap_ops **)
476 &iptun_encaps[num],
477 NULL, ops) ? 0 : -1;
56328486 478}
a8c5f90f
TH
479EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
480
481int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
482 unsigned int num)
483{
484 int ret;
485
bb1553c8
TG
486 if (num >= MAX_IPTUN_ENCAP_OPS)
487 return -ERANGE;
488
a8c5f90f
TH
489 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
490 &iptun_encaps[num],
491 ops, NULL) == ops) ? 0 : -1;
492
493 synchronize_net();
494
495 return ret;
496}
497EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
56328486
TH
498
499int ip_tunnel_encap_setup(struct ip_tunnel *t,
500 struct ip_tunnel_encap *ipencap)
501{
502 int hlen;
503
504 memset(&t->encap, 0, sizeof(t->encap));
505
506 hlen = ip_encap_hlen(ipencap);
507 if (hlen < 0)
508 return hlen;
509
510 t->encap.type = ipencap->type;
511 t->encap.sport = ipencap->sport;
512 t->encap.dport = ipencap->dport;
513 t->encap.flags = ipencap->flags;
514
515 t->encap_hlen = hlen;
516 t->hlen = t->encap_hlen + t->tun_hlen;
517
518 return 0;
519}
520EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
521
56328486
TH
522int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
523 u8 *protocol, struct flowi4 *fl4)
524{
a8c5f90f
TH
525 const struct ip_tunnel_encap_ops *ops;
526 int ret = -EINVAL;
527
528 if (t->encap.type == TUNNEL_ENCAP_NONE)
56328486 529 return 0;
a8c5f90f 530
f1fb521f
TG
531 if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
532 return -EINVAL;
533
a8c5f90f
TH
534 rcu_read_lock();
535 ops = rcu_dereference(iptun_encaps[t->encap.type]);
536 if (likely(ops && ops->build_header))
537 ret = ops->build_header(skb, &t->encap, protocol, fl4);
538 rcu_read_unlock();
539
540 return ret;
56328486
TH
541}
542EXPORT_SYMBOL(ip_tunnel_encap);
543
23a3647b 544static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
fc24f2b2
TT
545 struct rtable *rt, __be16 df,
546 const struct iphdr *inner_iph)
23a3647b
PS
547{
548 struct ip_tunnel *tunnel = netdev_priv(dev);
8c91e162 549 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
23a3647b
PS
550 int mtu;
551
552 if (df)
553 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
554 - sizeof(struct iphdr) - tunnel->hlen;
555 else
556 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
557
558 if (skb_dst(skb))
559 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
560
561 if (skb->protocol == htons(ETH_P_IP)) {
562 if (!skb_is_gso(skb) &&
fc24f2b2
TT
563 (inner_iph->frag_off & htons(IP_DF)) &&
564 mtu < pkt_size) {
23a3647b
PS
565 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
566 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
567 return -E2BIG;
568 }
569 }
570#if IS_ENABLED(CONFIG_IPV6)
571 else if (skb->protocol == htons(ETH_P_IPV6)) {
572 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
573
574 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
575 mtu >= IPV6_MIN_MTU) {
576 if ((tunnel->parms.iph.daddr &&
577 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
578 rt6->rt6i_dst.plen == 128) {
579 rt6->rt6i_flags |= RTF_MODIFIED;
580 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
581 }
582 }
583
584 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
585 mtu < pkt_size) {
586 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
587 return -E2BIG;
588 }
589 }
590#endif
591 return 0;
592}
593
c5441932 594void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
56328486 595 const struct iphdr *tnl_params, u8 protocol)
c5441932
PS
596{
597 struct ip_tunnel *tunnel = netdev_priv(dev);
598 const struct iphdr *inner_iph;
c5441932
PS
599 struct flowi4 fl4;
600 u8 tos, ttl;
601 __be16 df;
b045d37b 602 struct rtable *rt; /* Route to the other host */
c5441932
PS
603 unsigned int max_headroom; /* The extra header space needed */
604 __be32 dst;
22fb22ea 605 bool connected;
c5441932
PS
606
607 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
22fb22ea 608 connected = (tunnel->parms.iph.daddr != 0);
c5441932 609
5146d1f1
BH
610 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
611
c5441932
PS
612 dst = tnl_params->daddr;
613 if (dst == 0) {
614 /* NBMA tunnel */
615
51456b29 616 if (!skb_dst(skb)) {
c5441932
PS
617 dev->stats.tx_fifo_errors++;
618 goto tx_error;
619 }
620
621 if (skb->protocol == htons(ETH_P_IP)) {
622 rt = skb_rtable(skb);
623 dst = rt_nexthop(rt, inner_iph->daddr);
624 }
625#if IS_ENABLED(CONFIG_IPV6)
626 else if (skb->protocol == htons(ETH_P_IPV6)) {
627 const struct in6_addr *addr6;
628 struct neighbour *neigh;
629 bool do_tx_error_icmp;
630 int addr_type;
631
632 neigh = dst_neigh_lookup(skb_dst(skb),
633 &ipv6_hdr(skb)->daddr);
51456b29 634 if (!neigh)
c5441932
PS
635 goto tx_error;
636
637 addr6 = (const struct in6_addr *)&neigh->primary_key;
638 addr_type = ipv6_addr_type(addr6);
639
640 if (addr_type == IPV6_ADDR_ANY) {
641 addr6 = &ipv6_hdr(skb)->daddr;
642 addr_type = ipv6_addr_type(addr6);
643 }
644
645 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
646 do_tx_error_icmp = true;
647 else {
648 do_tx_error_icmp = false;
649 dst = addr6->s6_addr32[3];
650 }
651 neigh_release(neigh);
652 if (do_tx_error_icmp)
653 goto tx_error_icmp;
654 }
655#endif
656 else
657 goto tx_error;
7d442fab
TH
658
659 connected = false;
c5441932
PS
660 }
661
662 tos = tnl_params->tos;
663 if (tos & 0x1) {
664 tos &= ~0x1;
7d442fab 665 if (skb->protocol == htons(ETH_P_IP)) {
c5441932 666 tos = inner_iph->tos;
7d442fab
TH
667 connected = false;
668 } else if (skb->protocol == htons(ETH_P_IPV6)) {
c5441932 669 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
7d442fab
TH
670 connected = false;
671 }
c5441932
PS
672 }
673
7d442fab
TH
674 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
675 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
676
56328486
TH
677 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
678 goto tx_error;
679
e09acddf
PA
680 rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
681 NULL;
7d442fab
TH
682
683 if (!rt) {
684 rt = ip_route_output_key(tunnel->net, &fl4);
685
686 if (IS_ERR(rt)) {
687 dev->stats.tx_carrier_errors++;
688 goto tx_error;
689 }
690 if (connected)
e09acddf
PA
691 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
692 fl4.saddr);
c5441932 693 }
7d442fab 694
0e6fbc5b 695 if (rt->dst.dev == dev) {
c5441932
PS
696 ip_rt_put(rt);
697 dev->stats.collisions++;
698 goto tx_error;
699 }
c5441932 700
fc24f2b2 701 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
23a3647b
PS
702 ip_rt_put(rt);
703 goto tx_error;
c5441932 704 }
c5441932
PS
705
706 if (tunnel->err_count > 0) {
707 if (time_before(jiffies,
708 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
709 tunnel->err_count--;
710
711 dst_link_failure(skb);
712 } else
713 tunnel->err_count = 0;
714 }
715
d4a71b15 716 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
c5441932
PS
717 ttl = tnl_params->ttl;
718 if (ttl == 0) {
719 if (skb->protocol == htons(ETH_P_IP))
720 ttl = inner_iph->ttl;
721#if IS_ENABLED(CONFIG_IPV6)
722 else if (skb->protocol == htons(ETH_P_IPV6))
723 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
724#endif
725 else
726 ttl = ip4_dst_hoplimit(&rt->dst);
727 }
728
23a3647b
PS
729 df = tnl_params->frag_off;
730 if (skb->protocol == htons(ETH_P_IP))
731 df |= (inner_iph->frag_off&htons(IP_DF));
732
0e6fbc5b 733 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
7371e022 734 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
3e08f4a7 735 if (max_headroom > dev->needed_headroom)
c5441932 736 dev->needed_headroom = max_headroom;
3e08f4a7
SK
737
738 if (skb_cow_head(skb, dev->needed_headroom)) {
586d5fc8 739 ip_rt_put(rt);
3e08f4a7 740 dev->stats.tx_dropped++;
3acfa1e7 741 kfree_skb(skb);
3e08f4a7 742 return;
c5441932
PS
743 }
744
039f5062
PS
745 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
746 df, !net_eq(tunnel->net, dev_net(dev)));
c5441932
PS
747 return;
748
749#if IS_ENABLED(CONFIG_IPV6)
750tx_error_icmp:
751 dst_link_failure(skb);
752#endif
753tx_error:
754 dev->stats.tx_errors++;
3acfa1e7 755 kfree_skb(skb);
c5441932
PS
756}
757EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
758
759static void ip_tunnel_update(struct ip_tunnel_net *itn,
760 struct ip_tunnel *t,
761 struct net_device *dev,
762 struct ip_tunnel_parm *p,
763 bool set_mtu)
764{
2e15ea39 765 ip_tunnel_del(itn, t);
c5441932
PS
766 t->parms.iph.saddr = p->iph.saddr;
767 t->parms.iph.daddr = p->iph.daddr;
768 t->parms.i_key = p->i_key;
769 t->parms.o_key = p->o_key;
770 if (dev->type != ARPHRD_ETHER) {
771 memcpy(dev->dev_addr, &p->iph.saddr, 4);
772 memcpy(dev->broadcast, &p->iph.daddr, 4);
773 }
774 ip_tunnel_add(itn, t);
775
776 t->parms.iph.ttl = p->iph.ttl;
777 t->parms.iph.tos = p->iph.tos;
778 t->parms.iph.frag_off = p->iph.frag_off;
779
780 if (t->parms.link != p->link) {
781 int mtu;
782
783 t->parms.link = p->link;
784 mtu = ip_tunnel_bind_dev(dev);
785 if (set_mtu)
786 dev->mtu = mtu;
787 }
e09acddf 788 dst_cache_reset(&t->dst_cache);
c5441932
PS
789 netdev_state_change(dev);
790}
791
792int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
793{
794 int err = 0;
8c923ce2
ND
795 struct ip_tunnel *t = netdev_priv(dev);
796 struct net *net = t->net;
797 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
c5441932
PS
798
799 BUG_ON(!itn->fb_tunnel_dev);
800 switch (cmd) {
801 case SIOCGETTUNNEL:
8c923ce2 802 if (dev == itn->fb_tunnel_dev) {
c5441932 803 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
51456b29 804 if (!t)
8c923ce2
ND
805 t = netdev_priv(dev);
806 }
c5441932
PS
807 memcpy(p, &t->parms, sizeof(*p));
808 break;
809
810 case SIOCADDTUNNEL:
811 case SIOCCHGTUNNEL:
812 err = -EPERM;
813 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
814 goto done;
815 if (p->iph.ttl)
816 p->iph.frag_off |= htons(IP_DF);
7c8e6b9c
DP
817 if (!(p->i_flags & VTI_ISVTI)) {
818 if (!(p->i_flags & TUNNEL_KEY))
819 p->i_key = 0;
820 if (!(p->o_flags & TUNNEL_KEY))
821 p->o_key = 0;
822 }
c5441932
PS
823
824 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
825
d61746b2
SK
826 if (cmd == SIOCADDTUNNEL) {
827 if (!t) {
828 t = ip_tunnel_create(net, itn, p);
829 err = PTR_ERR_OR_ZERO(t);
830 break;
831 }
832
833 err = -EEXIST;
ee30ef4d 834 break;
6dd3c9ec 835 }
c5441932 836 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
00db4124 837 if (t) {
c5441932
PS
838 if (t->dev != dev) {
839 err = -EEXIST;
840 break;
841 }
842 } else {
843 unsigned int nflags = 0;
844
845 if (ipv4_is_multicast(p->iph.daddr))
846 nflags = IFF_BROADCAST;
847 else if (p->iph.daddr)
848 nflags = IFF_POINTOPOINT;
849
850 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
851 err = -EINVAL;
852 break;
853 }
854
855 t = netdev_priv(dev);
856 }
857 }
858
859 if (t) {
860 err = 0;
861 ip_tunnel_update(itn, t, dev, p, true);
6dd3c9ec
FW
862 } else {
863 err = -ENOENT;
864 }
c5441932
PS
865 break;
866
867 case SIOCDELTUNNEL:
868 err = -EPERM;
869 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
870 goto done;
871
872 if (dev == itn->fb_tunnel_dev) {
873 err = -ENOENT;
874 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
51456b29 875 if (!t)
c5441932
PS
876 goto done;
877 err = -EPERM;
878 if (t == netdev_priv(itn->fb_tunnel_dev))
879 goto done;
880 dev = t->dev;
881 }
882 unregister_netdevice(dev);
883 err = 0;
884 break;
885
886 default:
887 err = -EINVAL;
888 }
889
890done:
891 return err;
892}
893EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
894
7e059158 895int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
c5441932
PS
896{
897 struct ip_tunnel *tunnel = netdev_priv(dev);
898 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
7e059158 899 int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
c5441932 900
7e059158 901 if (new_mtu < 68)
c5441932 902 return -EINVAL;
7e059158
DW
903
904 if (new_mtu > max_mtu) {
905 if (strict)
906 return -EINVAL;
907
908 new_mtu = max_mtu;
909 }
910
c5441932
PS
911 dev->mtu = new_mtu;
912 return 0;
913}
7e059158
DW
914EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
915
916int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
917{
918 return __ip_tunnel_change_mtu(dev, new_mtu, true);
919}
c5441932
PS
920EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
921
922static void ip_tunnel_dev_free(struct net_device *dev)
923{
924 struct ip_tunnel *tunnel = netdev_priv(dev);
925
926 gro_cells_destroy(&tunnel->gro_cells);
e09acddf 927 dst_cache_destroy(&tunnel->dst_cache);
c5441932
PS
928 free_percpu(dev->tstats);
929 free_netdev(dev);
930}
931
932void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
933{
c5441932
PS
934 struct ip_tunnel *tunnel = netdev_priv(dev);
935 struct ip_tunnel_net *itn;
936
6c742e71 937 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
c5441932
PS
938
939 if (itn->fb_tunnel_dev != dev) {
2e15ea39 940 ip_tunnel_del(itn, netdev_priv(dev));
c5441932
PS
941 unregister_netdevice_queue(dev, head);
942 }
943}
944EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
945
1728d4fa
ND
946struct net *ip_tunnel_get_link_net(const struct net_device *dev)
947{
948 struct ip_tunnel *tunnel = netdev_priv(dev);
949
950 return tunnel->net;
951}
952EXPORT_SYMBOL(ip_tunnel_get_link_net);
953
1e99584b
ND
954int ip_tunnel_get_iflink(const struct net_device *dev)
955{
956 struct ip_tunnel *tunnel = netdev_priv(dev);
957
958 return tunnel->parms.link;
959}
960EXPORT_SYMBOL(ip_tunnel_get_iflink);
961
d3b6f614 962int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
c5441932
PS
963 struct rtnl_link_ops *ops, char *devname)
964{
965 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
966 struct ip_tunnel_parm parms;
6261d983 967 unsigned int i;
c5441932 968
6261d983 969 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
970 INIT_HLIST_HEAD(&itn->tunnels[i]);
c5441932
PS
971
972 if (!ops) {
973 itn->fb_tunnel_dev = NULL;
974 return 0;
975 }
6261d983 976
c5441932
PS
977 memset(&parms, 0, sizeof(parms));
978 if (devname)
979 strlcpy(parms.name, devname, IFNAMSIZ);
980
981 rtnl_lock();
982 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
ea857f28
DC
983 /* FB netdevice is special: we have one, and only one per netns.
984 * Allowing to move it to another netns is clearly unsafe.
985 */
67013282 986 if (!IS_ERR(itn->fb_tunnel_dev)) {
b4de77ad 987 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
78ff4be4 988 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
67013282
SK
989 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
990 }
b4de77ad 991 rtnl_unlock();
c5441932 992
27d79f3b 993 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
c5441932
PS
994}
995EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
996
6c742e71
ND
997static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
998 struct rtnl_link_ops *ops)
c5441932 999{
6c742e71
ND
1000 struct net *net = dev_net(itn->fb_tunnel_dev);
1001 struct net_device *dev, *aux;
c5441932
PS
1002 int h;
1003
6c742e71
ND
1004 for_each_netdev_safe(net, dev, aux)
1005 if (dev->rtnl_link_ops == ops)
1006 unregister_netdevice_queue(dev, head);
1007
c5441932
PS
1008 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1009 struct ip_tunnel *t;
1010 struct hlist_node *n;
1011 struct hlist_head *thead = &itn->tunnels[h];
1012
1013 hlist_for_each_entry_safe(t, n, thead, hash_node)
6c742e71
ND
1014 /* If dev is in the same netns, it has already
1015 * been added to the list by the previous loop.
1016 */
1017 if (!net_eq(dev_net(t->dev), net))
1018 unregister_netdevice_queue(t->dev, head);
c5441932 1019 }
c5441932
PS
1020}
1021
6c742e71 1022void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
c5441932
PS
1023{
1024 LIST_HEAD(list);
1025
1026 rtnl_lock();
6c742e71 1027 ip_tunnel_destroy(itn, &list, ops);
c5441932
PS
1028 unregister_netdevice_many(&list);
1029 rtnl_unlock();
c5441932
PS
1030}
1031EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1032
1033int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1034 struct ip_tunnel_parm *p)
1035{
1036 struct ip_tunnel *nt;
1037 struct net *net = dev_net(dev);
1038 struct ip_tunnel_net *itn;
1039 int mtu;
1040 int err;
1041
1042 nt = netdev_priv(dev);
1043 itn = net_generic(net, nt->ip_tnl_net_id);
1044
2e15ea39
PS
1045 if (nt->collect_md) {
1046 if (rtnl_dereference(itn->collect_md_tun))
1047 return -EEXIST;
1048 } else {
1049 if (ip_tunnel_find(itn, p, dev->type))
1050 return -EEXIST;
1051 }
c5441932 1052
5e6700b3 1053 nt->net = net;
c5441932
PS
1054 nt->parms = *p;
1055 err = register_netdevice(dev);
1056 if (err)
1057 goto out;
1058
1059 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1060 eth_hw_addr_random(dev);
1061
1062 mtu = ip_tunnel_bind_dev(dev);
1063 if (!tb[IFLA_MTU])
1064 dev->mtu = mtu;
1065
1066 ip_tunnel_add(itn, nt);
c5441932
PS
1067out:
1068 return err;
1069}
1070EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1071
1072int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1073 struct ip_tunnel_parm *p)
1074{
6c742e71 1075 struct ip_tunnel *t;
c5441932 1076 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1077 struct net *net = tunnel->net;
c5441932
PS
1078 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1079
1080 if (dev == itn->fb_tunnel_dev)
1081 return -EINVAL;
1082
c5441932
PS
1083 t = ip_tunnel_find(itn, p, dev->type);
1084
1085 if (t) {
1086 if (t->dev != dev)
1087 return -EEXIST;
1088 } else {
6c742e71 1089 t = tunnel;
c5441932
PS
1090
1091 if (dev->type != ARPHRD_ETHER) {
1092 unsigned int nflags = 0;
1093
1094 if (ipv4_is_multicast(p->iph.daddr))
1095 nflags = IFF_BROADCAST;
1096 else if (p->iph.daddr)
1097 nflags = IFF_POINTOPOINT;
1098
1099 if ((dev->flags ^ nflags) &
1100 (IFF_POINTOPOINT | IFF_BROADCAST))
1101 return -EINVAL;
1102 }
1103 }
1104
1105 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1106 return 0;
1107}
1108EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1109
1110int ip_tunnel_init(struct net_device *dev)
1111{
1112 struct ip_tunnel *tunnel = netdev_priv(dev);
1113 struct iphdr *iph = &tunnel->parms.iph;
1c213bd2 1114 int err;
c5441932
PS
1115
1116 dev->destructor = ip_tunnel_dev_free;
1c213bd2 1117 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
c5441932
PS
1118 if (!dev->tstats)
1119 return -ENOMEM;
1120
e09acddf
PA
1121 err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1122 if (err) {
9a4aa9af 1123 free_percpu(dev->tstats);
e09acddf 1124 return err;
9a4aa9af
TH
1125 }
1126
c5441932
PS
1127 err = gro_cells_init(&tunnel->gro_cells, dev);
1128 if (err) {
e09acddf 1129 dst_cache_destroy(&tunnel->dst_cache);
c5441932
PS
1130 free_percpu(dev->tstats);
1131 return err;
1132 }
1133
1134 tunnel->dev = dev;
6c742e71 1135 tunnel->net = dev_net(dev);
c5441932
PS
1136 strcpy(tunnel->parms.name, dev->name);
1137 iph->version = 4;
1138 iph->ihl = 5;
1139
2e15ea39
PS
1140 if (tunnel->collect_md) {
1141 dev->features |= NETIF_F_NETNS_LOCAL;
1142 netif_keep_dst(dev);
1143 }
c5441932
PS
1144 return 0;
1145}
1146EXPORT_SYMBOL_GPL(ip_tunnel_init);
1147
1148void ip_tunnel_uninit(struct net_device *dev)
1149{
c5441932 1150 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1151 struct net *net = tunnel->net;
c5441932
PS
1152 struct ip_tunnel_net *itn;
1153
1154 itn = net_generic(net, tunnel->ip_tnl_net_id);
1155 /* fb_tunnel_dev will be unregisted in net-exit call. */
1156 if (itn->fb_tunnel_dev != dev)
2e15ea39 1157 ip_tunnel_del(itn, netdev_priv(dev));
7d442fab 1158
e09acddf 1159 dst_cache_reset(&tunnel->dst_cache);
c5441932
PS
1160}
1161EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1162
1163/* Do least required initialization, rest of init is done in tunnel_init call */
1164void ip_tunnel_setup(struct net_device *dev, int net_id)
1165{
1166 struct ip_tunnel *tunnel = netdev_priv(dev);
1167 tunnel->ip_tnl_net_id = net_id;
1168}
1169EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1170
1171MODULE_LICENSE("GPL");
This page took 0.249875 seconds and 5 git commands to generate.