Merge branch 'akpm' (patchbomb from Andrew)
[deliverable/linux.git] / net / ipv4 / ip_tunnel.c
CommitLineData
c5441932
PS
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
27d79f3b 43#include <linux/err.h>
c5441932
PS
44
45#include <net/sock.h>
46#include <net/ip.h>
47#include <net/icmp.h>
48#include <net/protocol.h>
49#include <net/ip_tunnels.h>
50#include <net/arp.h>
51#include <net/checksum.h>
52#include <net/dsfield.h>
53#include <net/inet_ecn.h>
54#include <net/xfrm.h>
55#include <net/net_namespace.h>
56#include <net/netns/generic.h>
57#include <net/rtnetlink.h>
56328486 58#include <net/udp.h>
bc1fc390 59#include <net/gue.h>
c5441932
PS
60
61#if IS_ENABLED(CONFIG_IPV6)
62#include <net/ipv6.h>
63#include <net/ip6_fib.h>
64#include <net/ip6_route.h>
65#endif
66
967680e0 67static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
c5441932
PS
68{
69 return hash_32((__force u32)key ^ (__force u32)remote,
70 IP_TNL_HASH_BITS);
71}
72
6c7e7610 73static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
95cb5745 74 struct dst_entry *dst, __be32 saddr)
7d442fab
TH
75{
76 struct dst_entry *old_dst;
77
f8864972 78 dst_clone(dst);
6c7e7610 79 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
7d442fab 80 dst_release(old_dst);
95cb5745 81 idst->saddr = saddr;
7d442fab
TH
82}
83
a35165ca 84static noinline void tunnel_dst_set(struct ip_tunnel *t,
95cb5745 85 struct dst_entry *dst, __be32 saddr)
7d442fab 86{
a35165ca 87 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
7d442fab
TH
88}
89
6c7e7610 90static void tunnel_dst_reset(struct ip_tunnel *t)
7d442fab 91{
95cb5745 92 tunnel_dst_set(t, NULL, 0);
7d442fab
TH
93}
94
cf71d2bc 95void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
9a4aa9af
TH
96{
97 int i;
98
99 for_each_possible_cpu(i)
95cb5745 100 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
9a4aa9af 101}
cf71d2bc 102EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
9a4aa9af 103
95cb5745
DP
104static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
105 u32 cookie, __be32 *saddr)
7d442fab 106{
95cb5745 107 struct ip_tunnel_dst *idst;
7d442fab
TH
108 struct dst_entry *dst;
109
110 rcu_read_lock();
a35165ca 111 idst = raw_cpu_ptr(t->dst_cache);
95cb5745 112 dst = rcu_dereference(idst->dst);
f8864972
ED
113 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
114 dst = NULL;
b045d37b 115 if (dst) {
95cb5745
DP
116 if (!dst->obsolete || dst->ops->check(dst, cookie)) {
117 *saddr = idst->saddr;
118 } else {
b045d37b 119 tunnel_dst_reset(t);
f8864972
ED
120 dst_release(dst);
121 dst = NULL;
b045d37b 122 }
7d442fab 123 }
b045d37b
ED
124 rcu_read_unlock();
125 return (struct rtable *)dst;
7d442fab
TH
126}
127
c5441932
PS
128static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
129 __be16 flags, __be32 key)
130{
131 if (p->i_flags & TUNNEL_KEY) {
132 if (flags & TUNNEL_KEY)
133 return key == p->i_key;
134 else
135 /* key expected, none present */
136 return false;
137 } else
138 return !(flags & TUNNEL_KEY);
139}
140
141/* Fallback tunnel: no source, no destination, no key, no options
142
143 Tunnel hash table:
144 We require exact key match i.e. if a key is present in packet
145 it will match only tunnel with the same key; if it is not present,
146 it will match only keyless tunnel.
147
148 All keysless packets, if not matched configured keyless tunnels
149 will match fallback tunnel.
150 Given src, dst and key, find appropriate for input tunnel.
151*/
152struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
153 int link, __be16 flags,
154 __be32 remote, __be32 local,
155 __be32 key)
156{
157 unsigned int hash;
158 struct ip_tunnel *t, *cand = NULL;
159 struct hlist_head *head;
160
967680e0 161 hash = ip_tunnel_hash(key, remote);
c5441932
PS
162 head = &itn->tunnels[hash];
163
164 hlist_for_each_entry_rcu(t, head, hash_node) {
165 if (local != t->parms.iph.saddr ||
166 remote != t->parms.iph.daddr ||
167 !(t->dev->flags & IFF_UP))
168 continue;
169
170 if (!ip_tunnel_key_match(&t->parms, flags, key))
171 continue;
172
173 if (t->parms.link == link)
174 return t;
175 else
176 cand = t;
177 }
178
179 hlist_for_each_entry_rcu(t, head, hash_node) {
180 if (remote != t->parms.iph.daddr ||
e0056593 181 t->parms.iph.saddr != 0 ||
c5441932
PS
182 !(t->dev->flags & IFF_UP))
183 continue;
184
185 if (!ip_tunnel_key_match(&t->parms, flags, key))
186 continue;
187
188 if (t->parms.link == link)
189 return t;
190 else if (!cand)
191 cand = t;
192 }
193
967680e0 194 hash = ip_tunnel_hash(key, 0);
c5441932
PS
195 head = &itn->tunnels[hash];
196
197 hlist_for_each_entry_rcu(t, head, hash_node) {
e0056593
DP
198 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
199 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
200 continue;
201
202 if (!(t->dev->flags & IFF_UP))
c5441932
PS
203 continue;
204
205 if (!ip_tunnel_key_match(&t->parms, flags, key))
206 continue;
207
208 if (t->parms.link == link)
209 return t;
210 else if (!cand)
211 cand = t;
212 }
213
214 if (flags & TUNNEL_NO_KEY)
215 goto skip_key_lookup;
216
217 hlist_for_each_entry_rcu(t, head, hash_node) {
218 if (t->parms.i_key != key ||
e0056593
DP
219 t->parms.iph.saddr != 0 ||
220 t->parms.iph.daddr != 0 ||
c5441932
PS
221 !(t->dev->flags & IFF_UP))
222 continue;
223
224 if (t->parms.link == link)
225 return t;
226 else if (!cand)
227 cand = t;
228 }
229
230skip_key_lookup:
231 if (cand)
232 return cand;
233
234 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
235 return netdev_priv(itn->fb_tunnel_dev);
236
237
238 return NULL;
239}
240EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
241
242static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
243 struct ip_tunnel_parm *parms)
244{
245 unsigned int h;
246 __be32 remote;
6d608f06 247 __be32 i_key = parms->i_key;
c5441932
PS
248
249 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
250 remote = parms->iph.daddr;
251 else
252 remote = 0;
253
6d608f06
SK
254 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
255 i_key = 0;
256
257 h = ip_tunnel_hash(i_key, remote);
c5441932
PS
258 return &itn->tunnels[h];
259}
260
261static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
262{
263 struct hlist_head *head = ip_bucket(itn, &t->parms);
264
265 hlist_add_head_rcu(&t->hash_node, head);
266}
267
268static void ip_tunnel_del(struct ip_tunnel *t)
269{
270 hlist_del_init_rcu(&t->hash_node);
271}
272
273static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
274 struct ip_tunnel_parm *parms,
275 int type)
276{
277 __be32 remote = parms->iph.daddr;
278 __be32 local = parms->iph.saddr;
279 __be32 key = parms->i_key;
5ce54af1 280 __be16 flags = parms->i_flags;
c5441932
PS
281 int link = parms->link;
282 struct ip_tunnel *t = NULL;
283 struct hlist_head *head = ip_bucket(itn, parms);
284
285 hlist_for_each_entry_rcu(t, head, hash_node) {
286 if (local == t->parms.iph.saddr &&
287 remote == t->parms.iph.daddr &&
c5441932 288 link == t->parms.link &&
5ce54af1
DP
289 type == t->dev->type &&
290 ip_tunnel_key_match(&t->parms, flags, key))
c5441932
PS
291 break;
292 }
293 return t;
294}
295
296static struct net_device *__ip_tunnel_create(struct net *net,
297 const struct rtnl_link_ops *ops,
298 struct ip_tunnel_parm *parms)
299{
300 int err;
301 struct ip_tunnel *tunnel;
302 struct net_device *dev;
303 char name[IFNAMSIZ];
304
305 if (parms->name[0])
306 strlcpy(name, parms->name, IFNAMSIZ);
307 else {
54a5d382 308 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
c5441932
PS
309 err = -E2BIG;
310 goto failed;
311 }
312 strlcpy(name, ops->kind, IFNAMSIZ);
313 strncat(name, "%d", 2);
314 }
315
316 ASSERT_RTNL();
c835a677 317 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
c5441932
PS
318 if (!dev) {
319 err = -ENOMEM;
320 goto failed;
321 }
322 dev_net_set(dev, net);
323
324 dev->rtnl_link_ops = ops;
325
326 tunnel = netdev_priv(dev);
327 tunnel->parms = *parms;
5e6700b3 328 tunnel->net = net;
c5441932
PS
329
330 err = register_netdevice(dev);
331 if (err)
332 goto failed_free;
333
334 return dev;
335
336failed_free:
337 free_netdev(dev);
338failed:
339 return ERR_PTR(err);
340}
341
7d442fab
TH
342static inline void init_tunnel_flow(struct flowi4 *fl4,
343 int proto,
344 __be32 daddr, __be32 saddr,
345 __be32 key, __u8 tos, int oif)
c5441932
PS
346{
347 memset(fl4, 0, sizeof(*fl4));
348 fl4->flowi4_oif = oif;
349 fl4->daddr = daddr;
350 fl4->saddr = saddr;
351 fl4->flowi4_tos = tos;
352 fl4->flowi4_proto = proto;
353 fl4->fl4_gre_key = key;
c5441932
PS
354}
355
356static int ip_tunnel_bind_dev(struct net_device *dev)
357{
358 struct net_device *tdev = NULL;
359 struct ip_tunnel *tunnel = netdev_priv(dev);
360 const struct iphdr *iph;
361 int hlen = LL_MAX_HEADER;
362 int mtu = ETH_DATA_LEN;
363 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
364
365 iph = &tunnel->parms.iph;
366
367 /* Guess output device to choose reasonable mtu and needed_headroom */
368 if (iph->daddr) {
369 struct flowi4 fl4;
370 struct rtable *rt;
371
7d442fab
TH
372 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
373 iph->saddr, tunnel->parms.o_key,
374 RT_TOS(iph->tos), tunnel->parms.link);
375 rt = ip_route_output_key(tunnel->net, &fl4);
376
c5441932
PS
377 if (!IS_ERR(rt)) {
378 tdev = rt->dst.dev;
95cb5745 379 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
c5441932
PS
380 ip_rt_put(rt);
381 }
382 if (dev->type != ARPHRD_ETHER)
383 dev->flags |= IFF_POINTOPOINT;
384 }
385
386 if (!tdev && tunnel->parms.link)
6c742e71 387 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
c5441932
PS
388
389 if (tdev) {
390 hlen = tdev->hard_header_len + tdev->needed_headroom;
391 mtu = tdev->mtu;
392 }
393 dev->iflink = tunnel->parms.link;
394
395 dev->needed_headroom = t_hlen + hlen;
396 mtu -= (dev->hard_header_len + t_hlen);
397
398 if (mtu < 68)
399 mtu = 68;
400
401 return mtu;
402}
403
404static struct ip_tunnel *ip_tunnel_create(struct net *net,
405 struct ip_tunnel_net *itn,
406 struct ip_tunnel_parm *parms)
407{
4929fd8c 408 struct ip_tunnel *nt;
c5441932
PS
409 struct net_device *dev;
410
411 BUG_ON(!itn->fb_tunnel_dev);
c5441932
PS
412 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
413 if (IS_ERR(dev))
6dd3c9ec 414 return ERR_CAST(dev);
c5441932
PS
415
416 dev->mtu = ip_tunnel_bind_dev(dev);
417
418 nt = netdev_priv(dev);
419 ip_tunnel_add(itn, nt);
420 return nt;
421}
422
423int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
424 const struct tnl_ptk_info *tpi, bool log_ecn_error)
425{
8f84985f 426 struct pcpu_sw_netstats *tstats;
c5441932
PS
427 const struct iphdr *iph = ip_hdr(skb);
428 int err;
429
c5441932
PS
430#ifdef CONFIG_NET_IPGRE_BROADCAST
431 if (ipv4_is_multicast(iph->daddr)) {
c5441932
PS
432 tunnel->dev->stats.multicast++;
433 skb->pkt_type = PACKET_BROADCAST;
434 }
435#endif
436
437 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
438 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
439 tunnel->dev->stats.rx_crc_errors++;
440 tunnel->dev->stats.rx_errors++;
441 goto drop;
442 }
443
444 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
445 if (!(tpi->flags&TUNNEL_SEQ) ||
446 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
447 tunnel->dev->stats.rx_fifo_errors++;
448 tunnel->dev->stats.rx_errors++;
449 goto drop;
450 }
451 tunnel->i_seqno = ntohl(tpi->seq) + 1;
452 }
453
e96f2e7c
YC
454 skb_reset_network_header(skb);
455
c5441932
PS
456 err = IP_ECN_decapsulate(iph, skb);
457 if (unlikely(err)) {
458 if (log_ecn_error)
459 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
460 &iph->saddr, iph->tos);
461 if (err > 1) {
462 ++tunnel->dev->stats.rx_frame_errors;
463 ++tunnel->dev->stats.rx_errors;
464 goto drop;
465 }
466 }
467
468 tstats = this_cpu_ptr(tunnel->dev->tstats);
469 u64_stats_update_begin(&tstats->syncp);
470 tstats->rx_packets++;
471 tstats->rx_bytes += skb->len;
472 u64_stats_update_end(&tstats->syncp);
473
81b9eab5
AS
474 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
475
3d7b46cd
PS
476 if (tunnel->dev->type == ARPHRD_ETHER) {
477 skb->protocol = eth_type_trans(skb, tunnel->dev);
478 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
479 } else {
480 skb->dev = tunnel->dev;
481 }
64261f23 482
c5441932
PS
483 gro_cells_receive(&tunnel->gro_cells, skb);
484 return 0;
485
486drop:
487 kfree_skb(skb);
488 return 0;
489}
490EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
491
56328486
TH
492static int ip_encap_hlen(struct ip_tunnel_encap *e)
493{
494 switch (e->type) {
495 case TUNNEL_ENCAP_NONE:
496 return 0;
497 case TUNNEL_ENCAP_FOU:
498 return sizeof(struct udphdr);
bc1fc390
TH
499 case TUNNEL_ENCAP_GUE:
500 return sizeof(struct udphdr) + sizeof(struct guehdr);
56328486
TH
501 default:
502 return -EINVAL;
503 }
504}
505
506int ip_tunnel_encap_setup(struct ip_tunnel *t,
507 struct ip_tunnel_encap *ipencap)
508{
509 int hlen;
510
511 memset(&t->encap, 0, sizeof(t->encap));
512
513 hlen = ip_encap_hlen(ipencap);
514 if (hlen < 0)
515 return hlen;
516
517 t->encap.type = ipencap->type;
518 t->encap.sport = ipencap->sport;
519 t->encap.dport = ipencap->dport;
520 t->encap.flags = ipencap->flags;
521
522 t->encap_hlen = hlen;
523 t->hlen = t->encap_hlen + t->tun_hlen;
524
525 return 0;
526}
527EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
528
529static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
530 size_t hdr_len, u8 *protocol, struct flowi4 *fl4)
531{
532 struct udphdr *uh;
533 __be16 sport;
534 bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
535 int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
536
537 skb = iptunnel_handle_offloads(skb, csum, type);
538
539 if (IS_ERR(skb))
540 return PTR_ERR(skb);
541
542 /* Get length and hash before making space in skb */
543
544 sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
545 skb, 0, 0, false);
546
547 skb_push(skb, hdr_len);
548
549 skb_reset_transport_header(skb);
550 uh = udp_hdr(skb);
551
bc1fc390
TH
552 if (e->type == TUNNEL_ENCAP_GUE) {
553 struct guehdr *guehdr = (struct guehdr *)&uh[1];
554
555 guehdr->version = 0;
556 guehdr->hlen = 0;
557 guehdr->flags = 0;
558 guehdr->next_hdr = *protocol;
559 }
560
56328486
TH
561 uh->dest = e->dport;
562 uh->source = sport;
563 uh->len = htons(skb->len);
564 uh->check = 0;
565 udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
566 fl4->saddr, fl4->daddr, skb->len);
567
568 *protocol = IPPROTO_UDP;
569
570 return 0;
571}
572
573int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
574 u8 *protocol, struct flowi4 *fl4)
575{
576 switch (t->encap.type) {
577 case TUNNEL_ENCAP_NONE:
578 return 0;
579 case TUNNEL_ENCAP_FOU:
bc1fc390 580 case TUNNEL_ENCAP_GUE:
56328486
TH
581 return fou_build_header(skb, &t->encap, t->encap_hlen,
582 protocol, fl4);
583 default:
584 return -EINVAL;
585 }
586}
587EXPORT_SYMBOL(ip_tunnel_encap);
588
23a3647b
PS
589static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
590 struct rtable *rt, __be16 df)
591{
592 struct ip_tunnel *tunnel = netdev_priv(dev);
8c91e162 593 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
23a3647b
PS
594 int mtu;
595
596 if (df)
597 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
598 - sizeof(struct iphdr) - tunnel->hlen;
599 else
600 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
601
602 if (skb_dst(skb))
603 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
604
605 if (skb->protocol == htons(ETH_P_IP)) {
606 if (!skb_is_gso(skb) &&
607 (df & htons(IP_DF)) && mtu < pkt_size) {
608 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
609 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
610 return -E2BIG;
611 }
612 }
613#if IS_ENABLED(CONFIG_IPV6)
614 else if (skb->protocol == htons(ETH_P_IPV6)) {
615 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
616
617 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
618 mtu >= IPV6_MIN_MTU) {
619 if ((tunnel->parms.iph.daddr &&
620 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
621 rt6->rt6i_dst.plen == 128) {
622 rt6->rt6i_flags |= RTF_MODIFIED;
623 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
624 }
625 }
626
627 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
628 mtu < pkt_size) {
629 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
630 return -E2BIG;
631 }
632 }
633#endif
634 return 0;
635}
636
c5441932 637void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
56328486 638 const struct iphdr *tnl_params, u8 protocol)
c5441932
PS
639{
640 struct ip_tunnel *tunnel = netdev_priv(dev);
641 const struct iphdr *inner_iph;
c5441932
PS
642 struct flowi4 fl4;
643 u8 tos, ttl;
644 __be16 df;
b045d37b 645 struct rtable *rt; /* Route to the other host */
c5441932
PS
646 unsigned int max_headroom; /* The extra header space needed */
647 __be32 dst;
0e6fbc5b 648 int err;
22fb22ea 649 bool connected;
c5441932
PS
650
651 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
22fb22ea 652 connected = (tunnel->parms.iph.daddr != 0);
c5441932
PS
653
654 dst = tnl_params->daddr;
655 if (dst == 0) {
656 /* NBMA tunnel */
657
658 if (skb_dst(skb) == NULL) {
659 dev->stats.tx_fifo_errors++;
660 goto tx_error;
661 }
662
663 if (skb->protocol == htons(ETH_P_IP)) {
664 rt = skb_rtable(skb);
665 dst = rt_nexthop(rt, inner_iph->daddr);
666 }
667#if IS_ENABLED(CONFIG_IPV6)
668 else if (skb->protocol == htons(ETH_P_IPV6)) {
669 const struct in6_addr *addr6;
670 struct neighbour *neigh;
671 bool do_tx_error_icmp;
672 int addr_type;
673
674 neigh = dst_neigh_lookup(skb_dst(skb),
675 &ipv6_hdr(skb)->daddr);
676 if (neigh == NULL)
677 goto tx_error;
678
679 addr6 = (const struct in6_addr *)&neigh->primary_key;
680 addr_type = ipv6_addr_type(addr6);
681
682 if (addr_type == IPV6_ADDR_ANY) {
683 addr6 = &ipv6_hdr(skb)->daddr;
684 addr_type = ipv6_addr_type(addr6);
685 }
686
687 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
688 do_tx_error_icmp = true;
689 else {
690 do_tx_error_icmp = false;
691 dst = addr6->s6_addr32[3];
692 }
693 neigh_release(neigh);
694 if (do_tx_error_icmp)
695 goto tx_error_icmp;
696 }
697#endif
698 else
699 goto tx_error;
7d442fab
TH
700
701 connected = false;
c5441932
PS
702 }
703
704 tos = tnl_params->tos;
705 if (tos & 0x1) {
706 tos &= ~0x1;
7d442fab 707 if (skb->protocol == htons(ETH_P_IP)) {
c5441932 708 tos = inner_iph->tos;
7d442fab
TH
709 connected = false;
710 } else if (skb->protocol == htons(ETH_P_IPV6)) {
c5441932 711 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
7d442fab
TH
712 connected = false;
713 }
c5441932
PS
714 }
715
7d442fab
TH
716 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
717 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
718
56328486
TH
719 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
720 goto tx_error;
721
95cb5745 722 rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
7d442fab
TH
723
724 if (!rt) {
725 rt = ip_route_output_key(tunnel->net, &fl4);
726
727 if (IS_ERR(rt)) {
728 dev->stats.tx_carrier_errors++;
729 goto tx_error;
730 }
731 if (connected)
95cb5745 732 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
c5441932 733 }
7d442fab 734
0e6fbc5b 735 if (rt->dst.dev == dev) {
c5441932
PS
736 ip_rt_put(rt);
737 dev->stats.collisions++;
738 goto tx_error;
739 }
c5441932 740
23a3647b
PS
741 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
742 ip_rt_put(rt);
743 goto tx_error;
c5441932 744 }
c5441932
PS
745
746 if (tunnel->err_count > 0) {
747 if (time_before(jiffies,
748 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
749 tunnel->err_count--;
750
11c21a30 751 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
c5441932
PS
752 dst_link_failure(skb);
753 } else
754 tunnel->err_count = 0;
755 }
756
d4a71b15 757 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
c5441932
PS
758 ttl = tnl_params->ttl;
759 if (ttl == 0) {
760 if (skb->protocol == htons(ETH_P_IP))
761 ttl = inner_iph->ttl;
762#if IS_ENABLED(CONFIG_IPV6)
763 else if (skb->protocol == htons(ETH_P_IPV6))
764 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
765#endif
766 else
767 ttl = ip4_dst_hoplimit(&rt->dst);
768 }
769
23a3647b
PS
770 df = tnl_params->frag_off;
771 if (skb->protocol == htons(ETH_P_IP))
772 df |= (inner_iph->frag_off&htons(IP_DF));
773
0e6fbc5b 774 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
7371e022 775 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
3e08f4a7 776 if (max_headroom > dev->needed_headroom)
c5441932 777 dev->needed_headroom = max_headroom;
3e08f4a7
SK
778
779 if (skb_cow_head(skb, dev->needed_headroom)) {
586d5fc8 780 ip_rt_put(rt);
3e08f4a7 781 dev->stats.tx_dropped++;
3acfa1e7 782 kfree_skb(skb);
3e08f4a7 783 return;
c5441932
PS
784 }
785
aad88724 786 err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
d4a71b15 787 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
0e6fbc5b 788 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
c5441932 789
c5441932
PS
790 return;
791
792#if IS_ENABLED(CONFIG_IPV6)
793tx_error_icmp:
794 dst_link_failure(skb);
795#endif
796tx_error:
797 dev->stats.tx_errors++;
3acfa1e7 798 kfree_skb(skb);
c5441932
PS
799}
800EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
801
802static void ip_tunnel_update(struct ip_tunnel_net *itn,
803 struct ip_tunnel *t,
804 struct net_device *dev,
805 struct ip_tunnel_parm *p,
806 bool set_mtu)
807{
808 ip_tunnel_del(t);
809 t->parms.iph.saddr = p->iph.saddr;
810 t->parms.iph.daddr = p->iph.daddr;
811 t->parms.i_key = p->i_key;
812 t->parms.o_key = p->o_key;
813 if (dev->type != ARPHRD_ETHER) {
814 memcpy(dev->dev_addr, &p->iph.saddr, 4);
815 memcpy(dev->broadcast, &p->iph.daddr, 4);
816 }
817 ip_tunnel_add(itn, t);
818
819 t->parms.iph.ttl = p->iph.ttl;
820 t->parms.iph.tos = p->iph.tos;
821 t->parms.iph.frag_off = p->iph.frag_off;
822
823 if (t->parms.link != p->link) {
824 int mtu;
825
826 t->parms.link = p->link;
827 mtu = ip_tunnel_bind_dev(dev);
828 if (set_mtu)
829 dev->mtu = mtu;
830 }
cf71d2bc 831 ip_tunnel_dst_reset_all(t);
c5441932
PS
832 netdev_state_change(dev);
833}
834
835int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
836{
837 int err = 0;
8c923ce2
ND
838 struct ip_tunnel *t = netdev_priv(dev);
839 struct net *net = t->net;
840 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
c5441932
PS
841
842 BUG_ON(!itn->fb_tunnel_dev);
843 switch (cmd) {
844 case SIOCGETTUNNEL:
8c923ce2 845 if (dev == itn->fb_tunnel_dev) {
c5441932 846 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
8c923ce2
ND
847 if (t == NULL)
848 t = netdev_priv(dev);
849 }
c5441932
PS
850 memcpy(p, &t->parms, sizeof(*p));
851 break;
852
853 case SIOCADDTUNNEL:
854 case SIOCCHGTUNNEL:
855 err = -EPERM;
856 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
857 goto done;
858 if (p->iph.ttl)
859 p->iph.frag_off |= htons(IP_DF);
7c8e6b9c
DP
860 if (!(p->i_flags & VTI_ISVTI)) {
861 if (!(p->i_flags & TUNNEL_KEY))
862 p->i_key = 0;
863 if (!(p->o_flags & TUNNEL_KEY))
864 p->o_key = 0;
865 }
c5441932
PS
866
867 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
868
d61746b2
SK
869 if (cmd == SIOCADDTUNNEL) {
870 if (!t) {
871 t = ip_tunnel_create(net, itn, p);
872 err = PTR_ERR_OR_ZERO(t);
873 break;
874 }
875
876 err = -EEXIST;
ee30ef4d 877 break;
6dd3c9ec 878 }
c5441932
PS
879 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
880 if (t != NULL) {
881 if (t->dev != dev) {
882 err = -EEXIST;
883 break;
884 }
885 } else {
886 unsigned int nflags = 0;
887
888 if (ipv4_is_multicast(p->iph.daddr))
889 nflags = IFF_BROADCAST;
890 else if (p->iph.daddr)
891 nflags = IFF_POINTOPOINT;
892
893 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
894 err = -EINVAL;
895 break;
896 }
897
898 t = netdev_priv(dev);
899 }
900 }
901
902 if (t) {
903 err = 0;
904 ip_tunnel_update(itn, t, dev, p, true);
6dd3c9ec
FW
905 } else {
906 err = -ENOENT;
907 }
c5441932
PS
908 break;
909
910 case SIOCDELTUNNEL:
911 err = -EPERM;
912 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
913 goto done;
914
915 if (dev == itn->fb_tunnel_dev) {
916 err = -ENOENT;
917 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
918 if (t == NULL)
919 goto done;
920 err = -EPERM;
921 if (t == netdev_priv(itn->fb_tunnel_dev))
922 goto done;
923 dev = t->dev;
924 }
925 unregister_netdevice(dev);
926 err = 0;
927 break;
928
929 default:
930 err = -EINVAL;
931 }
932
933done:
934 return err;
935}
936EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
937
938int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
939{
940 struct ip_tunnel *tunnel = netdev_priv(dev);
941 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
942
943 if (new_mtu < 68 ||
944 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
945 return -EINVAL;
946 dev->mtu = new_mtu;
947 return 0;
948}
949EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
950
951static void ip_tunnel_dev_free(struct net_device *dev)
952{
953 struct ip_tunnel *tunnel = netdev_priv(dev);
954
955 gro_cells_destroy(&tunnel->gro_cells);
9a4aa9af 956 free_percpu(tunnel->dst_cache);
c5441932
PS
957 free_percpu(dev->tstats);
958 free_netdev(dev);
959}
960
961void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
962{
c5441932
PS
963 struct ip_tunnel *tunnel = netdev_priv(dev);
964 struct ip_tunnel_net *itn;
965
6c742e71 966 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
c5441932
PS
967
968 if (itn->fb_tunnel_dev != dev) {
969 ip_tunnel_del(netdev_priv(dev));
970 unregister_netdevice_queue(dev, head);
971 }
972}
973EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
974
d3b6f614 975int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
c5441932
PS
976 struct rtnl_link_ops *ops, char *devname)
977{
978 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
979 struct ip_tunnel_parm parms;
6261d983 980 unsigned int i;
c5441932 981
6261d983 982 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
983 INIT_HLIST_HEAD(&itn->tunnels[i]);
c5441932
PS
984
985 if (!ops) {
986 itn->fb_tunnel_dev = NULL;
987 return 0;
988 }
6261d983 989
c5441932
PS
990 memset(&parms, 0, sizeof(parms));
991 if (devname)
992 strlcpy(parms.name, devname, IFNAMSIZ);
993
994 rtnl_lock();
995 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
ea857f28
DC
996 /* FB netdevice is special: we have one, and only one per netns.
997 * Allowing to move it to another netns is clearly unsafe.
998 */
67013282 999 if (!IS_ERR(itn->fb_tunnel_dev)) {
b4de77ad 1000 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
78ff4be4 1001 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
67013282
SK
1002 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1003 }
b4de77ad 1004 rtnl_unlock();
c5441932 1005
27d79f3b 1006 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
c5441932
PS
1007}
1008EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1009
6c742e71
ND
1010static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1011 struct rtnl_link_ops *ops)
c5441932 1012{
6c742e71
ND
1013 struct net *net = dev_net(itn->fb_tunnel_dev);
1014 struct net_device *dev, *aux;
c5441932
PS
1015 int h;
1016
6c742e71
ND
1017 for_each_netdev_safe(net, dev, aux)
1018 if (dev->rtnl_link_ops == ops)
1019 unregister_netdevice_queue(dev, head);
1020
c5441932
PS
1021 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1022 struct ip_tunnel *t;
1023 struct hlist_node *n;
1024 struct hlist_head *thead = &itn->tunnels[h];
1025
1026 hlist_for_each_entry_safe(t, n, thead, hash_node)
6c742e71
ND
1027 /* If dev is in the same netns, it has already
1028 * been added to the list by the previous loop.
1029 */
1030 if (!net_eq(dev_net(t->dev), net))
1031 unregister_netdevice_queue(t->dev, head);
c5441932 1032 }
c5441932
PS
1033}
1034
6c742e71 1035void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
c5441932
PS
1036{
1037 LIST_HEAD(list);
1038
1039 rtnl_lock();
6c742e71 1040 ip_tunnel_destroy(itn, &list, ops);
c5441932
PS
1041 unregister_netdevice_many(&list);
1042 rtnl_unlock();
c5441932
PS
1043}
1044EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1045
1046int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1047 struct ip_tunnel_parm *p)
1048{
1049 struct ip_tunnel *nt;
1050 struct net *net = dev_net(dev);
1051 struct ip_tunnel_net *itn;
1052 int mtu;
1053 int err;
1054
1055 nt = netdev_priv(dev);
1056 itn = net_generic(net, nt->ip_tnl_net_id);
1057
1058 if (ip_tunnel_find(itn, p, dev->type))
1059 return -EEXIST;
1060
5e6700b3 1061 nt->net = net;
c5441932
PS
1062 nt->parms = *p;
1063 err = register_netdevice(dev);
1064 if (err)
1065 goto out;
1066
1067 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1068 eth_hw_addr_random(dev);
1069
1070 mtu = ip_tunnel_bind_dev(dev);
1071 if (!tb[IFLA_MTU])
1072 dev->mtu = mtu;
1073
1074 ip_tunnel_add(itn, nt);
1075
1076out:
1077 return err;
1078}
1079EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1080
1081int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1082 struct ip_tunnel_parm *p)
1083{
6c742e71 1084 struct ip_tunnel *t;
c5441932 1085 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1086 struct net *net = tunnel->net;
c5441932
PS
1087 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1088
1089 if (dev == itn->fb_tunnel_dev)
1090 return -EINVAL;
1091
c5441932
PS
1092 t = ip_tunnel_find(itn, p, dev->type);
1093
1094 if (t) {
1095 if (t->dev != dev)
1096 return -EEXIST;
1097 } else {
6c742e71 1098 t = tunnel;
c5441932
PS
1099
1100 if (dev->type != ARPHRD_ETHER) {
1101 unsigned int nflags = 0;
1102
1103 if (ipv4_is_multicast(p->iph.daddr))
1104 nflags = IFF_BROADCAST;
1105 else if (p->iph.daddr)
1106 nflags = IFF_POINTOPOINT;
1107
1108 if ((dev->flags ^ nflags) &
1109 (IFF_POINTOPOINT | IFF_BROADCAST))
1110 return -EINVAL;
1111 }
1112 }
1113
1114 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1115 return 0;
1116}
1117EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1118
1119int ip_tunnel_init(struct net_device *dev)
1120{
1121 struct ip_tunnel *tunnel = netdev_priv(dev);
1122 struct iphdr *iph = &tunnel->parms.iph;
1c213bd2 1123 int err;
c5441932
PS
1124
1125 dev->destructor = ip_tunnel_dev_free;
1c213bd2 1126 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
c5441932
PS
1127 if (!dev->tstats)
1128 return -ENOMEM;
1129
9a4aa9af
TH
1130 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1131 if (!tunnel->dst_cache) {
1132 free_percpu(dev->tstats);
1133 return -ENOMEM;
1134 }
1135
c5441932
PS
1136 err = gro_cells_init(&tunnel->gro_cells, dev);
1137 if (err) {
9a4aa9af 1138 free_percpu(tunnel->dst_cache);
c5441932
PS
1139 free_percpu(dev->tstats);
1140 return err;
1141 }
1142
1143 tunnel->dev = dev;
6c742e71 1144 tunnel->net = dev_net(dev);
c5441932
PS
1145 strcpy(tunnel->parms.name, dev->name);
1146 iph->version = 4;
1147 iph->ihl = 5;
1148
1149 return 0;
1150}
1151EXPORT_SYMBOL_GPL(ip_tunnel_init);
1152
1153void ip_tunnel_uninit(struct net_device *dev)
1154{
c5441932 1155 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1156 struct net *net = tunnel->net;
c5441932
PS
1157 struct ip_tunnel_net *itn;
1158
1159 itn = net_generic(net, tunnel->ip_tnl_net_id);
1160 /* fb_tunnel_dev will be unregisted in net-exit call. */
1161 if (itn->fb_tunnel_dev != dev)
1162 ip_tunnel_del(netdev_priv(dev));
7d442fab 1163
cf71d2bc 1164 ip_tunnel_dst_reset_all(tunnel);
c5441932
PS
1165}
1166EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1167
1168/* Do least required initialization, rest of init is done in tunnel_init call */
1169void ip_tunnel_setup(struct net_device *dev, int net_id)
1170{
1171 struct ip_tunnel *tunnel = netdev_priv(dev);
1172 tunnel->ip_tnl_net_id = net_id;
1173}
1174EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1175
1176MODULE_LICENSE("GPL");
This page took 0.168809 seconds and 5 git commands to generate.