tun: remove bogus hardware vlan acceleration flags from vlan_features
[deliverable/linux.git] / net / ipv4 / ip_tunnel.c
CommitLineData
c5441932
PS
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
27d79f3b 43#include <linux/err.h>
c5441932
PS
44
45#include <net/sock.h>
46#include <net/ip.h>
47#include <net/icmp.h>
48#include <net/protocol.h>
49#include <net/ip_tunnels.h>
50#include <net/arp.h>
51#include <net/checksum.h>
52#include <net/dsfield.h>
53#include <net/inet_ecn.h>
54#include <net/xfrm.h>
55#include <net/net_namespace.h>
56#include <net/netns/generic.h>
57#include <net/rtnetlink.h>
58
59#if IS_ENABLED(CONFIG_IPV6)
60#include <net/ipv6.h>
61#include <net/ip6_fib.h>
62#include <net/ip6_route.h>
63#endif
64
967680e0 65static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
c5441932
PS
66{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
70
6c7e7610
ED
71static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
72 struct dst_entry *dst)
7d442fab
TH
73{
74 struct dst_entry *old_dst;
75
6c7e7610
ED
76 if (dst) {
77 if (dst->flags & DST_NOCACHE)
78 dst = NULL;
79 else
80 dst_clone(dst);
81 }
82 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
7d442fab 83 dst_release(old_dst);
7d442fab
TH
84}
85
6c7e7610 86static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
7d442fab 87{
9a4aa9af 88 __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
7d442fab
TH
89}
90
6c7e7610 91static void tunnel_dst_reset(struct ip_tunnel *t)
7d442fab
TH
92{
93 tunnel_dst_set(t, NULL);
94}
95
9a4aa9af
TH
96static void tunnel_dst_reset_all(struct ip_tunnel *t)
97{
98 int i;
99
100 for_each_possible_cpu(i)
101 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102}
103
b045d37b 104static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
7d442fab
TH
105{
106 struct dst_entry *dst;
107
108 rcu_read_lock();
9a4aa9af 109 dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
b045d37b
ED
110 if (dst) {
111 if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
112 rcu_read_unlock();
113 tunnel_dst_reset(t);
114 return NULL;
115 }
7d442fab 116 dst_hold(dst);
7d442fab 117 }
b045d37b
ED
118 rcu_read_unlock();
119 return (struct rtable *)dst;
7d442fab
TH
120}
121
c5441932
PS
122static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
123 __be16 flags, __be32 key)
124{
125 if (p->i_flags & TUNNEL_KEY) {
126 if (flags & TUNNEL_KEY)
127 return key == p->i_key;
128 else
129 /* key expected, none present */
130 return false;
131 } else
132 return !(flags & TUNNEL_KEY);
133}
134
135/* Fallback tunnel: no source, no destination, no key, no options
136
137 Tunnel hash table:
138 We require exact key match i.e. if a key is present in packet
139 it will match only tunnel with the same key; if it is not present,
140 it will match only keyless tunnel.
141
142   All keyless packets, if not matched by configured keyless tunnels,
143   will match the fallback tunnel.
144 Given src, dst and key, find appropriate for input tunnel.
145*/
146struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
147 int link, __be16 flags,
148 __be32 remote, __be32 local,
149 __be32 key)
150{
151 unsigned int hash;
152 struct ip_tunnel *t, *cand = NULL;
153 struct hlist_head *head;
154
967680e0 155 hash = ip_tunnel_hash(key, remote);
c5441932
PS
156 head = &itn->tunnels[hash];
157
158 hlist_for_each_entry_rcu(t, head, hash_node) {
159 if (local != t->parms.iph.saddr ||
160 remote != t->parms.iph.daddr ||
161 !(t->dev->flags & IFF_UP))
162 continue;
163
164 if (!ip_tunnel_key_match(&t->parms, flags, key))
165 continue;
166
167 if (t->parms.link == link)
168 return t;
169 else
170 cand = t;
171 }
172
173 hlist_for_each_entry_rcu(t, head, hash_node) {
174 if (remote != t->parms.iph.daddr ||
175 !(t->dev->flags & IFF_UP))
176 continue;
177
178 if (!ip_tunnel_key_match(&t->parms, flags, key))
179 continue;
180
181 if (t->parms.link == link)
182 return t;
183 else if (!cand)
184 cand = t;
185 }
186
967680e0 187 hash = ip_tunnel_hash(key, 0);
c5441932
PS
188 head = &itn->tunnels[hash];
189
190 hlist_for_each_entry_rcu(t, head, hash_node) {
191 if ((local != t->parms.iph.saddr &&
192 (local != t->parms.iph.daddr ||
193 !ipv4_is_multicast(local))) ||
194 !(t->dev->flags & IFF_UP))
195 continue;
196
197 if (!ip_tunnel_key_match(&t->parms, flags, key))
198 continue;
199
200 if (t->parms.link == link)
201 return t;
202 else if (!cand)
203 cand = t;
204 }
205
206 if (flags & TUNNEL_NO_KEY)
207 goto skip_key_lookup;
208
209 hlist_for_each_entry_rcu(t, head, hash_node) {
210 if (t->parms.i_key != key ||
211 !(t->dev->flags & IFF_UP))
212 continue;
213
214 if (t->parms.link == link)
215 return t;
216 else if (!cand)
217 cand = t;
218 }
219
220skip_key_lookup:
221 if (cand)
222 return cand;
223
224 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
225 return netdev_priv(itn->fb_tunnel_dev);
226
227
228 return NULL;
229}
230EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
231
232static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
233 struct ip_tunnel_parm *parms)
234{
235 unsigned int h;
236 __be32 remote;
237
238 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
239 remote = parms->iph.daddr;
240 else
241 remote = 0;
242
967680e0 243 h = ip_tunnel_hash(parms->i_key, remote);
c5441932
PS
244 return &itn->tunnels[h];
245}
246
247static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
248{
249 struct hlist_head *head = ip_bucket(itn, &t->parms);
250
251 hlist_add_head_rcu(&t->hash_node, head);
252}
253
254static void ip_tunnel_del(struct ip_tunnel *t)
255{
256 hlist_del_init_rcu(&t->hash_node);
257}
258
259static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
260 struct ip_tunnel_parm *parms,
261 int type)
262{
263 __be32 remote = parms->iph.daddr;
264 __be32 local = parms->iph.saddr;
265 __be32 key = parms->i_key;
266 int link = parms->link;
267 struct ip_tunnel *t = NULL;
268 struct hlist_head *head = ip_bucket(itn, parms);
269
270 hlist_for_each_entry_rcu(t, head, hash_node) {
271 if (local == t->parms.iph.saddr &&
272 remote == t->parms.iph.daddr &&
273 key == t->parms.i_key &&
274 link == t->parms.link &&
275 type == t->dev->type)
276 break;
277 }
278 return t;
279}
280
281static struct net_device *__ip_tunnel_create(struct net *net,
282 const struct rtnl_link_ops *ops,
283 struct ip_tunnel_parm *parms)
284{
285 int err;
286 struct ip_tunnel *tunnel;
287 struct net_device *dev;
288 char name[IFNAMSIZ];
289
290 if (parms->name[0])
291 strlcpy(name, parms->name, IFNAMSIZ);
292 else {
54a5d382 293 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
c5441932
PS
294 err = -E2BIG;
295 goto failed;
296 }
297 strlcpy(name, ops->kind, IFNAMSIZ);
298 strncat(name, "%d", 2);
299 }
300
301 ASSERT_RTNL();
302 dev = alloc_netdev(ops->priv_size, name, ops->setup);
303 if (!dev) {
304 err = -ENOMEM;
305 goto failed;
306 }
307 dev_net_set(dev, net);
308
309 dev->rtnl_link_ops = ops;
310
311 tunnel = netdev_priv(dev);
312 tunnel->parms = *parms;
5e6700b3 313 tunnel->net = net;
c5441932
PS
314
315 err = register_netdevice(dev);
316 if (err)
317 goto failed_free;
318
319 return dev;
320
321failed_free:
322 free_netdev(dev);
323failed:
324 return ERR_PTR(err);
325}
326
7d442fab
TH
327static inline void init_tunnel_flow(struct flowi4 *fl4,
328 int proto,
329 __be32 daddr, __be32 saddr,
330 __be32 key, __u8 tos, int oif)
c5441932
PS
331{
332 memset(fl4, 0, sizeof(*fl4));
333 fl4->flowi4_oif = oif;
334 fl4->daddr = daddr;
335 fl4->saddr = saddr;
336 fl4->flowi4_tos = tos;
337 fl4->flowi4_proto = proto;
338 fl4->fl4_gre_key = key;
c5441932
PS
339}
340
341static int ip_tunnel_bind_dev(struct net_device *dev)
342{
343 struct net_device *tdev = NULL;
344 struct ip_tunnel *tunnel = netdev_priv(dev);
345 const struct iphdr *iph;
346 int hlen = LL_MAX_HEADER;
347 int mtu = ETH_DATA_LEN;
348 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
349
350 iph = &tunnel->parms.iph;
351
352 /* Guess output device to choose reasonable mtu and needed_headroom */
353 if (iph->daddr) {
354 struct flowi4 fl4;
355 struct rtable *rt;
356
7d442fab
TH
357 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
358 iph->saddr, tunnel->parms.o_key,
359 RT_TOS(iph->tos), tunnel->parms.link);
360 rt = ip_route_output_key(tunnel->net, &fl4);
361
c5441932
PS
362 if (!IS_ERR(rt)) {
363 tdev = rt->dst.dev;
6c7e7610 364 tunnel_dst_set(tunnel, &rt->dst);
c5441932
PS
365 ip_rt_put(rt);
366 }
367 if (dev->type != ARPHRD_ETHER)
368 dev->flags |= IFF_POINTOPOINT;
369 }
370
371 if (!tdev && tunnel->parms.link)
6c742e71 372 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
c5441932
PS
373
374 if (tdev) {
375 hlen = tdev->hard_header_len + tdev->needed_headroom;
376 mtu = tdev->mtu;
377 }
378 dev->iflink = tunnel->parms.link;
379
380 dev->needed_headroom = t_hlen + hlen;
381 mtu -= (dev->hard_header_len + t_hlen);
382
383 if (mtu < 68)
384 mtu = 68;
385
386 return mtu;
387}
388
389static struct ip_tunnel *ip_tunnel_create(struct net *net,
390 struct ip_tunnel_net *itn,
391 struct ip_tunnel_parm *parms)
392{
393 struct ip_tunnel *nt, *fbt;
394 struct net_device *dev;
395
396 BUG_ON(!itn->fb_tunnel_dev);
397 fbt = netdev_priv(itn->fb_tunnel_dev);
398 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
399 if (IS_ERR(dev))
400 return NULL;
401
402 dev->mtu = ip_tunnel_bind_dev(dev);
403
404 nt = netdev_priv(dev);
405 ip_tunnel_add(itn, nt);
406 return nt;
407}
408
409int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
410 const struct tnl_ptk_info *tpi, bool log_ecn_error)
411{
8f84985f 412 struct pcpu_sw_netstats *tstats;
c5441932
PS
413 const struct iphdr *iph = ip_hdr(skb);
414 int err;
415
c5441932
PS
416#ifdef CONFIG_NET_IPGRE_BROADCAST
417 if (ipv4_is_multicast(iph->daddr)) {
418 /* Looped back packet, drop it! */
419 if (rt_is_output_route(skb_rtable(skb)))
420 goto drop;
421 tunnel->dev->stats.multicast++;
422 skb->pkt_type = PACKET_BROADCAST;
423 }
424#endif
425
426 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
427 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
428 tunnel->dev->stats.rx_crc_errors++;
429 tunnel->dev->stats.rx_errors++;
430 goto drop;
431 }
432
433 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
434 if (!(tpi->flags&TUNNEL_SEQ) ||
435 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
436 tunnel->dev->stats.rx_fifo_errors++;
437 tunnel->dev->stats.rx_errors++;
438 goto drop;
439 }
440 tunnel->i_seqno = ntohl(tpi->seq) + 1;
441 }
442
c5441932
PS
443 err = IP_ECN_decapsulate(iph, skb);
444 if (unlikely(err)) {
445 if (log_ecn_error)
446 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
447 &iph->saddr, iph->tos);
448 if (err > 1) {
449 ++tunnel->dev->stats.rx_frame_errors;
450 ++tunnel->dev->stats.rx_errors;
451 goto drop;
452 }
453 }
454
455 tstats = this_cpu_ptr(tunnel->dev->tstats);
456 u64_stats_update_begin(&tstats->syncp);
457 tstats->rx_packets++;
458 tstats->rx_bytes += skb->len;
459 u64_stats_update_end(&tstats->syncp);
460
81b9eab5
AS
461 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
462
3d7b46cd
PS
463 if (tunnel->dev->type == ARPHRD_ETHER) {
464 skb->protocol = eth_type_trans(skb, tunnel->dev);
465 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
466 } else {
467 skb->dev = tunnel->dev;
468 }
64261f23 469
c5441932
PS
470 gro_cells_receive(&tunnel->gro_cells, skb);
471 return 0;
472
473drop:
474 kfree_skb(skb);
475 return 0;
476}
477EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
478
23a3647b
PS
479static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
480 struct rtable *rt, __be16 df)
481{
482 struct ip_tunnel *tunnel = netdev_priv(dev);
8c91e162 483 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
23a3647b
PS
484 int mtu;
485
486 if (df)
487 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
488 - sizeof(struct iphdr) - tunnel->hlen;
489 else
490 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
491
492 if (skb_dst(skb))
493 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
494
495 if (skb->protocol == htons(ETH_P_IP)) {
496 if (!skb_is_gso(skb) &&
497 (df & htons(IP_DF)) && mtu < pkt_size) {
498 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
499 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
500 return -E2BIG;
501 }
502 }
503#if IS_ENABLED(CONFIG_IPV6)
504 else if (skb->protocol == htons(ETH_P_IPV6)) {
505 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
506
507 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
508 mtu >= IPV6_MIN_MTU) {
509 if ((tunnel->parms.iph.daddr &&
510 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
511 rt6->rt6i_dst.plen == 128) {
512 rt6->rt6i_flags |= RTF_MODIFIED;
513 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
514 }
515 }
516
517 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
518 mtu < pkt_size) {
519 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
520 return -E2BIG;
521 }
522 }
523#endif
524 return 0;
525}
526
c5441932 527void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
bf3d6a8f 528 const struct iphdr *tnl_params, const u8 protocol)
c5441932
PS
529{
530 struct ip_tunnel *tunnel = netdev_priv(dev);
531 const struct iphdr *inner_iph;
c5441932
PS
532 struct flowi4 fl4;
533 u8 tos, ttl;
534 __be16 df;
b045d37b 535 struct rtable *rt; /* Route to the other host */
c5441932
PS
536 unsigned int max_headroom; /* The extra header space needed */
537 __be32 dst;
0e6fbc5b 538 int err;
7d442fab 539 bool connected = true;
c5441932
PS
540
541 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
542
543 dst = tnl_params->daddr;
544 if (dst == 0) {
545 /* NBMA tunnel */
546
547 if (skb_dst(skb) == NULL) {
548 dev->stats.tx_fifo_errors++;
549 goto tx_error;
550 }
551
552 if (skb->protocol == htons(ETH_P_IP)) {
553 rt = skb_rtable(skb);
554 dst = rt_nexthop(rt, inner_iph->daddr);
555 }
556#if IS_ENABLED(CONFIG_IPV6)
557 else if (skb->protocol == htons(ETH_P_IPV6)) {
558 const struct in6_addr *addr6;
559 struct neighbour *neigh;
560 bool do_tx_error_icmp;
561 int addr_type;
562
563 neigh = dst_neigh_lookup(skb_dst(skb),
564 &ipv6_hdr(skb)->daddr);
565 if (neigh == NULL)
566 goto tx_error;
567
568 addr6 = (const struct in6_addr *)&neigh->primary_key;
569 addr_type = ipv6_addr_type(addr6);
570
571 if (addr_type == IPV6_ADDR_ANY) {
572 addr6 = &ipv6_hdr(skb)->daddr;
573 addr_type = ipv6_addr_type(addr6);
574 }
575
576 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
577 do_tx_error_icmp = true;
578 else {
579 do_tx_error_icmp = false;
580 dst = addr6->s6_addr32[3];
581 }
582 neigh_release(neigh);
583 if (do_tx_error_icmp)
584 goto tx_error_icmp;
585 }
586#endif
587 else
588 goto tx_error;
7d442fab
TH
589
590 connected = false;
c5441932
PS
591 }
592
593 tos = tnl_params->tos;
594 if (tos & 0x1) {
595 tos &= ~0x1;
7d442fab 596 if (skb->protocol == htons(ETH_P_IP)) {
c5441932 597 tos = inner_iph->tos;
7d442fab
TH
598 connected = false;
599 } else if (skb->protocol == htons(ETH_P_IPV6)) {
c5441932 600 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
7d442fab
TH
601 connected = false;
602 }
c5441932
PS
603 }
604
7d442fab
TH
605 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
606 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
607
b045d37b 608 rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
7d442fab
TH
609
610 if (!rt) {
611 rt = ip_route_output_key(tunnel->net, &fl4);
612
613 if (IS_ERR(rt)) {
614 dev->stats.tx_carrier_errors++;
615 goto tx_error;
616 }
617 if (connected)
6c7e7610 618 tunnel_dst_set(tunnel, &rt->dst);
c5441932 619 }
7d442fab 620
0e6fbc5b 621 if (rt->dst.dev == dev) {
c5441932
PS
622 ip_rt_put(rt);
623 dev->stats.collisions++;
624 goto tx_error;
625 }
c5441932 626
23a3647b
PS
627 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
628 ip_rt_put(rt);
629 goto tx_error;
c5441932 630 }
c5441932
PS
631
632 if (tunnel->err_count > 0) {
633 if (time_before(jiffies,
634 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
635 tunnel->err_count--;
636
11c21a30 637 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
c5441932
PS
638 dst_link_failure(skb);
639 } else
640 tunnel->err_count = 0;
641 }
642
d4a71b15 643 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
c5441932
PS
644 ttl = tnl_params->ttl;
645 if (ttl == 0) {
646 if (skb->protocol == htons(ETH_P_IP))
647 ttl = inner_iph->ttl;
648#if IS_ENABLED(CONFIG_IPV6)
649 else if (skb->protocol == htons(ETH_P_IPV6))
650 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
651#endif
652 else
653 ttl = ip4_dst_hoplimit(&rt->dst);
654 }
655
23a3647b
PS
656 df = tnl_params->frag_off;
657 if (skb->protocol == htons(ETH_P_IP))
658 df |= (inner_iph->frag_off&htons(IP_DF));
659
0e6fbc5b
PS
660 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
661 + rt->dst.header_len;
3e08f4a7 662 if (max_headroom > dev->needed_headroom)
c5441932 663 dev->needed_headroom = max_headroom;
3e08f4a7
SK
664
665 if (skb_cow_head(skb, dev->needed_headroom)) {
666 dev->stats.tx_dropped++;
3acfa1e7 667 kfree_skb(skb);
3e08f4a7 668 return;
c5441932
PS
669 }
670
8b7ed2d9 671 err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
d4a71b15 672 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
0e6fbc5b 673 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
c5441932 674
c5441932
PS
675 return;
676
677#if IS_ENABLED(CONFIG_IPV6)
678tx_error_icmp:
679 dst_link_failure(skb);
680#endif
681tx_error:
682 dev->stats.tx_errors++;
3acfa1e7 683 kfree_skb(skb);
c5441932
PS
684}
685EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
686
687static void ip_tunnel_update(struct ip_tunnel_net *itn,
688 struct ip_tunnel *t,
689 struct net_device *dev,
690 struct ip_tunnel_parm *p,
691 bool set_mtu)
692{
693 ip_tunnel_del(t);
694 t->parms.iph.saddr = p->iph.saddr;
695 t->parms.iph.daddr = p->iph.daddr;
696 t->parms.i_key = p->i_key;
697 t->parms.o_key = p->o_key;
698 if (dev->type != ARPHRD_ETHER) {
699 memcpy(dev->dev_addr, &p->iph.saddr, 4);
700 memcpy(dev->broadcast, &p->iph.daddr, 4);
701 }
702 ip_tunnel_add(itn, t);
703
704 t->parms.iph.ttl = p->iph.ttl;
705 t->parms.iph.tos = p->iph.tos;
706 t->parms.iph.frag_off = p->iph.frag_off;
707
708 if (t->parms.link != p->link) {
709 int mtu;
710
711 t->parms.link = p->link;
712 mtu = ip_tunnel_bind_dev(dev);
713 if (set_mtu)
714 dev->mtu = mtu;
715 }
9a4aa9af 716 tunnel_dst_reset_all(t);
c5441932
PS
717 netdev_state_change(dev);
718}
719
720int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
721{
722 int err = 0;
723 struct ip_tunnel *t;
724 struct net *net = dev_net(dev);
725 struct ip_tunnel *tunnel = netdev_priv(dev);
726 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
727
728 BUG_ON(!itn->fb_tunnel_dev);
729 switch (cmd) {
730 case SIOCGETTUNNEL:
731 t = NULL;
732 if (dev == itn->fb_tunnel_dev)
733 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
734 if (t == NULL)
735 t = netdev_priv(dev);
736 memcpy(p, &t->parms, sizeof(*p));
737 break;
738
739 case SIOCADDTUNNEL:
740 case SIOCCHGTUNNEL:
741 err = -EPERM;
742 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
743 goto done;
744 if (p->iph.ttl)
745 p->iph.frag_off |= htons(IP_DF);
746 if (!(p->i_flags&TUNNEL_KEY))
747 p->i_key = 0;
748 if (!(p->o_flags&TUNNEL_KEY))
749 p->o_key = 0;
750
751 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
752
753 if (!t && (cmd == SIOCADDTUNNEL))
754 t = ip_tunnel_create(net, itn, p);
755
756 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
757 if (t != NULL) {
758 if (t->dev != dev) {
759 err = -EEXIST;
760 break;
761 }
762 } else {
763 unsigned int nflags = 0;
764
765 if (ipv4_is_multicast(p->iph.daddr))
766 nflags = IFF_BROADCAST;
767 else if (p->iph.daddr)
768 nflags = IFF_POINTOPOINT;
769
770 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
771 err = -EINVAL;
772 break;
773 }
774
775 t = netdev_priv(dev);
776 }
777 }
778
779 if (t) {
780 err = 0;
781 ip_tunnel_update(itn, t, dev, p, true);
782 } else
783 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
784 break;
785
786 case SIOCDELTUNNEL:
787 err = -EPERM;
788 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
789 goto done;
790
791 if (dev == itn->fb_tunnel_dev) {
792 err = -ENOENT;
793 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
794 if (t == NULL)
795 goto done;
796 err = -EPERM;
797 if (t == netdev_priv(itn->fb_tunnel_dev))
798 goto done;
799 dev = t->dev;
800 }
801 unregister_netdevice(dev);
802 err = 0;
803 break;
804
805 default:
806 err = -EINVAL;
807 }
808
809done:
810 return err;
811}
812EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
813
814int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
815{
816 struct ip_tunnel *tunnel = netdev_priv(dev);
817 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
818
819 if (new_mtu < 68 ||
820 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
821 return -EINVAL;
822 dev->mtu = new_mtu;
823 return 0;
824}
825EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
826
827static void ip_tunnel_dev_free(struct net_device *dev)
828{
829 struct ip_tunnel *tunnel = netdev_priv(dev);
830
831 gro_cells_destroy(&tunnel->gro_cells);
9a4aa9af 832 free_percpu(tunnel->dst_cache);
c5441932
PS
833 free_percpu(dev->tstats);
834 free_netdev(dev);
835}
836
837void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
838{
c5441932
PS
839 struct ip_tunnel *tunnel = netdev_priv(dev);
840 struct ip_tunnel_net *itn;
841
6c742e71 842 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
c5441932
PS
843
844 if (itn->fb_tunnel_dev != dev) {
845 ip_tunnel_del(netdev_priv(dev));
846 unregister_netdevice_queue(dev, head);
847 }
848}
849EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
850
d3b6f614 851int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
c5441932
PS
852 struct rtnl_link_ops *ops, char *devname)
853{
854 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
855 struct ip_tunnel_parm parms;
6261d983 856 unsigned int i;
c5441932 857
6261d983 858 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
859 INIT_HLIST_HEAD(&itn->tunnels[i]);
c5441932
PS
860
861 if (!ops) {
862 itn->fb_tunnel_dev = NULL;
863 return 0;
864 }
6261d983 865
c5441932
PS
866 memset(&parms, 0, sizeof(parms));
867 if (devname)
868 strlcpy(parms.name, devname, IFNAMSIZ);
869
870 rtnl_lock();
871 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
ea857f28
DC
872 /* FB netdevice is special: we have one, and only one per netns.
873 * Allowing to move it to another netns is clearly unsafe.
874 */
67013282 875 if (!IS_ERR(itn->fb_tunnel_dev)) {
b4de77ad 876 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
67013282
SK
877 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
878 }
b4de77ad 879 rtnl_unlock();
c5441932 880
27d79f3b 881 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
c5441932
PS
882}
883EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
884
6c742e71
ND
885static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
886 struct rtnl_link_ops *ops)
c5441932 887{
6c742e71
ND
888 struct net *net = dev_net(itn->fb_tunnel_dev);
889 struct net_device *dev, *aux;
c5441932
PS
890 int h;
891
6c742e71
ND
892 for_each_netdev_safe(net, dev, aux)
893 if (dev->rtnl_link_ops == ops)
894 unregister_netdevice_queue(dev, head);
895
c5441932
PS
896 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
897 struct ip_tunnel *t;
898 struct hlist_node *n;
899 struct hlist_head *thead = &itn->tunnels[h];
900
901 hlist_for_each_entry_safe(t, n, thead, hash_node)
6c742e71
ND
902 /* If dev is in the same netns, it has already
903 * been added to the list by the previous loop.
904 */
905 if (!net_eq(dev_net(t->dev), net))
906 unregister_netdevice_queue(t->dev, head);
c5441932 907 }
c5441932
PS
908}
909
6c742e71 910void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
c5441932
PS
911{
912 LIST_HEAD(list);
913
914 rtnl_lock();
6c742e71 915 ip_tunnel_destroy(itn, &list, ops);
c5441932
PS
916 unregister_netdevice_many(&list);
917 rtnl_unlock();
c5441932
PS
918}
919EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
920
921int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
922 struct ip_tunnel_parm *p)
923{
924 struct ip_tunnel *nt;
925 struct net *net = dev_net(dev);
926 struct ip_tunnel_net *itn;
927 int mtu;
928 int err;
929
930 nt = netdev_priv(dev);
931 itn = net_generic(net, nt->ip_tnl_net_id);
932
933 if (ip_tunnel_find(itn, p, dev->type))
934 return -EEXIST;
935
5e6700b3 936 nt->net = net;
c5441932
PS
937 nt->parms = *p;
938 err = register_netdevice(dev);
939 if (err)
940 goto out;
941
942 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
943 eth_hw_addr_random(dev);
944
945 mtu = ip_tunnel_bind_dev(dev);
946 if (!tb[IFLA_MTU])
947 dev->mtu = mtu;
948
949 ip_tunnel_add(itn, nt);
950
951out:
952 return err;
953}
954EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
955
956int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
957 struct ip_tunnel_parm *p)
958{
6c742e71 959 struct ip_tunnel *t;
c5441932 960 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 961 struct net *net = tunnel->net;
c5441932
PS
962 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
963
964 if (dev == itn->fb_tunnel_dev)
965 return -EINVAL;
966
c5441932
PS
967 t = ip_tunnel_find(itn, p, dev->type);
968
969 if (t) {
970 if (t->dev != dev)
971 return -EEXIST;
972 } else {
6c742e71 973 t = tunnel;
c5441932
PS
974
975 if (dev->type != ARPHRD_ETHER) {
976 unsigned int nflags = 0;
977
978 if (ipv4_is_multicast(p->iph.daddr))
979 nflags = IFF_BROADCAST;
980 else if (p->iph.daddr)
981 nflags = IFF_POINTOPOINT;
982
983 if ((dev->flags ^ nflags) &
984 (IFF_POINTOPOINT | IFF_BROADCAST))
985 return -EINVAL;
986 }
987 }
988
989 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
990 return 0;
991}
992EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
993
994int ip_tunnel_init(struct net_device *dev)
995{
996 struct ip_tunnel *tunnel = netdev_priv(dev);
997 struct iphdr *iph = &tunnel->parms.iph;
827da44c 998 int i, err;
c5441932
PS
999
1000 dev->destructor = ip_tunnel_dev_free;
8f84985f 1001 dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
c5441932
PS
1002 if (!dev->tstats)
1003 return -ENOMEM;
1004
827da44c 1005 for_each_possible_cpu(i) {
8f84985f 1006 struct pcpu_sw_netstats *ipt_stats;
827da44c
JS
1007 ipt_stats = per_cpu_ptr(dev->tstats, i);
1008 u64_stats_init(&ipt_stats->syncp);
1009 }
1010
9a4aa9af
TH
1011 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1012 if (!tunnel->dst_cache) {
1013 free_percpu(dev->tstats);
1014 return -ENOMEM;
1015 }
1016
c5441932
PS
1017 err = gro_cells_init(&tunnel->gro_cells, dev);
1018 if (err) {
9a4aa9af 1019 free_percpu(tunnel->dst_cache);
c5441932
PS
1020 free_percpu(dev->tstats);
1021 return err;
1022 }
1023
1024 tunnel->dev = dev;
6c742e71 1025 tunnel->net = dev_net(dev);
c5441932
PS
1026 strcpy(tunnel->parms.name, dev->name);
1027 iph->version = 4;
1028 iph->ihl = 5;
1029
1030 return 0;
1031}
1032EXPORT_SYMBOL_GPL(ip_tunnel_init);
1033
1034void ip_tunnel_uninit(struct net_device *dev)
1035{
c5441932 1036 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1037 struct net *net = tunnel->net;
c5441932
PS
1038 struct ip_tunnel_net *itn;
1039
1040 itn = net_generic(net, tunnel->ip_tnl_net_id);
1041 /* fb_tunnel_dev will be unregisted in net-exit call. */
1042 if (itn->fb_tunnel_dev != dev)
1043 ip_tunnel_del(netdev_priv(dev));
7d442fab 1044
9a4aa9af 1045 tunnel_dst_reset_all(tunnel);
c5441932
PS
1046}
1047EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1048
1049/* Do least required initialization, rest of init is done in tunnel_init call */
1050void ip_tunnel_setup(struct net_device *dev, int net_id)
1051{
1052 struct ip_tunnel *tunnel = netdev_priv(dev);
1053 tunnel->ip_tnl_net_id = net_id;
1054}
1055EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1056
1057MODULE_LICENSE("GPL");
This page took 0.126361 seconds and 5 git commands to generate.