ipv4: do not use this_cpu_ptr() in preemptible context
[deliverable/linux.git] / net / ipv4 / ip_tunnel.c
CommitLineData
c5441932
PS
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
27d79f3b 43#include <linux/err.h>
c5441932
PS
44
45#include <net/sock.h>
46#include <net/ip.h>
47#include <net/icmp.h>
48#include <net/protocol.h>
49#include <net/ip_tunnels.h>
50#include <net/arp.h>
51#include <net/checksum.h>
52#include <net/dsfield.h>
53#include <net/inet_ecn.h>
54#include <net/xfrm.h>
55#include <net/net_namespace.h>
56#include <net/netns/generic.h>
57#include <net/rtnetlink.h>
58
59#if IS_ENABLED(CONFIG_IPV6)
60#include <net/ipv6.h>
61#include <net/ip6_fib.h>
62#include <net/ip6_route.h>
63#endif
64
967680e0 65static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
c5441932
PS
66{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
70
6c7e7610 71static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
95cb5745 72 struct dst_entry *dst, __be32 saddr)
7d442fab
TH
73{
74 struct dst_entry *old_dst;
75
f8864972 76 dst_clone(dst);
6c7e7610 77 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
7d442fab 78 dst_release(old_dst);
95cb5745 79 idst->saddr = saddr;
7d442fab
TH
80}
81
a35165ca 82static noinline void tunnel_dst_set(struct ip_tunnel *t,
95cb5745 83 struct dst_entry *dst, __be32 saddr)
7d442fab 84{
a35165ca 85 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
7d442fab
TH
86}
87
6c7e7610 88static void tunnel_dst_reset(struct ip_tunnel *t)
7d442fab 89{
95cb5745 90 tunnel_dst_set(t, NULL, 0);
7d442fab
TH
91}
92
cf71d2bc 93void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
9a4aa9af
TH
94{
95 int i;
96
97 for_each_possible_cpu(i)
95cb5745 98 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
9a4aa9af 99}
cf71d2bc 100EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
9a4aa9af 101
95cb5745
DP
102static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
103 u32 cookie, __be32 *saddr)
7d442fab 104{
95cb5745 105 struct ip_tunnel_dst *idst;
7d442fab
TH
106 struct dst_entry *dst;
107
108 rcu_read_lock();
a35165ca 109 idst = raw_cpu_ptr(t->dst_cache);
95cb5745 110 dst = rcu_dereference(idst->dst);
f8864972
ED
111 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
112 dst = NULL;
b045d37b 113 if (dst) {
95cb5745
DP
114 if (!dst->obsolete || dst->ops->check(dst, cookie)) {
115 *saddr = idst->saddr;
116 } else {
b045d37b 117 tunnel_dst_reset(t);
f8864972
ED
118 dst_release(dst);
119 dst = NULL;
b045d37b 120 }
7d442fab 121 }
b045d37b
ED
122 rcu_read_unlock();
123 return (struct rtable *)dst;
7d442fab
TH
124}
125
c5441932
PS
126static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
127 __be16 flags, __be32 key)
128{
129 if (p->i_flags & TUNNEL_KEY) {
130 if (flags & TUNNEL_KEY)
131 return key == p->i_key;
132 else
133 /* key expected, none present */
134 return false;
135 } else
136 return !(flags & TUNNEL_KEY);
137}
138
139/* Fallback tunnel: no source, no destination, no key, no options
140
141 Tunnel hash table:
142 We require exact key match i.e. if a key is present in packet
143 it will match only tunnel with the same key; if it is not present,
144 it will match only keyless tunnel.
145
146 All keysless packets, if not matched configured keyless tunnels
147 will match fallback tunnel.
148 Given src, dst and key, find appropriate for input tunnel.
149*/
150struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
151 int link, __be16 flags,
152 __be32 remote, __be32 local,
153 __be32 key)
154{
155 unsigned int hash;
156 struct ip_tunnel *t, *cand = NULL;
157 struct hlist_head *head;
158
967680e0 159 hash = ip_tunnel_hash(key, remote);
c5441932
PS
160 head = &itn->tunnels[hash];
161
162 hlist_for_each_entry_rcu(t, head, hash_node) {
163 if (local != t->parms.iph.saddr ||
164 remote != t->parms.iph.daddr ||
165 !(t->dev->flags & IFF_UP))
166 continue;
167
168 if (!ip_tunnel_key_match(&t->parms, flags, key))
169 continue;
170
171 if (t->parms.link == link)
172 return t;
173 else
174 cand = t;
175 }
176
177 hlist_for_each_entry_rcu(t, head, hash_node) {
178 if (remote != t->parms.iph.daddr ||
e0056593 179 t->parms.iph.saddr != 0 ||
c5441932
PS
180 !(t->dev->flags & IFF_UP))
181 continue;
182
183 if (!ip_tunnel_key_match(&t->parms, flags, key))
184 continue;
185
186 if (t->parms.link == link)
187 return t;
188 else if (!cand)
189 cand = t;
190 }
191
967680e0 192 hash = ip_tunnel_hash(key, 0);
c5441932
PS
193 head = &itn->tunnels[hash];
194
195 hlist_for_each_entry_rcu(t, head, hash_node) {
e0056593
DP
196 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
197 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
198 continue;
199
200 if (!(t->dev->flags & IFF_UP))
c5441932
PS
201 continue;
202
203 if (!ip_tunnel_key_match(&t->parms, flags, key))
204 continue;
205
206 if (t->parms.link == link)
207 return t;
208 else if (!cand)
209 cand = t;
210 }
211
212 if (flags & TUNNEL_NO_KEY)
213 goto skip_key_lookup;
214
215 hlist_for_each_entry_rcu(t, head, hash_node) {
216 if (t->parms.i_key != key ||
e0056593
DP
217 t->parms.iph.saddr != 0 ||
218 t->parms.iph.daddr != 0 ||
c5441932
PS
219 !(t->dev->flags & IFF_UP))
220 continue;
221
222 if (t->parms.link == link)
223 return t;
224 else if (!cand)
225 cand = t;
226 }
227
228skip_key_lookup:
229 if (cand)
230 return cand;
231
232 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
233 return netdev_priv(itn->fb_tunnel_dev);
234
235
236 return NULL;
237}
238EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
239
240static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
241 struct ip_tunnel_parm *parms)
242{
243 unsigned int h;
244 __be32 remote;
6d608f06 245 __be32 i_key = parms->i_key;
c5441932
PS
246
247 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
248 remote = parms->iph.daddr;
249 else
250 remote = 0;
251
6d608f06
SK
252 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
253 i_key = 0;
254
255 h = ip_tunnel_hash(i_key, remote);
c5441932
PS
256 return &itn->tunnels[h];
257}
258
259static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
260{
261 struct hlist_head *head = ip_bucket(itn, &t->parms);
262
263 hlist_add_head_rcu(&t->hash_node, head);
264}
265
266static void ip_tunnel_del(struct ip_tunnel *t)
267{
268 hlist_del_init_rcu(&t->hash_node);
269}
270
271static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
272 struct ip_tunnel_parm *parms,
273 int type)
274{
275 __be32 remote = parms->iph.daddr;
276 __be32 local = parms->iph.saddr;
277 __be32 key = parms->i_key;
5ce54af1 278 __be16 flags = parms->i_flags;
c5441932
PS
279 int link = parms->link;
280 struct ip_tunnel *t = NULL;
281 struct hlist_head *head = ip_bucket(itn, parms);
282
283 hlist_for_each_entry_rcu(t, head, hash_node) {
284 if (local == t->parms.iph.saddr &&
285 remote == t->parms.iph.daddr &&
c5441932 286 link == t->parms.link &&
5ce54af1
DP
287 type == t->dev->type &&
288 ip_tunnel_key_match(&t->parms, flags, key))
c5441932
PS
289 break;
290 }
291 return t;
292}
293
294static struct net_device *__ip_tunnel_create(struct net *net,
295 const struct rtnl_link_ops *ops,
296 struct ip_tunnel_parm *parms)
297{
298 int err;
299 struct ip_tunnel *tunnel;
300 struct net_device *dev;
301 char name[IFNAMSIZ];
302
303 if (parms->name[0])
304 strlcpy(name, parms->name, IFNAMSIZ);
305 else {
54a5d382 306 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
c5441932
PS
307 err = -E2BIG;
308 goto failed;
309 }
310 strlcpy(name, ops->kind, IFNAMSIZ);
311 strncat(name, "%d", 2);
312 }
313
314 ASSERT_RTNL();
c835a677 315 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
c5441932
PS
316 if (!dev) {
317 err = -ENOMEM;
318 goto failed;
319 }
320 dev_net_set(dev, net);
321
322 dev->rtnl_link_ops = ops;
323
324 tunnel = netdev_priv(dev);
325 tunnel->parms = *parms;
5e6700b3 326 tunnel->net = net;
c5441932
PS
327
328 err = register_netdevice(dev);
329 if (err)
330 goto failed_free;
331
332 return dev;
333
334failed_free:
335 free_netdev(dev);
336failed:
337 return ERR_PTR(err);
338}
339
7d442fab
TH
340static inline void init_tunnel_flow(struct flowi4 *fl4,
341 int proto,
342 __be32 daddr, __be32 saddr,
343 __be32 key, __u8 tos, int oif)
c5441932
PS
344{
345 memset(fl4, 0, sizeof(*fl4));
346 fl4->flowi4_oif = oif;
347 fl4->daddr = daddr;
348 fl4->saddr = saddr;
349 fl4->flowi4_tos = tos;
350 fl4->flowi4_proto = proto;
351 fl4->fl4_gre_key = key;
c5441932
PS
352}
353
354static int ip_tunnel_bind_dev(struct net_device *dev)
355{
356 struct net_device *tdev = NULL;
357 struct ip_tunnel *tunnel = netdev_priv(dev);
358 const struct iphdr *iph;
359 int hlen = LL_MAX_HEADER;
360 int mtu = ETH_DATA_LEN;
361 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
362
363 iph = &tunnel->parms.iph;
364
365 /* Guess output device to choose reasonable mtu and needed_headroom */
366 if (iph->daddr) {
367 struct flowi4 fl4;
368 struct rtable *rt;
369
7d442fab
TH
370 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
371 iph->saddr, tunnel->parms.o_key,
372 RT_TOS(iph->tos), tunnel->parms.link);
373 rt = ip_route_output_key(tunnel->net, &fl4);
374
c5441932
PS
375 if (!IS_ERR(rt)) {
376 tdev = rt->dst.dev;
95cb5745 377 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
c5441932
PS
378 ip_rt_put(rt);
379 }
380 if (dev->type != ARPHRD_ETHER)
381 dev->flags |= IFF_POINTOPOINT;
382 }
383
384 if (!tdev && tunnel->parms.link)
6c742e71 385 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
c5441932
PS
386
387 if (tdev) {
388 hlen = tdev->hard_header_len + tdev->needed_headroom;
389 mtu = tdev->mtu;
390 }
391 dev->iflink = tunnel->parms.link;
392
393 dev->needed_headroom = t_hlen + hlen;
394 mtu -= (dev->hard_header_len + t_hlen);
395
396 if (mtu < 68)
397 mtu = 68;
398
399 return mtu;
400}
401
402static struct ip_tunnel *ip_tunnel_create(struct net *net,
403 struct ip_tunnel_net *itn,
404 struct ip_tunnel_parm *parms)
405{
4929fd8c 406 struct ip_tunnel *nt;
c5441932
PS
407 struct net_device *dev;
408
409 BUG_ON(!itn->fb_tunnel_dev);
c5441932
PS
410 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
411 if (IS_ERR(dev))
6dd3c9ec 412 return ERR_CAST(dev);
c5441932
PS
413
414 dev->mtu = ip_tunnel_bind_dev(dev);
415
416 nt = netdev_priv(dev);
417 ip_tunnel_add(itn, nt);
418 return nt;
419}
420
421int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
422 const struct tnl_ptk_info *tpi, bool log_ecn_error)
423{
8f84985f 424 struct pcpu_sw_netstats *tstats;
c5441932
PS
425 const struct iphdr *iph = ip_hdr(skb);
426 int err;
427
c5441932
PS
428#ifdef CONFIG_NET_IPGRE_BROADCAST
429 if (ipv4_is_multicast(iph->daddr)) {
c5441932
PS
430 tunnel->dev->stats.multicast++;
431 skb->pkt_type = PACKET_BROADCAST;
432 }
433#endif
434
435 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
436 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
437 tunnel->dev->stats.rx_crc_errors++;
438 tunnel->dev->stats.rx_errors++;
439 goto drop;
440 }
441
442 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
443 if (!(tpi->flags&TUNNEL_SEQ) ||
444 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
445 tunnel->dev->stats.rx_fifo_errors++;
446 tunnel->dev->stats.rx_errors++;
447 goto drop;
448 }
449 tunnel->i_seqno = ntohl(tpi->seq) + 1;
450 }
451
e96f2e7c
YC
452 skb_reset_network_header(skb);
453
c5441932
PS
454 err = IP_ECN_decapsulate(iph, skb);
455 if (unlikely(err)) {
456 if (log_ecn_error)
457 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
458 &iph->saddr, iph->tos);
459 if (err > 1) {
460 ++tunnel->dev->stats.rx_frame_errors;
461 ++tunnel->dev->stats.rx_errors;
462 goto drop;
463 }
464 }
465
466 tstats = this_cpu_ptr(tunnel->dev->tstats);
467 u64_stats_update_begin(&tstats->syncp);
468 tstats->rx_packets++;
469 tstats->rx_bytes += skb->len;
470 u64_stats_update_end(&tstats->syncp);
471
81b9eab5
AS
472 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
473
3d7b46cd
PS
474 if (tunnel->dev->type == ARPHRD_ETHER) {
475 skb->protocol = eth_type_trans(skb, tunnel->dev);
476 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
477 } else {
478 skb->dev = tunnel->dev;
479 }
64261f23 480
c5441932
PS
481 gro_cells_receive(&tunnel->gro_cells, skb);
482 return 0;
483
484drop:
485 kfree_skb(skb);
486 return 0;
487}
488EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
489
23a3647b
PS
490static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
491 struct rtable *rt, __be16 df)
492{
493 struct ip_tunnel *tunnel = netdev_priv(dev);
8c91e162 494 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
23a3647b
PS
495 int mtu;
496
497 if (df)
498 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
499 - sizeof(struct iphdr) - tunnel->hlen;
500 else
501 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
502
503 if (skb_dst(skb))
504 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
505
506 if (skb->protocol == htons(ETH_P_IP)) {
507 if (!skb_is_gso(skb) &&
508 (df & htons(IP_DF)) && mtu < pkt_size) {
509 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
510 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
511 return -E2BIG;
512 }
513 }
514#if IS_ENABLED(CONFIG_IPV6)
515 else if (skb->protocol == htons(ETH_P_IPV6)) {
516 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
517
518 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
519 mtu >= IPV6_MIN_MTU) {
520 if ((tunnel->parms.iph.daddr &&
521 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
522 rt6->rt6i_dst.plen == 128) {
523 rt6->rt6i_flags |= RTF_MODIFIED;
524 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
525 }
526 }
527
528 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
529 mtu < pkt_size) {
530 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
531 return -E2BIG;
532 }
533 }
534#endif
535 return 0;
536}
537
c5441932 538void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
bf3d6a8f 539 const struct iphdr *tnl_params, const u8 protocol)
c5441932
PS
540{
541 struct ip_tunnel *tunnel = netdev_priv(dev);
542 const struct iphdr *inner_iph;
c5441932
PS
543 struct flowi4 fl4;
544 u8 tos, ttl;
545 __be16 df;
b045d37b 546 struct rtable *rt; /* Route to the other host */
c5441932
PS
547 unsigned int max_headroom; /* The extra header space needed */
548 __be32 dst;
0e6fbc5b 549 int err;
22fb22ea 550 bool connected;
c5441932
PS
551
552 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
22fb22ea 553 connected = (tunnel->parms.iph.daddr != 0);
c5441932
PS
554
555 dst = tnl_params->daddr;
556 if (dst == 0) {
557 /* NBMA tunnel */
558
559 if (skb_dst(skb) == NULL) {
560 dev->stats.tx_fifo_errors++;
561 goto tx_error;
562 }
563
564 if (skb->protocol == htons(ETH_P_IP)) {
565 rt = skb_rtable(skb);
566 dst = rt_nexthop(rt, inner_iph->daddr);
567 }
568#if IS_ENABLED(CONFIG_IPV6)
569 else if (skb->protocol == htons(ETH_P_IPV6)) {
570 const struct in6_addr *addr6;
571 struct neighbour *neigh;
572 bool do_tx_error_icmp;
573 int addr_type;
574
575 neigh = dst_neigh_lookup(skb_dst(skb),
576 &ipv6_hdr(skb)->daddr);
577 if (neigh == NULL)
578 goto tx_error;
579
580 addr6 = (const struct in6_addr *)&neigh->primary_key;
581 addr_type = ipv6_addr_type(addr6);
582
583 if (addr_type == IPV6_ADDR_ANY) {
584 addr6 = &ipv6_hdr(skb)->daddr;
585 addr_type = ipv6_addr_type(addr6);
586 }
587
588 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
589 do_tx_error_icmp = true;
590 else {
591 do_tx_error_icmp = false;
592 dst = addr6->s6_addr32[3];
593 }
594 neigh_release(neigh);
595 if (do_tx_error_icmp)
596 goto tx_error_icmp;
597 }
598#endif
599 else
600 goto tx_error;
7d442fab
TH
601
602 connected = false;
c5441932
PS
603 }
604
605 tos = tnl_params->tos;
606 if (tos & 0x1) {
607 tos &= ~0x1;
7d442fab 608 if (skb->protocol == htons(ETH_P_IP)) {
c5441932 609 tos = inner_iph->tos;
7d442fab
TH
610 connected = false;
611 } else if (skb->protocol == htons(ETH_P_IPV6)) {
c5441932 612 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
7d442fab
TH
613 connected = false;
614 }
c5441932
PS
615 }
616
7d442fab
TH
617 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
618 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
619
95cb5745 620 rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
7d442fab
TH
621
622 if (!rt) {
623 rt = ip_route_output_key(tunnel->net, &fl4);
624
625 if (IS_ERR(rt)) {
626 dev->stats.tx_carrier_errors++;
627 goto tx_error;
628 }
629 if (connected)
95cb5745 630 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
c5441932 631 }
7d442fab 632
0e6fbc5b 633 if (rt->dst.dev == dev) {
c5441932
PS
634 ip_rt_put(rt);
635 dev->stats.collisions++;
636 goto tx_error;
637 }
c5441932 638
23a3647b
PS
639 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
640 ip_rt_put(rt);
641 goto tx_error;
c5441932 642 }
c5441932
PS
643
644 if (tunnel->err_count > 0) {
645 if (time_before(jiffies,
646 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
647 tunnel->err_count--;
648
11c21a30 649 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
c5441932
PS
650 dst_link_failure(skb);
651 } else
652 tunnel->err_count = 0;
653 }
654
d4a71b15 655 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
c5441932
PS
656 ttl = tnl_params->ttl;
657 if (ttl == 0) {
658 if (skb->protocol == htons(ETH_P_IP))
659 ttl = inner_iph->ttl;
660#if IS_ENABLED(CONFIG_IPV6)
661 else if (skb->protocol == htons(ETH_P_IPV6))
662 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
663#endif
664 else
665 ttl = ip4_dst_hoplimit(&rt->dst);
666 }
667
23a3647b
PS
668 df = tnl_params->frag_off;
669 if (skb->protocol == htons(ETH_P_IP))
670 df |= (inner_iph->frag_off&htons(IP_DF));
671
0e6fbc5b
PS
672 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
673 + rt->dst.header_len;
3e08f4a7 674 if (max_headroom > dev->needed_headroom)
c5441932 675 dev->needed_headroom = max_headroom;
3e08f4a7
SK
676
677 if (skb_cow_head(skb, dev->needed_headroom)) {
586d5fc8 678 ip_rt_put(rt);
3e08f4a7 679 dev->stats.tx_dropped++;
3acfa1e7 680 kfree_skb(skb);
3e08f4a7 681 return;
c5441932
PS
682 }
683
aad88724 684 err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
d4a71b15 685 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
0e6fbc5b 686 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
c5441932 687
c5441932
PS
688 return;
689
690#if IS_ENABLED(CONFIG_IPV6)
691tx_error_icmp:
692 dst_link_failure(skb);
693#endif
694tx_error:
695 dev->stats.tx_errors++;
3acfa1e7 696 kfree_skb(skb);
c5441932
PS
697}
698EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
699
700static void ip_tunnel_update(struct ip_tunnel_net *itn,
701 struct ip_tunnel *t,
702 struct net_device *dev,
703 struct ip_tunnel_parm *p,
704 bool set_mtu)
705{
706 ip_tunnel_del(t);
707 t->parms.iph.saddr = p->iph.saddr;
708 t->parms.iph.daddr = p->iph.daddr;
709 t->parms.i_key = p->i_key;
710 t->parms.o_key = p->o_key;
711 if (dev->type != ARPHRD_ETHER) {
712 memcpy(dev->dev_addr, &p->iph.saddr, 4);
713 memcpy(dev->broadcast, &p->iph.daddr, 4);
714 }
715 ip_tunnel_add(itn, t);
716
717 t->parms.iph.ttl = p->iph.ttl;
718 t->parms.iph.tos = p->iph.tos;
719 t->parms.iph.frag_off = p->iph.frag_off;
720
721 if (t->parms.link != p->link) {
722 int mtu;
723
724 t->parms.link = p->link;
725 mtu = ip_tunnel_bind_dev(dev);
726 if (set_mtu)
727 dev->mtu = mtu;
728 }
cf71d2bc 729 ip_tunnel_dst_reset_all(t);
c5441932
PS
730 netdev_state_change(dev);
731}
732
733int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
734{
735 int err = 0;
8c923ce2
ND
736 struct ip_tunnel *t = netdev_priv(dev);
737 struct net *net = t->net;
738 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
c5441932
PS
739
740 BUG_ON(!itn->fb_tunnel_dev);
741 switch (cmd) {
742 case SIOCGETTUNNEL:
8c923ce2 743 if (dev == itn->fb_tunnel_dev) {
c5441932 744 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
8c923ce2
ND
745 if (t == NULL)
746 t = netdev_priv(dev);
747 }
c5441932
PS
748 memcpy(p, &t->parms, sizeof(*p));
749 break;
750
751 case SIOCADDTUNNEL:
752 case SIOCCHGTUNNEL:
753 err = -EPERM;
754 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
755 goto done;
756 if (p->iph.ttl)
757 p->iph.frag_off |= htons(IP_DF);
7c8e6b9c
DP
758 if (!(p->i_flags & VTI_ISVTI)) {
759 if (!(p->i_flags & TUNNEL_KEY))
760 p->i_key = 0;
761 if (!(p->o_flags & TUNNEL_KEY))
762 p->o_key = 0;
763 }
c5441932
PS
764
765 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
766
6dd3c9ec 767 if (!t && (cmd == SIOCADDTUNNEL)) {
c5441932 768 t = ip_tunnel_create(net, itn, p);
ee30ef4d
DJ
769 err = PTR_ERR_OR_ZERO(t);
770 break;
6dd3c9ec 771 }
c5441932
PS
772 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
773 if (t != NULL) {
774 if (t->dev != dev) {
775 err = -EEXIST;
776 break;
777 }
778 } else {
779 unsigned int nflags = 0;
780
781 if (ipv4_is_multicast(p->iph.daddr))
782 nflags = IFF_BROADCAST;
783 else if (p->iph.daddr)
784 nflags = IFF_POINTOPOINT;
785
786 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
787 err = -EINVAL;
788 break;
789 }
790
791 t = netdev_priv(dev);
792 }
793 }
794
795 if (t) {
796 err = 0;
797 ip_tunnel_update(itn, t, dev, p, true);
6dd3c9ec
FW
798 } else {
799 err = -ENOENT;
800 }
c5441932
PS
801 break;
802
803 case SIOCDELTUNNEL:
804 err = -EPERM;
805 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
806 goto done;
807
808 if (dev == itn->fb_tunnel_dev) {
809 err = -ENOENT;
810 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
811 if (t == NULL)
812 goto done;
813 err = -EPERM;
814 if (t == netdev_priv(itn->fb_tunnel_dev))
815 goto done;
816 dev = t->dev;
817 }
818 unregister_netdevice(dev);
819 err = 0;
820 break;
821
822 default:
823 err = -EINVAL;
824 }
825
826done:
827 return err;
828}
829EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
830
831int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
832{
833 struct ip_tunnel *tunnel = netdev_priv(dev);
834 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
835
836 if (new_mtu < 68 ||
837 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
838 return -EINVAL;
839 dev->mtu = new_mtu;
840 return 0;
841}
842EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
843
844static void ip_tunnel_dev_free(struct net_device *dev)
845{
846 struct ip_tunnel *tunnel = netdev_priv(dev);
847
848 gro_cells_destroy(&tunnel->gro_cells);
9a4aa9af 849 free_percpu(tunnel->dst_cache);
c5441932
PS
850 free_percpu(dev->tstats);
851 free_netdev(dev);
852}
853
854void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
855{
c5441932
PS
856 struct ip_tunnel *tunnel = netdev_priv(dev);
857 struct ip_tunnel_net *itn;
858
6c742e71 859 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
c5441932
PS
860
861 if (itn->fb_tunnel_dev != dev) {
862 ip_tunnel_del(netdev_priv(dev));
863 unregister_netdevice_queue(dev, head);
864 }
865}
866EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
867
d3b6f614 868int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
c5441932
PS
869 struct rtnl_link_ops *ops, char *devname)
870{
871 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
872 struct ip_tunnel_parm parms;
6261d983 873 unsigned int i;
c5441932 874
6261d983 875 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
876 INIT_HLIST_HEAD(&itn->tunnels[i]);
c5441932
PS
877
878 if (!ops) {
879 itn->fb_tunnel_dev = NULL;
880 return 0;
881 }
6261d983 882
c5441932
PS
883 memset(&parms, 0, sizeof(parms));
884 if (devname)
885 strlcpy(parms.name, devname, IFNAMSIZ);
886
887 rtnl_lock();
888 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
ea857f28
DC
889 /* FB netdevice is special: we have one, and only one per netns.
890 * Allowing to move it to another netns is clearly unsafe.
891 */
67013282 892 if (!IS_ERR(itn->fb_tunnel_dev)) {
b4de77ad 893 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
78ff4be4 894 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
67013282
SK
895 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
896 }
b4de77ad 897 rtnl_unlock();
c5441932 898
27d79f3b 899 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
c5441932
PS
900}
901EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
902
6c742e71
ND
903static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
904 struct rtnl_link_ops *ops)
c5441932 905{
6c742e71
ND
906 struct net *net = dev_net(itn->fb_tunnel_dev);
907 struct net_device *dev, *aux;
c5441932
PS
908 int h;
909
6c742e71
ND
910 for_each_netdev_safe(net, dev, aux)
911 if (dev->rtnl_link_ops == ops)
912 unregister_netdevice_queue(dev, head);
913
c5441932
PS
914 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
915 struct ip_tunnel *t;
916 struct hlist_node *n;
917 struct hlist_head *thead = &itn->tunnels[h];
918
919 hlist_for_each_entry_safe(t, n, thead, hash_node)
6c742e71
ND
920 /* If dev is in the same netns, it has already
921 * been added to the list by the previous loop.
922 */
923 if (!net_eq(dev_net(t->dev), net))
924 unregister_netdevice_queue(t->dev, head);
c5441932 925 }
c5441932
PS
926}
927
6c742e71 928void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
c5441932
PS
929{
930 LIST_HEAD(list);
931
932 rtnl_lock();
6c742e71 933 ip_tunnel_destroy(itn, &list, ops);
c5441932
PS
934 unregister_netdevice_many(&list);
935 rtnl_unlock();
c5441932
PS
936}
937EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
938
939int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
940 struct ip_tunnel_parm *p)
941{
942 struct ip_tunnel *nt;
943 struct net *net = dev_net(dev);
944 struct ip_tunnel_net *itn;
945 int mtu;
946 int err;
947
948 nt = netdev_priv(dev);
949 itn = net_generic(net, nt->ip_tnl_net_id);
950
951 if (ip_tunnel_find(itn, p, dev->type))
952 return -EEXIST;
953
5e6700b3 954 nt->net = net;
c5441932
PS
955 nt->parms = *p;
956 err = register_netdevice(dev);
957 if (err)
958 goto out;
959
960 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
961 eth_hw_addr_random(dev);
962
963 mtu = ip_tunnel_bind_dev(dev);
964 if (!tb[IFLA_MTU])
965 dev->mtu = mtu;
966
967 ip_tunnel_add(itn, nt);
968
969out:
970 return err;
971}
972EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
973
974int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
975 struct ip_tunnel_parm *p)
976{
6c742e71 977 struct ip_tunnel *t;
c5441932 978 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 979 struct net *net = tunnel->net;
c5441932
PS
980 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
981
982 if (dev == itn->fb_tunnel_dev)
983 return -EINVAL;
984
c5441932
PS
985 t = ip_tunnel_find(itn, p, dev->type);
986
987 if (t) {
988 if (t->dev != dev)
989 return -EEXIST;
990 } else {
6c742e71 991 t = tunnel;
c5441932
PS
992
993 if (dev->type != ARPHRD_ETHER) {
994 unsigned int nflags = 0;
995
996 if (ipv4_is_multicast(p->iph.daddr))
997 nflags = IFF_BROADCAST;
998 else if (p->iph.daddr)
999 nflags = IFF_POINTOPOINT;
1000
1001 if ((dev->flags ^ nflags) &
1002 (IFF_POINTOPOINT | IFF_BROADCAST))
1003 return -EINVAL;
1004 }
1005 }
1006
1007 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1008 return 0;
1009}
1010EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1011
1012int ip_tunnel_init(struct net_device *dev)
1013{
1014 struct ip_tunnel *tunnel = netdev_priv(dev);
1015 struct iphdr *iph = &tunnel->parms.iph;
1c213bd2 1016 int err;
c5441932
PS
1017
1018 dev->destructor = ip_tunnel_dev_free;
1c213bd2 1019 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
c5441932
PS
1020 if (!dev->tstats)
1021 return -ENOMEM;
1022
9a4aa9af
TH
1023 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1024 if (!tunnel->dst_cache) {
1025 free_percpu(dev->tstats);
1026 return -ENOMEM;
1027 }
1028
c5441932
PS
1029 err = gro_cells_init(&tunnel->gro_cells, dev);
1030 if (err) {
9a4aa9af 1031 free_percpu(tunnel->dst_cache);
c5441932
PS
1032 free_percpu(dev->tstats);
1033 return err;
1034 }
1035
1036 tunnel->dev = dev;
6c742e71 1037 tunnel->net = dev_net(dev);
c5441932
PS
1038 strcpy(tunnel->parms.name, dev->name);
1039 iph->version = 4;
1040 iph->ihl = 5;
1041
1042 return 0;
1043}
1044EXPORT_SYMBOL_GPL(ip_tunnel_init);
1045
1046void ip_tunnel_uninit(struct net_device *dev)
1047{
c5441932 1048 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1049 struct net *net = tunnel->net;
c5441932
PS
1050 struct ip_tunnel_net *itn;
1051
1052 itn = net_generic(net, tunnel->ip_tnl_net_id);
1053 /* fb_tunnel_dev will be unregisted in net-exit call. */
1054 if (itn->fb_tunnel_dev != dev)
1055 ip_tunnel_del(netdev_priv(dev));
7d442fab 1056
cf71d2bc 1057 ip_tunnel_dst_reset_all(tunnel);
c5441932
PS
1058}
1059EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1060
1061/* Do least required initialization, rest of init is done in tunnel_init call */
1062void ip_tunnel_setup(struct net_device *dev, int net_id)
1063{
1064 struct ip_tunnel *tunnel = netdev_priv(dev);
1065 tunnel->ip_tnl_net_id = net_id;
1066}
1067EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1068
1069MODULE_LICENSE("GPL");
This page took 0.175261 seconds and 5 git commands to generate.