ipv4: do not use this_cpu_ptr() in preemptible context
[deliverable/linux.git] / net / ipv4 / ip_tunnel.c
1 /*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
/* Hash a tunnel's (key, remote address) pair into the tunnel hash table.
 * Both values are big-endian on the wire; __force-cast to u32 purely to
 * satisfy sparse before mixing them for hash_32().
 */
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
			 IP_TNL_HASH_BITS);
}
70
/* Install @dst (and the matching source address) into one per-cpu cache
 * slot, dropping the reference held on the previous entry.  The xchg()
 * publishes the new pointer atomically, so concurrent readers doing
 * rcu_dereference(idst->dst) (see tunnel_rtable_get()) always observe
 * either the old or the new entry, never a torn value.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	/* Take the cache's reference before making the dst visible. */
	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}
81
/* Cache @dst in the current CPU's slot of @t's per-cpu dst cache.
 *
 * raw_cpu_ptr() (not this_cpu_ptr()) is deliberate: callers may run in
 * preemptible context, and racing with migration is harmless here — at
 * worst the route lands in another CPU's slot, which only costs a later
 * cache miss.  Using this_cpu_ptr() would trigger a debug splat with
 * CONFIG_DEBUG_PREEMPT.
 */
static noinline void tunnel_dst_set(struct ip_tunnel *t,
			   struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}
87
/* Invalidate the current CPU's cached route for tunnel @t. */
static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}
92
/* Invalidate the cached route on every possible CPU, e.g. after the
 * tunnel's endpoints or underlying link change (see ip_tunnel_update()).
 */
void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
101
/* Fetch this CPU's cached route for tunnel @t, taking a reference the
 * caller must drop with ip_rt_put()/dst_release().
 *
 * Returns NULL when the slot is empty, the dst is already being torn
 * down (refcount hit zero), or the dst failed its validity check — in
 * the last case the slot is also reset so it is not re-tried.
 * On success *saddr is set to the source address cached with the route.
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	/* raw_cpu_ptr: preemptible callers are fine, see tunnel_dst_set() */
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	/* Only take the dst if it is still live; 0 -> 1 would resurrect it */
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			/* Stale route: flush the slot and drop our ref */
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
125
126 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
127 __be16 flags, __be32 key)
128 {
129 if (p->i_flags & TUNNEL_KEY) {
130 if (flags & TUNNEL_KEY)
131 return key == p->i_key;
132 else
133 /* key expected, none present */
134 return false;
135 } else
136 return !(flags & TUNNEL_KEY);
137 }
138
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matching any configured keyless tunnel,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
/* Find the tunnel that should receive a packet with the given link,
 * flags, addresses and key.  Four passes in decreasing specificity:
 *   1. exact (remote, local) match
 *   2. remote-only match (tunnel has no local address)
 *   3. local-only match, or multicast destination matching local
 *   4. key-only match (tunnel has neither address configured)
 * In each pass an exact parms.link match returns immediately; the first
 * hit with a different link is remembered as a candidate.  If nothing
 * matched, fall back to the per-netns fallback device when it is UP.
 *
 * Must be called under rcu_read_lock().  Returns NULL if no tunnel
 * can accept the packet.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: fully-specified tunnels (both endpoints match). */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: tunnels bound only to the remote endpoint. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Passes 3 and 4 hash with remote == 0 (unset destination). */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	/* Pass 3: local-address-only tunnels, or a multicast destination
	 * that equals the tunnel's configured destination.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: wildcard-address tunnels matched purely on key. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);


	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
239
240 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
241 struct ip_tunnel_parm *parms)
242 {
243 unsigned int h;
244 __be32 remote;
245 __be32 i_key = parms->i_key;
246
247 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
248 remote = parms->iph.daddr;
249 else
250 remote = 0;
251
252 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
253 i_key = 0;
254
255 h = ip_tunnel_hash(i_key, remote);
256 return &itn->tunnels[h];
257 }
258
/* Insert tunnel @t into its hash bucket; RCU-safe for readers.
 * Caller holds RTNL, which serializes writers.
 */
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}
265
/* Unhash tunnel @t; concurrent RCU readers may still see it until a
 * grace period elapses.  Caller holds RTNL.
 */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
270
271 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
272 struct ip_tunnel_parm *parms,
273 int type)
274 {
275 __be32 remote = parms->iph.daddr;
276 __be32 local = parms->iph.saddr;
277 __be32 key = parms->i_key;
278 __be16 flags = parms->i_flags;
279 int link = parms->link;
280 struct ip_tunnel *t = NULL;
281 struct hlist_head *head = ip_bucket(itn, parms);
282
283 hlist_for_each_entry_rcu(t, head, hash_node) {
284 if (local == t->parms.iph.saddr &&
285 remote == t->parms.iph.daddr &&
286 link == t->parms.link &&
287 type == t->dev->type &&
288 ip_tunnel_key_match(&t->parms, flags, key))
289 break;
290 }
291 return t;
292 }
293
/* Allocate and register a new tunnel net_device in @net.
 *
 * The device name comes from @parms->name when set; otherwise it is
 * derived from the link-ops kind plus a "%d" template that
 * register_netdevice() expands to a unique index.
 * Must run under RTNL.  Returns the device or an ERR_PTR().
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* Need room for the kind, "%d" and the terminating NUL. */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
339
340 static inline void init_tunnel_flow(struct flowi4 *fl4,
341 int proto,
342 __be32 daddr, __be32 saddr,
343 __be32 key, __u8 tos, int oif)
344 {
345 memset(fl4, 0, sizeof(*fl4));
346 fl4->flowi4_oif = oif;
347 fl4->daddr = daddr;
348 fl4->saddr = saddr;
349 fl4->flowi4_tos = tos;
350 fl4->flowi4_proto = proto;
351 fl4->fl4_gre_key = key;
352 }
353
/* Bind tunnel device @dev to an underlying output device to derive a
 * sensible MTU and needed_headroom.  The lower device is guessed by
 * routing towards the configured remote endpoint, falling back to the
 * explicitly configured parms.link device.
 * Returns the MTU the tunnel device should use.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			/* Warm the per-cpu dst cache while we have the route */
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	/* 68 = RFC 791 minimum IPv4 MTU */
	if (mtu < 68)
		mtu = 68;

	return mtu;
}
401
/* Create, bind and hash a new tunnel from @parms, using the fallback
 * device's link ops as the template.  Called under RTNL from the ioctl
 * path (SIOCADDTUNNEL).  Returns the new tunnel or an ERR_PTR().
 */
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}
420
/* Receive path for a decapsulated tunnel packet.
 *
 * Validates checksum/sequence expectations from @tpi against the
 * tunnel's configuration, decapsulates ECN, updates per-cpu rx stats
 * and hands the inner packet to the stack via GRO cells.
 * Always consumes @skb and returns 0 (errors are counted, not
 * propagated).
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Checksum presence must match the tunnel's TUNNEL_CSUM setting
	 * in both directions (expected-but-missing or unexpected).
	 */
	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		/* Drop missing or out-of-order sequence numbers; the
		 * signed subtraction handles sequence wraparound.
		 */
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	/* err > 1 means the outer/inner ECN combination is invalid */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Scrub state when crossing a netns boundary */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
489
/* Path-MTU handling on the tunnel transmit path.
 *
 * Computes the effective tunnel MTU from the outer route (@rt) when DF
 * is set, propagates it to the inner dst, and — for oversized
 * non-GSO packets — emits the appropriate ICMP(v6) "too big" message.
 * Returns 0 to continue transmission or -E2BIG when the packet must be
 * dropped after notifying the sender.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Record the reduced MTU on host routes (or fixed-endpoint
		 * tunnels) so future lookups see it.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
537
/* Common transmit path for IP tunnels (GRE, IPIP, ...).
 *
 * Resolves the outer destination (including NBMA tunnels, where it is
 * derived from the inner route/neighbour), finds or reuses a cached
 * route, enforces PMTU, inherits tos/ttl/df from the inner packet as
 * configured, and hands the packet to iptunnel_xmit() for
 * encapsulation.  Consumes @skb on all paths; errors are reflected in
 * the device statistics.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	/* "connected" tunnels (fixed remote) may use the per-cpu dst cache */
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: derive the outer destination per packet */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only IPv4-compatible IPv6 addresses embed a usable
			 * IPv4 destination in their low 32 bits.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* Low bit set = "inherit tos from the inner packet" */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	/* Try the per-cpu dst cache first for connected tunnels */
	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	/* Routing back to ourselves would loop */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* While recent ICMP errors are pending, signal link failure to
	 * the sender instead of silently transmitting.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* ttl 0 means "inherit from the inner packet" */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
699
/* Apply new parameters @p to an existing tunnel @t (under RTNL).
 *
 * The tunnel is unhashed before the endpoints/keys change and re-hashed
 * afterwards, because those fields determine its hash bucket.  If the
 * underlying link changed, the device is re-bound (and the MTU updated
 * when @set_mtu).  Cached routes are flushed on all CPUs at the end.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose the endpoints as hw addresses */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
732
/* Common ioctl backend for tunnel drivers: get, add/change and delete
 * tunnels (SIOCGETTUNNEL / SIOCADDTUNNEL / SIOCCHGTUNNEL /
 * SIOCDELTUNNEL).  Runs under RTNL; add/change/delete additionally
 * require CAP_NET_ADMIN in the device's user namespace.
 * Returns 0 or a negative errno; @p is updated in place for GET.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		/* On the fallback device, GET looks up by parameters */
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		/* Normalize keys the user did not ask for (non-VTI only) */
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL)) {
			t = ip_tunnel_create(net, itn, p);
			err = PTR_ERR_OR_ZERO(t);
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Parameters already belong to another device */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* Cannot change broadcast/p2p nature in place */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself cannot be deleted */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
830
831 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
832 {
833 struct ip_tunnel *tunnel = netdev_priv(dev);
834 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
835
836 if (new_mtu < 68 ||
837 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
838 return -EINVAL;
839 dev->mtu = new_mtu;
840 return 0;
841 }
842 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
843
/* net_device destructor: release everything ip_tunnel_init() allocated
 * (GRO cells, per-cpu dst cache, per-cpu stats) and the device itself.
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
853
/* rtnl_link_ops->dellink: unhash the tunnel and queue its device for
 * unregistration.  The per-netns fallback device is skipped — it is
 * torn down only on netns exit (see ip_tunnel_destroy()).
 */
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
867
/* Per-netns initialization for a tunnel type: set up the hash table and
 * (when @ops is given) create the fallback device named @devname.
 * A NULL @ops means the caller manages devices itself and only needs
 * the hash table.  Returns 0 or the fallback-device creation error.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
902
/* Queue every device of this tunnel type for unregistration on netns
 * teardown: all devices living in the exiting netns (matched by link
 * ops, including the fallback device) plus hashed tunnels whose device
 * was moved to another netns.  Caller holds RTNL.
 */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}
927
/* Per-netns exit hook: batch-unregister every tunnel device of this
 * type under a single RTNL hold (one grace period for all devices).
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
938
/* rtnl_link_ops->newlink backend: register @dev as a new tunnel with
 * parameters @p, refusing duplicates of an existing configuration.
 * A random MAC is assigned for Ethernet-type tunnels unless the user
 * supplied one; the MTU follows the bound lower device unless IFLA_MTU
 * was given.  Returns 0 or a negative errno.
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
973
/* rtnl_link_ops->changelink backend: apply parameters @p to @dev.
 * The fallback device cannot be reconfigured; parameters owned by a
 * different device yield -EEXIST; switching a non-Ethernet tunnel
 * between point-to-point and broadcast in place is rejected.
 * MTU is recomputed unless IFLA_MTU was supplied by the user.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			/* Cannot change broadcast/p2p nature in place */
			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1011
1012 int ip_tunnel_init(struct net_device *dev)
1013 {
1014 struct ip_tunnel *tunnel = netdev_priv(dev);
1015 struct iphdr *iph = &tunnel->parms.iph;
1016 int err;
1017
1018 dev->destructor = ip_tunnel_dev_free;
1019 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1020 if (!dev->tstats)
1021 return -ENOMEM;
1022
1023 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1024 if (!tunnel->dst_cache) {
1025 free_percpu(dev->tstats);
1026 return -ENOMEM;
1027 }
1028
1029 err = gro_cells_init(&tunnel->gro_cells, dev);
1030 if (err) {
1031 free_percpu(tunnel->dst_cache);
1032 free_percpu(dev->tstats);
1033 return err;
1034 }
1035
1036 tunnel->dev = dev;
1037 tunnel->net = dev_net(dev);
1038 strcpy(tunnel->parms.name, dev->name);
1039 iph->version = 4;
1040 iph->ihl = 5;
1041
1042 return 0;
1043 }
1044 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1045
/* ndo_uninit handler: unhash the tunnel (the fallback device stays
 * hashed — it is unregistered in the netns exit path) and flush all
 * per-cpu cached routes.
 */
void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));

	ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1060
/* Do least required initialization, rest of init is done in tunnel_init call.
 * Records which per-netns slot (@net_id) this tunnel type uses so later
 * callbacks can find their struct ip_tunnel_net via net_generic().
 */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1068
1069 MODULE_LICENSE("GPL");
This page took 0.066156 seconds and 6 git commands to generate.