Commit | Line | Data |
---|---|---|
c5441932 PS |
1 | /* |
2 | * Copyright (c) 2013 Nicira, Inc. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of version 2 of the GNU General Public | |
6 | * License as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, but | |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | * | |
13 | * You should have received a copy of the GNU General Public License | |
14 | * along with this program; if not, write to the Free Software | |
15 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | |
16 | * 02110-1301, USA | |
17 | */ | |
18 | ||
19 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
20 | ||
21 | #include <linux/capability.h> | |
22 | #include <linux/module.h> | |
23 | #include <linux/types.h> | |
24 | #include <linux/kernel.h> | |
25 | #include <linux/slab.h> | |
26 | #include <linux/uaccess.h> | |
27 | #include <linux/skbuff.h> | |
28 | #include <linux/netdevice.h> | |
29 | #include <linux/in.h> | |
30 | #include <linux/tcp.h> | |
31 | #include <linux/udp.h> | |
32 | #include <linux/if_arp.h> | |
33 | #include <linux/mroute.h> | |
34 | #include <linux/init.h> | |
35 | #include <linux/in6.h> | |
36 | #include <linux/inetdevice.h> | |
37 | #include <linux/igmp.h> | |
38 | #include <linux/netfilter_ipv4.h> | |
39 | #include <linux/etherdevice.h> | |
40 | #include <linux/if_ether.h> | |
41 | #include <linux/if_vlan.h> | |
42 | #include <linux/rculist.h> | |
27d79f3b | 43 | #include <linux/err.h> |
c5441932 PS |
44 | |
45 | #include <net/sock.h> | |
46 | #include <net/ip.h> | |
47 | #include <net/icmp.h> | |
48 | #include <net/protocol.h> | |
49 | #include <net/ip_tunnels.h> | |
50 | #include <net/arp.h> | |
51 | #include <net/checksum.h> | |
52 | #include <net/dsfield.h> | |
53 | #include <net/inet_ecn.h> | |
54 | #include <net/xfrm.h> | |
55 | #include <net/net_namespace.h> | |
56 | #include <net/netns/generic.h> | |
57 | #include <net/rtnetlink.h> | |
58 | ||
59 | #if IS_ENABLED(CONFIG_IPV6) | |
60 | #include <net/ipv6.h> | |
61 | #include <net/ip6_fib.h> | |
62 | #include <net/ip6_route.h> | |
63 | #endif | |
64 | ||
967680e0 | 65 | static unsigned int ip_tunnel_hash(__be32 key, __be32 remote) |
c5441932 PS |
66 | { |
67 | return hash_32((__force u32)key ^ (__force u32)remote, | |
68 | IP_TNL_HASH_BITS); | |
69 | } | |
70 | ||
6c7e7610 ED |
71 | static void __tunnel_dst_set(struct ip_tunnel_dst *idst, |
72 | struct dst_entry *dst) | |
7d442fab TH |
73 | { |
74 | struct dst_entry *old_dst; | |
75 | ||
6c7e7610 ED |
76 | if (dst) { |
77 | if (dst->flags & DST_NOCACHE) | |
78 | dst = NULL; | |
79 | else | |
80 | dst_clone(dst); | |
81 | } | |
82 | old_dst = xchg((__force struct dst_entry **)&idst->dst, dst); | |
7d442fab | 83 | dst_release(old_dst); |
7d442fab TH |
84 | } |
85 | ||
6c7e7610 | 86 | static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst) |
7d442fab | 87 | { |
9a4aa9af | 88 | __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst); |
7d442fab TH |
89 | } |
90 | ||
6c7e7610 | 91 | static void tunnel_dst_reset(struct ip_tunnel *t) |
7d442fab TH |
92 | { |
93 | tunnel_dst_set(t, NULL); | |
94 | } | |
95 | ||
9a4aa9af TH |
96 | static void tunnel_dst_reset_all(struct ip_tunnel *t) |
97 | { | |
98 | int i; | |
99 | ||
100 | for_each_possible_cpu(i) | |
101 | __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); | |
102 | } | |
103 | ||
b045d37b | 104 | static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie) |
7d442fab TH |
105 | { |
106 | struct dst_entry *dst; | |
107 | ||
108 | rcu_read_lock(); | |
9a4aa9af | 109 | dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); |
b045d37b ED |
110 | if (dst) { |
111 | if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) { | |
112 | rcu_read_unlock(); | |
113 | tunnel_dst_reset(t); | |
114 | return NULL; | |
115 | } | |
7d442fab | 116 | dst_hold(dst); |
7d442fab | 117 | } |
b045d37b ED |
118 | rcu_read_unlock(); |
119 | return (struct rtable *)dst; | |
7d442fab TH |
120 | } |
121 | ||
c5441932 PS |
122 | static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, |
123 | __be16 flags, __be32 key) | |
124 | { | |
125 | if (p->i_flags & TUNNEL_KEY) { | |
126 | if (flags & TUNNEL_KEY) | |
127 | return key == p->i_key; | |
128 | else | |
129 | /* key expected, none present */ | |
130 | return false; | |
131 | } else | |
132 | return !(flags & TUNNEL_KEY); | |
133 | } | |
134 | ||
135 | /* Fallback tunnel: no source, no destination, no key, no options | |
136 | ||
137 | Tunnel hash table: | |
138 | We require exact key match i.e. if a key is present in packet | |
139 | it will match only tunnel with the same key; if it is not present, | |
140 | it will match only keyless tunnel. | |
141 | ||
142 | All keysless packets, if not matched configured keyless tunnels | |
143 | will match fallback tunnel. | |
144 | Given src, dst and key, find appropriate for input tunnel. | |
145 | */ | |
146 | struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, | |
147 | int link, __be16 flags, | |
148 | __be32 remote, __be32 local, | |
149 | __be32 key) | |
150 | { | |
151 | unsigned int hash; | |
152 | struct ip_tunnel *t, *cand = NULL; | |
153 | struct hlist_head *head; | |
154 | ||
967680e0 | 155 | hash = ip_tunnel_hash(key, remote); |
c5441932 PS |
156 | head = &itn->tunnels[hash]; |
157 | ||
158 | hlist_for_each_entry_rcu(t, head, hash_node) { | |
159 | if (local != t->parms.iph.saddr || | |
160 | remote != t->parms.iph.daddr || | |
161 | !(t->dev->flags & IFF_UP)) | |
162 | continue; | |
163 | ||
164 | if (!ip_tunnel_key_match(&t->parms, flags, key)) | |
165 | continue; | |
166 | ||
167 | if (t->parms.link == link) | |
168 | return t; | |
169 | else | |
170 | cand = t; | |
171 | } | |
172 | ||
173 | hlist_for_each_entry_rcu(t, head, hash_node) { | |
174 | if (remote != t->parms.iph.daddr || | |
175 | !(t->dev->flags & IFF_UP)) | |
176 | continue; | |
177 | ||
178 | if (!ip_tunnel_key_match(&t->parms, flags, key)) | |
179 | continue; | |
180 | ||
181 | if (t->parms.link == link) | |
182 | return t; | |
183 | else if (!cand) | |
184 | cand = t; | |
185 | } | |
186 | ||
967680e0 | 187 | hash = ip_tunnel_hash(key, 0); |
c5441932 PS |
188 | head = &itn->tunnels[hash]; |
189 | ||
190 | hlist_for_each_entry_rcu(t, head, hash_node) { | |
191 | if ((local != t->parms.iph.saddr && | |
192 | (local != t->parms.iph.daddr || | |
193 | !ipv4_is_multicast(local))) || | |
194 | !(t->dev->flags & IFF_UP)) | |
195 | continue; | |
196 | ||
197 | if (!ip_tunnel_key_match(&t->parms, flags, key)) | |
198 | continue; | |
199 | ||
200 | if (t->parms.link == link) | |
201 | return t; | |
202 | else if (!cand) | |
203 | cand = t; | |
204 | } | |
205 | ||
206 | if (flags & TUNNEL_NO_KEY) | |
207 | goto skip_key_lookup; | |
208 | ||
209 | hlist_for_each_entry_rcu(t, head, hash_node) { | |
210 | if (t->parms.i_key != key || | |
211 | !(t->dev->flags & IFF_UP)) | |
212 | continue; | |
213 | ||
214 | if (t->parms.link == link) | |
215 | return t; | |
216 | else if (!cand) | |
217 | cand = t; | |
218 | } | |
219 | ||
220 | skip_key_lookup: | |
221 | if (cand) | |
222 | return cand; | |
223 | ||
224 | if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) | |
225 | return netdev_priv(itn->fb_tunnel_dev); | |
226 | ||
227 | ||
228 | return NULL; | |
229 | } | |
230 | EXPORT_SYMBOL_GPL(ip_tunnel_lookup); | |
231 | ||
232 | static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, | |
233 | struct ip_tunnel_parm *parms) | |
234 | { | |
235 | unsigned int h; | |
236 | __be32 remote; | |
237 | ||
238 | if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr)) | |
239 | remote = parms->iph.daddr; | |
240 | else | |
241 | remote = 0; | |
242 | ||
967680e0 | 243 | h = ip_tunnel_hash(parms->i_key, remote); |
c5441932 PS |
244 | return &itn->tunnels[h]; |
245 | } | |
246 | ||
247 | static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t) | |
248 | { | |
249 | struct hlist_head *head = ip_bucket(itn, &t->parms); | |
250 | ||
251 | hlist_add_head_rcu(&t->hash_node, head); | |
252 | } | |
253 | ||
254 | static void ip_tunnel_del(struct ip_tunnel *t) | |
255 | { | |
256 | hlist_del_init_rcu(&t->hash_node); | |
257 | } | |
258 | ||
259 | static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, | |
260 | struct ip_tunnel_parm *parms, | |
261 | int type) | |
262 | { | |
263 | __be32 remote = parms->iph.daddr; | |
264 | __be32 local = parms->iph.saddr; | |
265 | __be32 key = parms->i_key; | |
266 | int link = parms->link; | |
267 | struct ip_tunnel *t = NULL; | |
268 | struct hlist_head *head = ip_bucket(itn, parms); | |
269 | ||
270 | hlist_for_each_entry_rcu(t, head, hash_node) { | |
271 | if (local == t->parms.iph.saddr && | |
272 | remote == t->parms.iph.daddr && | |
273 | key == t->parms.i_key && | |
274 | link == t->parms.link && | |
275 | type == t->dev->type) | |
276 | break; | |
277 | } | |
278 | return t; | |
279 | } | |
280 | ||
281 | static struct net_device *__ip_tunnel_create(struct net *net, | |
282 | const struct rtnl_link_ops *ops, | |
283 | struct ip_tunnel_parm *parms) | |
284 | { | |
285 | int err; | |
286 | struct ip_tunnel *tunnel; | |
287 | struct net_device *dev; | |
288 | char name[IFNAMSIZ]; | |
289 | ||
290 | if (parms->name[0]) | |
291 | strlcpy(name, parms->name, IFNAMSIZ); | |
292 | else { | |
54a5d382 | 293 | if (strlen(ops->kind) > (IFNAMSIZ - 3)) { |
c5441932 PS |
294 | err = -E2BIG; |
295 | goto failed; | |
296 | } | |
297 | strlcpy(name, ops->kind, IFNAMSIZ); | |
298 | strncat(name, "%d", 2); | |
299 | } | |
300 | ||
301 | ASSERT_RTNL(); | |
302 | dev = alloc_netdev(ops->priv_size, name, ops->setup); | |
303 | if (!dev) { | |
304 | err = -ENOMEM; | |
305 | goto failed; | |
306 | } | |
307 | dev_net_set(dev, net); | |
308 | ||
309 | dev->rtnl_link_ops = ops; | |
310 | ||
311 | tunnel = netdev_priv(dev); | |
312 | tunnel->parms = *parms; | |
5e6700b3 | 313 | tunnel->net = net; |
c5441932 PS |
314 | |
315 | err = register_netdevice(dev); | |
316 | if (err) | |
317 | goto failed_free; | |
318 | ||
319 | return dev; | |
320 | ||
321 | failed_free: | |
322 | free_netdev(dev); | |
323 | failed: | |
324 | return ERR_PTR(err); | |
325 | } | |
326 | ||
7d442fab TH |
327 | static inline void init_tunnel_flow(struct flowi4 *fl4, |
328 | int proto, | |
329 | __be32 daddr, __be32 saddr, | |
330 | __be32 key, __u8 tos, int oif) | |
c5441932 PS |
331 | { |
332 | memset(fl4, 0, sizeof(*fl4)); | |
333 | fl4->flowi4_oif = oif; | |
334 | fl4->daddr = daddr; | |
335 | fl4->saddr = saddr; | |
336 | fl4->flowi4_tos = tos; | |
337 | fl4->flowi4_proto = proto; | |
338 | fl4->fl4_gre_key = key; | |
c5441932 PS |
339 | } |
340 | ||
341 | static int ip_tunnel_bind_dev(struct net_device *dev) | |
342 | { | |
343 | struct net_device *tdev = NULL; | |
344 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
345 | const struct iphdr *iph; | |
346 | int hlen = LL_MAX_HEADER; | |
347 | int mtu = ETH_DATA_LEN; | |
348 | int t_hlen = tunnel->hlen + sizeof(struct iphdr); | |
349 | ||
350 | iph = &tunnel->parms.iph; | |
351 | ||
352 | /* Guess output device to choose reasonable mtu and needed_headroom */ | |
353 | if (iph->daddr) { | |
354 | struct flowi4 fl4; | |
355 | struct rtable *rt; | |
356 | ||
7d442fab TH |
357 | init_tunnel_flow(&fl4, iph->protocol, iph->daddr, |
358 | iph->saddr, tunnel->parms.o_key, | |
359 | RT_TOS(iph->tos), tunnel->parms.link); | |
360 | rt = ip_route_output_key(tunnel->net, &fl4); | |
361 | ||
c5441932 PS |
362 | if (!IS_ERR(rt)) { |
363 | tdev = rt->dst.dev; | |
6c7e7610 | 364 | tunnel_dst_set(tunnel, &rt->dst); |
c5441932 PS |
365 | ip_rt_put(rt); |
366 | } | |
367 | if (dev->type != ARPHRD_ETHER) | |
368 | dev->flags |= IFF_POINTOPOINT; | |
369 | } | |
370 | ||
371 | if (!tdev && tunnel->parms.link) | |
6c742e71 | 372 | tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); |
c5441932 PS |
373 | |
374 | if (tdev) { | |
375 | hlen = tdev->hard_header_len + tdev->needed_headroom; | |
376 | mtu = tdev->mtu; | |
377 | } | |
378 | dev->iflink = tunnel->parms.link; | |
379 | ||
380 | dev->needed_headroom = t_hlen + hlen; | |
381 | mtu -= (dev->hard_header_len + t_hlen); | |
382 | ||
383 | if (mtu < 68) | |
384 | mtu = 68; | |
385 | ||
386 | return mtu; | |
387 | } | |
388 | ||
389 | static struct ip_tunnel *ip_tunnel_create(struct net *net, | |
390 | struct ip_tunnel_net *itn, | |
391 | struct ip_tunnel_parm *parms) | |
392 | { | |
393 | struct ip_tunnel *nt, *fbt; | |
394 | struct net_device *dev; | |
395 | ||
396 | BUG_ON(!itn->fb_tunnel_dev); | |
397 | fbt = netdev_priv(itn->fb_tunnel_dev); | |
398 | dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); | |
399 | if (IS_ERR(dev)) | |
400 | return NULL; | |
401 | ||
402 | dev->mtu = ip_tunnel_bind_dev(dev); | |
403 | ||
404 | nt = netdev_priv(dev); | |
405 | ip_tunnel_add(itn, nt); | |
406 | return nt; | |
407 | } | |
408 | ||
409 | int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, | |
410 | const struct tnl_ptk_info *tpi, bool log_ecn_error) | |
411 | { | |
8f84985f | 412 | struct pcpu_sw_netstats *tstats; |
c5441932 PS |
413 | const struct iphdr *iph = ip_hdr(skb); |
414 | int err; | |
415 | ||
c5441932 PS |
416 | #ifdef CONFIG_NET_IPGRE_BROADCAST |
417 | if (ipv4_is_multicast(iph->daddr)) { | |
418 | /* Looped back packet, drop it! */ | |
419 | if (rt_is_output_route(skb_rtable(skb))) | |
420 | goto drop; | |
421 | tunnel->dev->stats.multicast++; | |
422 | skb->pkt_type = PACKET_BROADCAST; | |
423 | } | |
424 | #endif | |
425 | ||
426 | if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) || | |
427 | ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { | |
428 | tunnel->dev->stats.rx_crc_errors++; | |
429 | tunnel->dev->stats.rx_errors++; | |
430 | goto drop; | |
431 | } | |
432 | ||
433 | if (tunnel->parms.i_flags&TUNNEL_SEQ) { | |
434 | if (!(tpi->flags&TUNNEL_SEQ) || | |
435 | (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { | |
436 | tunnel->dev->stats.rx_fifo_errors++; | |
437 | tunnel->dev->stats.rx_errors++; | |
438 | goto drop; | |
439 | } | |
440 | tunnel->i_seqno = ntohl(tpi->seq) + 1; | |
441 | } | |
442 | ||
c5441932 PS |
443 | err = IP_ECN_decapsulate(iph, skb); |
444 | if (unlikely(err)) { | |
445 | if (log_ecn_error) | |
446 | net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", | |
447 | &iph->saddr, iph->tos); | |
448 | if (err > 1) { | |
449 | ++tunnel->dev->stats.rx_frame_errors; | |
450 | ++tunnel->dev->stats.rx_errors; | |
451 | goto drop; | |
452 | } | |
453 | } | |
454 | ||
455 | tstats = this_cpu_ptr(tunnel->dev->tstats); | |
456 | u64_stats_update_begin(&tstats->syncp); | |
457 | tstats->rx_packets++; | |
458 | tstats->rx_bytes += skb->len; | |
459 | u64_stats_update_end(&tstats->syncp); | |
460 | ||
81b9eab5 AS |
461 | skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); |
462 | ||
3d7b46cd PS |
463 | if (tunnel->dev->type == ARPHRD_ETHER) { |
464 | skb->protocol = eth_type_trans(skb, tunnel->dev); | |
465 | skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); | |
466 | } else { | |
467 | skb->dev = tunnel->dev; | |
468 | } | |
64261f23 | 469 | |
c5441932 PS |
470 | gro_cells_receive(&tunnel->gro_cells, skb); |
471 | return 0; | |
472 | ||
473 | drop: | |
474 | kfree_skb(skb); | |
475 | return 0; | |
476 | } | |
477 | EXPORT_SYMBOL_GPL(ip_tunnel_rcv); | |
478 | ||
23a3647b PS |
479 | static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, |
480 | struct rtable *rt, __be16 df) | |
481 | { | |
482 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
8c91e162 | 483 | int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; |
23a3647b PS |
484 | int mtu; |
485 | ||
486 | if (df) | |
487 | mtu = dst_mtu(&rt->dst) - dev->hard_header_len | |
488 | - sizeof(struct iphdr) - tunnel->hlen; | |
489 | else | |
490 | mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; | |
491 | ||
492 | if (skb_dst(skb)) | |
493 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); | |
494 | ||
495 | if (skb->protocol == htons(ETH_P_IP)) { | |
496 | if (!skb_is_gso(skb) && | |
497 | (df & htons(IP_DF)) && mtu < pkt_size) { | |
498 | memset(IPCB(skb), 0, sizeof(*IPCB(skb))); | |
499 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); | |
500 | return -E2BIG; | |
501 | } | |
502 | } | |
503 | #if IS_ENABLED(CONFIG_IPV6) | |
504 | else if (skb->protocol == htons(ETH_P_IPV6)) { | |
505 | struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); | |
506 | ||
507 | if (rt6 && mtu < dst_mtu(skb_dst(skb)) && | |
508 | mtu >= IPV6_MIN_MTU) { | |
509 | if ((tunnel->parms.iph.daddr && | |
510 | !ipv4_is_multicast(tunnel->parms.iph.daddr)) || | |
511 | rt6->rt6i_dst.plen == 128) { | |
512 | rt6->rt6i_flags |= RTF_MODIFIED; | |
513 | dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); | |
514 | } | |
515 | } | |
516 | ||
517 | if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && | |
518 | mtu < pkt_size) { | |
519 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | |
520 | return -E2BIG; | |
521 | } | |
522 | } | |
523 | #endif | |
524 | return 0; | |
525 | } | |
526 | ||
c5441932 | 527 | void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, |
bf3d6a8f | 528 | const struct iphdr *tnl_params, const u8 protocol) |
c5441932 PS |
529 | { |
530 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
531 | const struct iphdr *inner_iph; | |
c5441932 PS |
532 | struct flowi4 fl4; |
533 | u8 tos, ttl; | |
534 | __be16 df; | |
b045d37b | 535 | struct rtable *rt; /* Route to the other host */ |
c5441932 PS |
536 | unsigned int max_headroom; /* The extra header space needed */ |
537 | __be32 dst; | |
0e6fbc5b | 538 | int err; |
7d442fab | 539 | bool connected = true; |
c5441932 PS |
540 | |
541 | inner_iph = (const struct iphdr *)skb_inner_network_header(skb); | |
542 | ||
543 | dst = tnl_params->daddr; | |
544 | if (dst == 0) { | |
545 | /* NBMA tunnel */ | |
546 | ||
547 | if (skb_dst(skb) == NULL) { | |
548 | dev->stats.tx_fifo_errors++; | |
549 | goto tx_error; | |
550 | } | |
551 | ||
552 | if (skb->protocol == htons(ETH_P_IP)) { | |
553 | rt = skb_rtable(skb); | |
554 | dst = rt_nexthop(rt, inner_iph->daddr); | |
555 | } | |
556 | #if IS_ENABLED(CONFIG_IPV6) | |
557 | else if (skb->protocol == htons(ETH_P_IPV6)) { | |
558 | const struct in6_addr *addr6; | |
559 | struct neighbour *neigh; | |
560 | bool do_tx_error_icmp; | |
561 | int addr_type; | |
562 | ||
563 | neigh = dst_neigh_lookup(skb_dst(skb), | |
564 | &ipv6_hdr(skb)->daddr); | |
565 | if (neigh == NULL) | |
566 | goto tx_error; | |
567 | ||
568 | addr6 = (const struct in6_addr *)&neigh->primary_key; | |
569 | addr_type = ipv6_addr_type(addr6); | |
570 | ||
571 | if (addr_type == IPV6_ADDR_ANY) { | |
572 | addr6 = &ipv6_hdr(skb)->daddr; | |
573 | addr_type = ipv6_addr_type(addr6); | |
574 | } | |
575 | ||
576 | if ((addr_type & IPV6_ADDR_COMPATv4) == 0) | |
577 | do_tx_error_icmp = true; | |
578 | else { | |
579 | do_tx_error_icmp = false; | |
580 | dst = addr6->s6_addr32[3]; | |
581 | } | |
582 | neigh_release(neigh); | |
583 | if (do_tx_error_icmp) | |
584 | goto tx_error_icmp; | |
585 | } | |
586 | #endif | |
587 | else | |
588 | goto tx_error; | |
7d442fab TH |
589 | |
590 | connected = false; | |
c5441932 PS |
591 | } |
592 | ||
593 | tos = tnl_params->tos; | |
594 | if (tos & 0x1) { | |
595 | tos &= ~0x1; | |
7d442fab | 596 | if (skb->protocol == htons(ETH_P_IP)) { |
c5441932 | 597 | tos = inner_iph->tos; |
7d442fab TH |
598 | connected = false; |
599 | } else if (skb->protocol == htons(ETH_P_IPV6)) { | |
c5441932 | 600 | tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); |
7d442fab TH |
601 | connected = false; |
602 | } | |
c5441932 PS |
603 | } |
604 | ||
7d442fab TH |
605 | init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, |
606 | tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); | |
607 | ||
b045d37b | 608 | rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL; |
7d442fab TH |
609 | |
610 | if (!rt) { | |
611 | rt = ip_route_output_key(tunnel->net, &fl4); | |
612 | ||
613 | if (IS_ERR(rt)) { | |
614 | dev->stats.tx_carrier_errors++; | |
615 | goto tx_error; | |
616 | } | |
617 | if (connected) | |
6c7e7610 | 618 | tunnel_dst_set(tunnel, &rt->dst); |
c5441932 | 619 | } |
7d442fab | 620 | |
0e6fbc5b | 621 | if (rt->dst.dev == dev) { |
c5441932 PS |
622 | ip_rt_put(rt); |
623 | dev->stats.collisions++; | |
624 | goto tx_error; | |
625 | } | |
c5441932 | 626 | |
23a3647b PS |
627 | if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { |
628 | ip_rt_put(rt); | |
629 | goto tx_error; | |
c5441932 | 630 | } |
c5441932 PS |
631 | |
632 | if (tunnel->err_count > 0) { | |
633 | if (time_before(jiffies, | |
634 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { | |
635 | tunnel->err_count--; | |
636 | ||
11c21a30 | 637 | memset(IPCB(skb), 0, sizeof(*IPCB(skb))); |
c5441932 PS |
638 | dst_link_failure(skb); |
639 | } else | |
640 | tunnel->err_count = 0; | |
641 | } | |
642 | ||
d4a71b15 | 643 | tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); |
c5441932 PS |
644 | ttl = tnl_params->ttl; |
645 | if (ttl == 0) { | |
646 | if (skb->protocol == htons(ETH_P_IP)) | |
647 | ttl = inner_iph->ttl; | |
648 | #if IS_ENABLED(CONFIG_IPV6) | |
649 | else if (skb->protocol == htons(ETH_P_IPV6)) | |
650 | ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; | |
651 | #endif | |
652 | else | |
653 | ttl = ip4_dst_hoplimit(&rt->dst); | |
654 | } | |
655 | ||
23a3647b PS |
656 | df = tnl_params->frag_off; |
657 | if (skb->protocol == htons(ETH_P_IP)) | |
658 | df |= (inner_iph->frag_off&htons(IP_DF)); | |
659 | ||
0e6fbc5b PS |
660 | max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) |
661 | + rt->dst.header_len; | |
3e08f4a7 | 662 | if (max_headroom > dev->needed_headroom) |
c5441932 | 663 | dev->needed_headroom = max_headroom; |
3e08f4a7 SK |
664 | |
665 | if (skb_cow_head(skb, dev->needed_headroom)) { | |
666 | dev->stats.tx_dropped++; | |
3acfa1e7 | 667 | kfree_skb(skb); |
3e08f4a7 | 668 | return; |
c5441932 PS |
669 | } |
670 | ||
8b7ed2d9 | 671 | err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol, |
d4a71b15 | 672 | tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); |
0e6fbc5b | 673 | iptunnel_xmit_stats(err, &dev->stats, dev->tstats); |
c5441932 | 674 | |
c5441932 PS |
675 | return; |
676 | ||
677 | #if IS_ENABLED(CONFIG_IPV6) | |
678 | tx_error_icmp: | |
679 | dst_link_failure(skb); | |
680 | #endif | |
681 | tx_error: | |
682 | dev->stats.tx_errors++; | |
3acfa1e7 | 683 | kfree_skb(skb); |
c5441932 PS |
684 | } |
685 | EXPORT_SYMBOL_GPL(ip_tunnel_xmit); | |
686 | ||
687 | static void ip_tunnel_update(struct ip_tunnel_net *itn, | |
688 | struct ip_tunnel *t, | |
689 | struct net_device *dev, | |
690 | struct ip_tunnel_parm *p, | |
691 | bool set_mtu) | |
692 | { | |
693 | ip_tunnel_del(t); | |
694 | t->parms.iph.saddr = p->iph.saddr; | |
695 | t->parms.iph.daddr = p->iph.daddr; | |
696 | t->parms.i_key = p->i_key; | |
697 | t->parms.o_key = p->o_key; | |
698 | if (dev->type != ARPHRD_ETHER) { | |
699 | memcpy(dev->dev_addr, &p->iph.saddr, 4); | |
700 | memcpy(dev->broadcast, &p->iph.daddr, 4); | |
701 | } | |
702 | ip_tunnel_add(itn, t); | |
703 | ||
704 | t->parms.iph.ttl = p->iph.ttl; | |
705 | t->parms.iph.tos = p->iph.tos; | |
706 | t->parms.iph.frag_off = p->iph.frag_off; | |
707 | ||
708 | if (t->parms.link != p->link) { | |
709 | int mtu; | |
710 | ||
711 | t->parms.link = p->link; | |
712 | mtu = ip_tunnel_bind_dev(dev); | |
713 | if (set_mtu) | |
714 | dev->mtu = mtu; | |
715 | } | |
9a4aa9af | 716 | tunnel_dst_reset_all(t); |
c5441932 PS |
717 | netdev_state_change(dev); |
718 | } | |
719 | ||
720 | int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) | |
721 | { | |
722 | int err = 0; | |
723 | struct ip_tunnel *t; | |
724 | struct net *net = dev_net(dev); | |
725 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
726 | struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); | |
727 | ||
728 | BUG_ON(!itn->fb_tunnel_dev); | |
729 | switch (cmd) { | |
730 | case SIOCGETTUNNEL: | |
731 | t = NULL; | |
732 | if (dev == itn->fb_tunnel_dev) | |
733 | t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); | |
734 | if (t == NULL) | |
735 | t = netdev_priv(dev); | |
736 | memcpy(p, &t->parms, sizeof(*p)); | |
737 | break; | |
738 | ||
739 | case SIOCADDTUNNEL: | |
740 | case SIOCCHGTUNNEL: | |
741 | err = -EPERM; | |
742 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) | |
743 | goto done; | |
744 | if (p->iph.ttl) | |
745 | p->iph.frag_off |= htons(IP_DF); | |
746 | if (!(p->i_flags&TUNNEL_KEY)) | |
747 | p->i_key = 0; | |
748 | if (!(p->o_flags&TUNNEL_KEY)) | |
749 | p->o_key = 0; | |
750 | ||
751 | t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); | |
752 | ||
753 | if (!t && (cmd == SIOCADDTUNNEL)) | |
754 | t = ip_tunnel_create(net, itn, p); | |
755 | ||
756 | if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { | |
757 | if (t != NULL) { | |
758 | if (t->dev != dev) { | |
759 | err = -EEXIST; | |
760 | break; | |
761 | } | |
762 | } else { | |
763 | unsigned int nflags = 0; | |
764 | ||
765 | if (ipv4_is_multicast(p->iph.daddr)) | |
766 | nflags = IFF_BROADCAST; | |
767 | else if (p->iph.daddr) | |
768 | nflags = IFF_POINTOPOINT; | |
769 | ||
770 | if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { | |
771 | err = -EINVAL; | |
772 | break; | |
773 | } | |
774 | ||
775 | t = netdev_priv(dev); | |
776 | } | |
777 | } | |
778 | ||
779 | if (t) { | |
780 | err = 0; | |
781 | ip_tunnel_update(itn, t, dev, p, true); | |
782 | } else | |
783 | err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); | |
784 | break; | |
785 | ||
786 | case SIOCDELTUNNEL: | |
787 | err = -EPERM; | |
788 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) | |
789 | goto done; | |
790 | ||
791 | if (dev == itn->fb_tunnel_dev) { | |
792 | err = -ENOENT; | |
793 | t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); | |
794 | if (t == NULL) | |
795 | goto done; | |
796 | err = -EPERM; | |
797 | if (t == netdev_priv(itn->fb_tunnel_dev)) | |
798 | goto done; | |
799 | dev = t->dev; | |
800 | } | |
801 | unregister_netdevice(dev); | |
802 | err = 0; | |
803 | break; | |
804 | ||
805 | default: | |
806 | err = -EINVAL; | |
807 | } | |
808 | ||
809 | done: | |
810 | return err; | |
811 | } | |
812 | EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); | |
813 | ||
814 | int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) | |
815 | { | |
816 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
817 | int t_hlen = tunnel->hlen + sizeof(struct iphdr); | |
818 | ||
819 | if (new_mtu < 68 || | |
820 | new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) | |
821 | return -EINVAL; | |
822 | dev->mtu = new_mtu; | |
823 | return 0; | |
824 | } | |
825 | EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); | |
826 | ||
827 | static void ip_tunnel_dev_free(struct net_device *dev) | |
828 | { | |
829 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
830 | ||
831 | gro_cells_destroy(&tunnel->gro_cells); | |
9a4aa9af | 832 | free_percpu(tunnel->dst_cache); |
c5441932 PS |
833 | free_percpu(dev->tstats); |
834 | free_netdev(dev); | |
835 | } | |
836 | ||
837 | void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) | |
838 | { | |
c5441932 PS |
839 | struct ip_tunnel *tunnel = netdev_priv(dev); |
840 | struct ip_tunnel_net *itn; | |
841 | ||
6c742e71 | 842 | itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id); |
c5441932 PS |
843 | |
844 | if (itn->fb_tunnel_dev != dev) { | |
845 | ip_tunnel_del(netdev_priv(dev)); | |
846 | unregister_netdevice_queue(dev, head); | |
847 | } | |
848 | } | |
849 | EXPORT_SYMBOL_GPL(ip_tunnel_dellink); | |
850 | ||
d3b6f614 | 851 | int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, |
c5441932 PS |
852 | struct rtnl_link_ops *ops, char *devname) |
853 | { | |
854 | struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); | |
855 | struct ip_tunnel_parm parms; | |
6261d983 | 856 | unsigned int i; |
c5441932 | 857 | |
6261d983 | 858 | for (i = 0; i < IP_TNL_HASH_SIZE; i++) |
859 | INIT_HLIST_HEAD(&itn->tunnels[i]); | |
c5441932 PS |
860 | |
861 | if (!ops) { | |
862 | itn->fb_tunnel_dev = NULL; | |
863 | return 0; | |
864 | } | |
6261d983 | 865 | |
c5441932 PS |
866 | memset(&parms, 0, sizeof(parms)); |
867 | if (devname) | |
868 | strlcpy(parms.name, devname, IFNAMSIZ); | |
869 | ||
870 | rtnl_lock(); | |
871 | itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); | |
ea857f28 DC |
872 | /* FB netdevice is special: we have one, and only one per netns. |
873 | * Allowing to move it to another netns is clearly unsafe. | |
874 | */ | |
67013282 | 875 | if (!IS_ERR(itn->fb_tunnel_dev)) { |
b4de77ad | 876 | itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; |
67013282 SK |
877 | ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); |
878 | } | |
b4de77ad | 879 | rtnl_unlock(); |
c5441932 | 880 | |
27d79f3b | 881 | return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev); |
c5441932 PS |
882 | } |
883 | EXPORT_SYMBOL_GPL(ip_tunnel_init_net); | |
884 | ||
6c742e71 ND |
885 | static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, |
886 | struct rtnl_link_ops *ops) | |
c5441932 | 887 | { |
6c742e71 ND |
888 | struct net *net = dev_net(itn->fb_tunnel_dev); |
889 | struct net_device *dev, *aux; | |
c5441932 PS |
890 | int h; |
891 | ||
6c742e71 ND |
892 | for_each_netdev_safe(net, dev, aux) |
893 | if (dev->rtnl_link_ops == ops) | |
894 | unregister_netdevice_queue(dev, head); | |
895 | ||
c5441932 PS |
896 | for (h = 0; h < IP_TNL_HASH_SIZE; h++) { |
897 | struct ip_tunnel *t; | |
898 | struct hlist_node *n; | |
899 | struct hlist_head *thead = &itn->tunnels[h]; | |
900 | ||
901 | hlist_for_each_entry_safe(t, n, thead, hash_node) | |
6c742e71 ND |
902 | /* If dev is in the same netns, it has already |
903 | * been added to the list by the previous loop. | |
904 | */ | |
905 | if (!net_eq(dev_net(t->dev), net)) | |
906 | unregister_netdevice_queue(t->dev, head); | |
c5441932 | 907 | } |
c5441932 PS |
908 | } |
909 | ||
6c742e71 | 910 | void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) |
c5441932 PS |
911 | { |
912 | LIST_HEAD(list); | |
913 | ||
914 | rtnl_lock(); | |
6c742e71 | 915 | ip_tunnel_destroy(itn, &list, ops); |
c5441932 PS |
916 | unregister_netdevice_many(&list); |
917 | rtnl_unlock(); | |
c5441932 PS |
918 | } |
919 | EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); | |
920 | ||
921 | int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], | |
922 | struct ip_tunnel_parm *p) | |
923 | { | |
924 | struct ip_tunnel *nt; | |
925 | struct net *net = dev_net(dev); | |
926 | struct ip_tunnel_net *itn; | |
927 | int mtu; | |
928 | int err; | |
929 | ||
930 | nt = netdev_priv(dev); | |
931 | itn = net_generic(net, nt->ip_tnl_net_id); | |
932 | ||
933 | if (ip_tunnel_find(itn, p, dev->type)) | |
934 | return -EEXIST; | |
935 | ||
5e6700b3 | 936 | nt->net = net; |
c5441932 PS |
937 | nt->parms = *p; |
938 | err = register_netdevice(dev); | |
939 | if (err) | |
940 | goto out; | |
941 | ||
942 | if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) | |
943 | eth_hw_addr_random(dev); | |
944 | ||
945 | mtu = ip_tunnel_bind_dev(dev); | |
946 | if (!tb[IFLA_MTU]) | |
947 | dev->mtu = mtu; | |
948 | ||
949 | ip_tunnel_add(itn, nt); | |
950 | ||
951 | out: | |
952 | return err; | |
953 | } | |
954 | EXPORT_SYMBOL_GPL(ip_tunnel_newlink); | |
955 | ||
956 | int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], | |
957 | struct ip_tunnel_parm *p) | |
958 | { | |
6c742e71 | 959 | struct ip_tunnel *t; |
c5441932 | 960 | struct ip_tunnel *tunnel = netdev_priv(dev); |
6c742e71 | 961 | struct net *net = tunnel->net; |
c5441932 PS |
962 | struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); |
963 | ||
964 | if (dev == itn->fb_tunnel_dev) | |
965 | return -EINVAL; | |
966 | ||
c5441932 PS |
967 | t = ip_tunnel_find(itn, p, dev->type); |
968 | ||
969 | if (t) { | |
970 | if (t->dev != dev) | |
971 | return -EEXIST; | |
972 | } else { | |
6c742e71 | 973 | t = tunnel; |
c5441932 PS |
974 | |
975 | if (dev->type != ARPHRD_ETHER) { | |
976 | unsigned int nflags = 0; | |
977 | ||
978 | if (ipv4_is_multicast(p->iph.daddr)) | |
979 | nflags = IFF_BROADCAST; | |
980 | else if (p->iph.daddr) | |
981 | nflags = IFF_POINTOPOINT; | |
982 | ||
983 | if ((dev->flags ^ nflags) & | |
984 | (IFF_POINTOPOINT | IFF_BROADCAST)) | |
985 | return -EINVAL; | |
986 | } | |
987 | } | |
988 | ||
989 | ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]); | |
990 | return 0; | |
991 | } | |
992 | EXPORT_SYMBOL_GPL(ip_tunnel_changelink); | |
993 | ||
994 | int ip_tunnel_init(struct net_device *dev) | |
995 | { | |
996 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
997 | struct iphdr *iph = &tunnel->parms.iph; | |
827da44c | 998 | int i, err; |
c5441932 PS |
999 | |
1000 | dev->destructor = ip_tunnel_dev_free; | |
8f84985f | 1001 | dev->tstats = alloc_percpu(struct pcpu_sw_netstats); |
c5441932 PS |
1002 | if (!dev->tstats) |
1003 | return -ENOMEM; | |
1004 | ||
827da44c | 1005 | for_each_possible_cpu(i) { |
8f84985f | 1006 | struct pcpu_sw_netstats *ipt_stats; |
827da44c JS |
1007 | ipt_stats = per_cpu_ptr(dev->tstats, i); |
1008 | u64_stats_init(&ipt_stats->syncp); | |
1009 | } | |
1010 | ||
9a4aa9af TH |
1011 | tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); |
1012 | if (!tunnel->dst_cache) { | |
1013 | free_percpu(dev->tstats); | |
1014 | return -ENOMEM; | |
1015 | } | |
1016 | ||
c5441932 PS |
1017 | err = gro_cells_init(&tunnel->gro_cells, dev); |
1018 | if (err) { | |
9a4aa9af | 1019 | free_percpu(tunnel->dst_cache); |
c5441932 PS |
1020 | free_percpu(dev->tstats); |
1021 | return err; | |
1022 | } | |
1023 | ||
1024 | tunnel->dev = dev; | |
6c742e71 | 1025 | tunnel->net = dev_net(dev); |
c5441932 PS |
1026 | strcpy(tunnel->parms.name, dev->name); |
1027 | iph->version = 4; | |
1028 | iph->ihl = 5; | |
1029 | ||
1030 | return 0; | |
1031 | } | |
1032 | EXPORT_SYMBOL_GPL(ip_tunnel_init); | |
1033 | ||
1034 | void ip_tunnel_uninit(struct net_device *dev) | |
1035 | { | |
c5441932 | 1036 | struct ip_tunnel *tunnel = netdev_priv(dev); |
6c742e71 | 1037 | struct net *net = tunnel->net; |
c5441932 PS |
1038 | struct ip_tunnel_net *itn; |
1039 | ||
1040 | itn = net_generic(net, tunnel->ip_tnl_net_id); | |
1041 | /* fb_tunnel_dev will be unregisted in net-exit call. */ | |
1042 | if (itn->fb_tunnel_dev != dev) | |
1043 | ip_tunnel_del(netdev_priv(dev)); | |
7d442fab | 1044 | |
9a4aa9af | 1045 | tunnel_dst_reset_all(tunnel); |
c5441932 PS |
1046 | } |
1047 | EXPORT_SYMBOL_GPL(ip_tunnel_uninit); | |
1048 | ||
1049 | /* Do least required initialization, rest of init is done in tunnel_init call */ | |
1050 | void ip_tunnel_setup(struct net_device *dev, int net_id) | |
1051 | { | |
1052 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
1053 | tunnel->ip_tnl_net_id = net_id; | |
1054 | } | |
1055 | EXPORT_SYMBOL_GPL(ip_tunnel_setup); | |
1056 | ||
1057 | MODULE_LICENSE("GPL"); |