net/ipv4/ip_gre.c
1 /*
2 * Linux NET3: GRE over IP protocol decoder.
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/tcp.h>
22 #include <linux/udp.h>
23 #include <linux/if_arp.h>
24 #include <linux/mroute.h>
25 #include <linux/init.h>
26 #include <linux/in6.h>
27 #include <linux/inetdevice.h>
28 #include <linux/igmp.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/etherdevice.h>
31 #include <linux/if_ether.h>
32
33 #include <net/sock.h>
34 #include <net/ip.h>
35 #include <net/icmp.h>
36 #include <net/protocol.h>
37 #include <net/ipip.h>
38 #include <net/arp.h>
39 #include <net/checksum.h>
40 #include <net/dsfield.h>
41 #include <net/inet_ecn.h>
42 #include <net/xfrm.h>
43 #include <net/net_namespace.h>
44 #include <net/netns/generic.h>
45 #include <net/rtnetlink.h>
46
47 #ifdef CONFIG_IPV6
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
52
53 /*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 by infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation;
63 it is an infeasible task. The most general solution would be
64 to keep an skb->encapsulation counter (a sort of local ttl)
65 and silently drop the packet when it expires. It is the best
66 solution, but it requires maintaining a new variable in ALL
67 skbs, even if no tunneling is used.
68
69 Current solution: the t->recursion lock breaks dead loops. It looks
70 like the dev->tbusy flag, but I preferred a new variable, because
71 the semantics are different. One day, when hard_start_xmit
72 becomes multithreaded, we will have to use skb->encapsulation.
73 (A minimal sketch of this guard follows the end of this comment.)
74
75
76 2. Networking dead loops would not kill routers, but would really
77 kill the network. The IP hop limit plays the role of "t->recursion" here,
78 if we copy it from the packet being encapsulated to the upper header.
79 It is a very good solution, but it introduces two problems:
80
81 - Routing protocols using packets with ttl=1 (OSPF, RIP2)
82 do not work over tunnels.
83 - traceroute does not work. I planned to relay ICMP from the tunnel,
84 so that this problem would be solved and traceroute output
85 would be even more informative. This idea appeared to be wrong:
86 only Linux complies with rfc1812 now (yes, guys, Linux is the only
87 true router now :-)); all routers (at least in my neighbourhood)
88 return only 8 bytes of payload. It is the end.
89
90 Hence, if we want OSPF to work or traceroute to say something reasonable,
91 we have to search for another solution.
92
93 One of them is to parse the packet, trying to detect inner encapsulation
94 made by our node. It is difficult or even impossible, especially
95 taking fragmentation into account. In short, it is not a solution at all.
96
97 Current solution: the solution was UNEXPECTEDLY SIMPLE.
98 We force the DF flag on tunnels with a preconfigured hop limit,
99 that is ALL. :-) Well, it does not remove the problem completely,
100 but the exponential growth of network traffic is changed to linear
101 (branches that exceed the pmtu are pruned) and the tunnel mtu
102 quickly degrades to a value <68, where looping stops.
103 Yes, it is not good if there is a router in the loop
104 which does not force DF, even when encapsulated packets have DF set.
105 But it is not our problem! Nobody can accuse us; we did
106 all that we could. Even if it was your gated that injected the
107 fatal route into the network, even if it was you who configured the
108 fatal static route: you are innocent. :-)
109
110
111
112 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
113 practically identical code. It would be good to glue them
114 together, but it is not obvious how to make them modular.
115 sit is an integral part of IPv6; ipip and gre are naturally modular.
116 We could extract the common parts (hash table, ioctl etc.)
117 into a separate module (ip_tunnel.c).
118
119 Alexey Kuznetsov.
120 */
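/*
 * Illustrative sketch only (not part of the driver, never compiled):
 * the dead-loop guard from item 1 above boils down to the pattern
 * below, which ipgre_tunnel_xmit() implements for real further down.
 * "example_xmit" is a hypothetical name used purely for illustration.
 */
#if 0
static int example_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (t->recursion++) {			/* re-entered our own xmit path: */
		t->dev->stats.collisions++;	/* a local dead loop */
		dev_kfree_skb(skb);		/* drop instead of recursing */
		t->recursion--;
		return 0;
	}
	/* ... encapsulate and transmit skb ... */
	t->recursion--;				/* leaving the xmit path */
	return 0;
}
#endif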
121
122 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
123 static int ipgre_tunnel_init(struct net_device *dev);
124 static void ipgre_tunnel_setup(struct net_device *dev);
125 static int ipgre_tunnel_bind_dev(struct net_device *dev);
126
127 /* Fallback tunnel: no source, no destination, no key, no options */
128
129 static int ipgre_fb_tunnel_init(struct net_device *dev);
130
131 #define HASH_SIZE 16
132
133 static int ipgre_net_id;
134 struct ipgre_net {
135 struct ip_tunnel *tunnels[4][HASH_SIZE];
136
137 struct net_device *fb_tunnel_dev;
138 };
139
140 /* Tunnel hash table */
141
142 /*
143 4 hash tables:
144
145 3: (remote,local)
146 2: (remote,*)
147 1: (*,local)
148 0: (*,*)
149
150 We require an exact key match, i.e. if a key is present in the packet
151 it will match only a tunnel with the same key; if it is not present,
152 it will match only a keyless tunnel.
153
154 All keyless packets that do not match a configured keyless tunnel
155 will match the fallback tunnel.
156 */
157
158 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
159
160 #define tunnels_r_l tunnels[3]
161 #define tunnels_r tunnels[2]
162 #define tunnels_l tunnels[1]
163 #define tunnels_wc tunnels[0]
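/*
 * Illustrative sketch only (never compiled): how a (remote, key) pair
 * selects a bucket. HASH() folds the low byte of a value into 4 bits,
 * and a fully specified tunnel lives on the prio-3 chain, exactly as
 * ipgre_tunnel_lookup() below walks it. The values are hypothetical.
 */
#if 0
	__be32 remote = htonl(0x0a000002);	/* e.g. 10.0.0.2 */
	__be32 key = htonl(42);			/* hypothetical GRE key */
	unsigned h = HASH(remote) ^ HASH(key);	/* 4-bit bucket index */
	struct ip_tunnel *t = ign->tunnels_r_l[h]; /* (remote,local) chain head */
#endif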
164
165 static DEFINE_RWLOCK(ipgre_lock);
166
167 /* Given src, dst and key, find the appropriate tunnel for input. */
168
169 static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
170 __be32 remote, __be32 local,
171 __be32 key, __be16 gre_proto)
172 {
173 unsigned h0 = HASH(remote);
174 unsigned h1 = HASH(key);
175 struct ip_tunnel *t;
176 struct ip_tunnel *t2 = NULL;
177 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
178 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
179 ARPHRD_ETHER : ARPHRD_IPGRE;
180
181 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
182 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
183 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
184 if (t->dev->type == dev_type)
185 return t;
186 if (t->dev->type == ARPHRD_IPGRE && !t2)
187 t2 = t;
188 }
189 }
190 }
191
192 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
193 if (remote == t->parms.iph.daddr) {
194 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
195 if (t->dev->type == dev_type)
196 return t;
197 if (t->dev->type == ARPHRD_IPGRE && !t2)
198 t2 = t;
199 }
200 }
201 }
202
203 for (t = ign->tunnels_l[h1]; t; t = t->next) {
204 if (local == t->parms.iph.saddr ||
205 (local == t->parms.iph.daddr &&
206 ipv4_is_multicast(local))) {
207 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
208 if (t->dev->type == dev_type)
209 return t;
210 if (t->dev->type == ARPHRD_IPGRE && !t2)
211 t2 = t;
212 }
213 }
214 }
215
216 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
217 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
218 if (t->dev->type == dev_type)
219 return t;
220 if (t->dev->type == ARPHRD_IPGRE && !t2)
221 t2 = t;
222 }
223 }
224
225 if (t2)
226 return t2;
227
228 if (ign->fb_tunnel_dev->flags&IFF_UP)
229 return netdev_priv(ign->fb_tunnel_dev);
230 return NULL;
231 }
232
233 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
234 struct ip_tunnel_parm *parms)
235 {
236 __be32 remote = parms->iph.daddr;
237 __be32 local = parms->iph.saddr;
238 __be32 key = parms->i_key;
239 unsigned h = HASH(key);
240 int prio = 0;
241
242 if (local)
243 prio |= 1;
244 if (remote && !ipv4_is_multicast(remote)) {
245 prio |= 2;
246 h ^= HASH(remote);
247 }
248
249 return &ign->tunnels[prio][h];
250 }
251
252 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
253 struct ip_tunnel *t)
254 {
255 return __ipgre_bucket(ign, &t->parms);
256 }
257
258 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
259 {
260 struct ip_tunnel **tp = ipgre_bucket(ign, t);
261
262 t->next = *tp;
263 write_lock_bh(&ipgre_lock);
264 *tp = t;
265 write_unlock_bh(&ipgre_lock);
266 }
267
268 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
269 {
270 struct ip_tunnel **tp;
271
272 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
273 if (t == *tp) {
274 write_lock_bh(&ipgre_lock);
275 *tp = t->next;
276 write_unlock_bh(&ipgre_lock);
277 break;
278 }
279 }
280 }
281
282 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
283 struct ip_tunnel_parm *parms,
284 int type)
285 {
286 __be32 remote = parms->iph.daddr;
287 __be32 local = parms->iph.saddr;
288 __be32 key = parms->i_key;
289 struct ip_tunnel *t, **tp;
290 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
291
292 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
293 if (local == t->parms.iph.saddr &&
294 remote == t->parms.iph.daddr &&
295 key == t->parms.i_key &&
296 type == t->dev->type)
297 break;
298
299 return t;
300 }
301
302 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
303 struct ip_tunnel_parm *parms, int create)
304 {
305 struct ip_tunnel *t, *nt;
306 struct net_device *dev;
307 char name[IFNAMSIZ];
308 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
309
310 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
311 if (t || !create)
312 return t;
313
314 if (parms->name[0])
315 strlcpy(name, parms->name, IFNAMSIZ);
316 else
317 sprintf(name, "gre%%d");
318
319 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
320 if (!dev)
321 return NULL;
322
323 dev_net_set(dev, net);
324
325 if (strchr(name, '%')) {
326 if (dev_alloc_name(dev, name) < 0)
327 goto failed_free;
328 }
329
330 nt = netdev_priv(dev);
331 nt->parms = *parms;
332 dev->rtnl_link_ops = &ipgre_link_ops;
333
334 dev->mtu = ipgre_tunnel_bind_dev(dev);
335
336 if (register_netdevice(dev) < 0)
337 goto failed_free;
338
339 dev_hold(dev);
340 ipgre_tunnel_link(ign, nt);
341 return nt;
342
343 failed_free:
344 free_netdev(dev);
345 return NULL;
346 }
347
348 static void ipgre_tunnel_uninit(struct net_device *dev)
349 {
350 struct net *net = dev_net(dev);
351 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
352
353 ipgre_tunnel_unlink(ign, netdev_priv(dev));
354 dev_put(dev);
355 }
356
357
358 static void ipgre_err(struct sk_buff *skb, u32 info)
359 {
360
361 /* All the routers (except for Linux) return only
362 8 bytes of packet payload. It means that precise relaying of
363 ICMP in the real Internet is absolutely infeasible.
364
365 Moreover, Cisco "wise men" put the GRE key in the third word
366 of the GRE header. That makes it impossible to maintain even soft
367 state for keyed GRE tunnels with checksums enabled. Tell them "thank you".
368
369 Well, I wonder: rfc1812 was written by a Cisco employee,
370 so what the hell makes these idiots break standards established
371 by themselves???
372 */
373
374 struct iphdr *iph = (struct iphdr*)skb->data;
375 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
376 int grehlen = (iph->ihl<<2) + 4;
377 const int type = icmp_hdr(skb)->type;
378 const int code = icmp_hdr(skb)->code;
379 struct ip_tunnel *t;
380 __be16 flags;
381
382 flags = p[0];
383 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
384 if (flags&(GRE_VERSION|GRE_ROUTING))
385 return;
386 if (flags&GRE_KEY) {
387 grehlen += 4;
388 if (flags&GRE_CSUM)
389 grehlen += 4;
390 }
391 }
392
393 /* If only 8 bytes were returned, a keyed message will be dropped here */
394 if (skb_headlen(skb) < grehlen)
395 return;
396
397 switch (type) {
398 default:
399 case ICMP_PARAMETERPROB:
400 return;
401
402 case ICMP_DEST_UNREACH:
403 switch (code) {
404 case ICMP_SR_FAILED:
405 case ICMP_PORT_UNREACH:
406 /* Impossible event. */
407 return;
408 case ICMP_FRAG_NEEDED:
409 /* Soft state for pmtu is maintained by IP core. */
410 return;
411 default:
412 /* All others are translated to HOST_UNREACH.
413 rfc2003 contains "deep thoughts" about NET_UNREACH,
414 I believe they are just ether pollution. --ANK
415 */
416 break;
417 }
418 break;
419 case ICMP_TIME_EXCEEDED:
420 if (code != ICMP_EXC_TTL)
421 return;
422 break;
423 }
424
425 read_lock(&ipgre_lock);
426 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
427 flags & GRE_KEY ?
428 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
429 p[1]);
430 if (t == NULL || t->parms.iph.daddr == 0 ||
431 ipv4_is_multicast(t->parms.iph.daddr))
432 goto out;
433
434 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
435 goto out;
436
437 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
438 t->err_count++;
439 else
440 t->err_count = 1;
441 t->err_time = jiffies;
442 out:
443 read_unlock(&ipgre_lock);
444 return;
445 }
446
447 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
448 {
449 if (INET_ECN_is_ce(iph->tos)) {
450 if (skb->protocol == htons(ETH_P_IP)) {
451 IP_ECN_set_ce(ip_hdr(skb));
452 } else if (skb->protocol == htons(ETH_P_IPV6)) {
453 IP6_ECN_set_ce(ipv6_hdr(skb));
454 }
455 }
456 }
457
458 static inline u8
459 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
460 {
461 u8 inner = 0;
462 if (skb->protocol == htons(ETH_P_IP))
463 inner = old_iph->tos;
464 else if (skb->protocol == htons(ETH_P_IPV6))
465 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
466 return INET_ECN_encapsulate(tos, inner);
467 }
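/*
 * Note (assuming the standard <net/inet_ecn.h> helpers): the two
 * functions above implement the usual ECN propagation for tunnels --
 * on encapsulation the outer tos keeps its DSCP bits and derives its
 * ECN field from the inner header; on decapsulation a CE mark on the
 * outer header is copied back into the inner IPv4/IPv6 header.
 * A hedged usage sketch (never compiled):
 */
#if 0
	/* in a transmit path, with old_iph the inner header: */
	u8 outer_tos = ipgre_ecn_encapsulate(tiph->tos, old_iph, skb);
	/* in a receive path, with iph the (outer) tunnel header: */
	ipgre_ecn_decapsulate(iph, skb);
#endif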
468
469 static int ipgre_rcv(struct sk_buff *skb)
470 {
471 struct iphdr *iph;
472 u8 *h;
473 __be16 flags;
474 __sum16 csum = 0;
475 __be32 key = 0;
476 u32 seqno = 0;
477 struct ip_tunnel *tunnel;
478 int offset = 4;
479 __be16 gre_proto;
480 unsigned int len;
481
482 if (!pskb_may_pull(skb, 16))
483 goto drop_nolock;
484
485 iph = ip_hdr(skb);
486 h = skb->data;
487 flags = *(__be16*)h;
488
489 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
490 /* - Version must be 0.
491 - We do not support routing headers.
492 */
493 if (flags&(GRE_VERSION|GRE_ROUTING))
494 goto drop_nolock;
495
496 if (flags&GRE_CSUM) {
497 switch (skb->ip_summed) {
498 case CHECKSUM_COMPLETE:
499 csum = csum_fold(skb->csum);
500 if (!csum)
501 break;
502 /* fall through */
503 case CHECKSUM_NONE:
504 skb->csum = 0;
505 csum = __skb_checksum_complete(skb);
506 skb->ip_summed = CHECKSUM_COMPLETE;
507 }
508 offset += 4;
509 }
510 if (flags&GRE_KEY) {
511 key = *(__be32*)(h + offset);
512 offset += 4;
513 }
514 if (flags&GRE_SEQ) {
515 seqno = ntohl(*(__be32*)(h + offset));
516 offset += 4;
517 }
518 }
519
520 gre_proto = *(__be16 *)(h + 2);
521
522 read_lock(&ipgre_lock);
523 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
524 iph->saddr, iph->daddr, key,
525 gre_proto))) {
526 struct net_device_stats *stats = &tunnel->dev->stats;
527
528 secpath_reset(skb);
529
530 skb->protocol = gre_proto;
531 /* WCCP version 1 and 2 protocol decoding.
532 * - Change protocol to IP
533 * - When dealing with WCCPv2, skip the extra 4 bytes in the GRE header
534 */
535 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
536 skb->protocol = htons(ETH_P_IP);
537 if ((*(h + offset) & 0xF0) != 0x40)
538 offset += 4;
539 }
540
541 skb->mac_header = skb->network_header;
542 __pskb_pull(skb, offset);
543 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
544 skb->pkt_type = PACKET_HOST;
545 #ifdef CONFIG_NET_IPGRE_BROADCAST
546 if (ipv4_is_multicast(iph->daddr)) {
547 /* Looped back packet, drop it! */
548 if (skb->rtable->fl.iif == 0)
549 goto drop;
550 stats->multicast++;
551 skb->pkt_type = PACKET_BROADCAST;
552 }
553 #endif
554
555 if (((flags&GRE_CSUM) && csum) ||
556 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
557 stats->rx_crc_errors++;
558 stats->rx_errors++;
559 goto drop;
560 }
561 if (tunnel->parms.i_flags&GRE_SEQ) {
562 if (!(flags&GRE_SEQ) ||
563 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
564 stats->rx_fifo_errors++;
565 stats->rx_errors++;
566 goto drop;
567 }
568 tunnel->i_seqno = seqno + 1;
569 }
570
571 len = skb->len;
572
573 /* Warning: All skb pointers will be invalidated! */
574 if (tunnel->dev->type == ARPHRD_ETHER) {
575 if (!pskb_may_pull(skb, ETH_HLEN)) {
576 stats->rx_length_errors++;
577 stats->rx_errors++;
578 goto drop;
579 }
580
581 iph = ip_hdr(skb);
582 skb->protocol = eth_type_trans(skb, tunnel->dev);
583 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
584 }
585
586 stats->rx_packets++;
587 stats->rx_bytes += len;
588 skb->dev = tunnel->dev;
589 dst_release(skb->dst);
590 skb->dst = NULL;
591 nf_reset(skb);
592
593 skb_reset_network_header(skb);
594 ipgre_ecn_decapsulate(iph, skb);
595
596 netif_rx(skb);
597 read_unlock(&ipgre_lock);
598 return 0;
599 }
600 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
601
602 drop:
603 read_unlock(&ipgre_lock);
604 drop_nolock:
605 kfree_skb(skb);
606 return 0;
607 }
608
609 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
610 {
611 struct ip_tunnel *tunnel = netdev_priv(dev);
612 struct net_device_stats *stats = &tunnel->dev->stats;
613 struct iphdr *old_iph = ip_hdr(skb);
614 struct iphdr *tiph;
615 u8 tos;
616 __be16 df;
617 struct rtable *rt; /* Route to the other host */
618 struct net_device *tdev; /* Device to other host */
619 struct iphdr *iph; /* Our new IP header */
620 unsigned int max_headroom; /* The extra header space needed */
621 int gre_hlen;
622 __be32 dst;
623 int mtu;
624
625 if (tunnel->recursion++) {
626 stats->collisions++;
627 goto tx_error;
628 }
629
630 if (dev->type == ARPHRD_ETHER)
631 IPCB(skb)->flags = 0;
632
633 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
634 gre_hlen = 0;
635 tiph = (struct iphdr*)skb->data;
636 } else {
637 gre_hlen = tunnel->hlen;
638 tiph = &tunnel->parms.iph;
639 }
640
641 if ((dst = tiph->daddr) == 0) {
642 /* NBMA tunnel */
643
644 if (skb->dst == NULL) {
645 stats->tx_fifo_errors++;
646 goto tx_error;
647 }
648
649 if (skb->protocol == htons(ETH_P_IP)) {
650 rt = skb->rtable;
651 if ((dst = rt->rt_gateway) == 0)
652 goto tx_error_icmp;
653 }
654 #ifdef CONFIG_IPV6
655 else if (skb->protocol == htons(ETH_P_IPV6)) {
656 struct in6_addr *addr6;
657 int addr_type;
658 struct neighbour *neigh = skb->dst->neighbour;
659
660 if (neigh == NULL)
661 goto tx_error;
662
663 addr6 = (struct in6_addr*)&neigh->primary_key;
664 addr_type = ipv6_addr_type(addr6);
665
666 if (addr_type == IPV6_ADDR_ANY) {
667 addr6 = &ipv6_hdr(skb)->daddr;
668 addr_type = ipv6_addr_type(addr6);
669 }
670
671 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
672 goto tx_error_icmp;
673
674 dst = addr6->s6_addr32[3];
675 }
676 #endif
677 else
678 goto tx_error;
679 }
680
681 tos = tiph->tos;
682 if (tos&1) {
683 if (skb->protocol == htons(ETH_P_IP))
684 tos = old_iph->tos;
685 tos &= ~1;
686 }
687
688 {
689 struct flowi fl = { .oif = tunnel->parms.link,
690 .nl_u = { .ip4_u =
691 { .daddr = dst,
692 .saddr = tiph->saddr,
693 .tos = RT_TOS(tos) } },
694 .proto = IPPROTO_GRE };
695 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
696 stats->tx_carrier_errors++;
697 goto tx_error;
698 }
699 }
700 tdev = rt->u.dst.dev;
701
702 if (tdev == dev) {
703 ip_rt_put(rt);
704 stats->collisions++;
705 goto tx_error;
706 }
707
708 df = tiph->frag_off;
709 if (df)
710 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
711 else
712 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
713
714 if (skb->dst)
715 skb->dst->ops->update_pmtu(skb->dst, mtu);
716
717 if (skb->protocol == htons(ETH_P_IP)) {
718 df |= (old_iph->frag_off&htons(IP_DF));
719
720 if ((old_iph->frag_off&htons(IP_DF)) &&
721 mtu < ntohs(old_iph->tot_len)) {
722 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
723 ip_rt_put(rt);
724 goto tx_error;
725 }
726 }
727 #ifdef CONFIG_IPV6
728 else if (skb->protocol == htons(ETH_P_IPV6)) {
729 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
730
731 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
732 if ((tunnel->parms.iph.daddr &&
733 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
734 rt6->rt6i_dst.plen == 128) {
735 rt6->rt6i_flags |= RTF_MODIFIED;
736 skb->dst->metrics[RTAX_MTU-1] = mtu;
737 }
738 }
739
740 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
741 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
742 ip_rt_put(rt);
743 goto tx_error;
744 }
745 }
746 #endif
747
748 if (tunnel->err_count > 0) {
749 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
750 tunnel->err_count--;
751
752 dst_link_failure(skb);
753 } else
754 tunnel->err_count = 0;
755 }
756
757 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
758
759 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
760 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
761 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
762 if (!new_skb) {
763 ip_rt_put(rt);
764 stats->tx_dropped++;
765 dev_kfree_skb(skb);
766 tunnel->recursion--;
767 return 0;
768 }
769 if (skb->sk)
770 skb_set_owner_w(new_skb, skb->sk);
771 dev_kfree_skb(skb);
772 skb = new_skb;
773 old_iph = ip_hdr(skb);
774 }
775
776 skb_reset_transport_header(skb);
777 skb_push(skb, gre_hlen);
778 skb_reset_network_header(skb);
779 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
780 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
781 IPSKB_REROUTED);
782 dst_release(skb->dst);
783 skb->dst = &rt->u.dst;
784
785 /*
786 * Push down and install the outer IP (and GRE) header.
787 */
788
789 iph = ip_hdr(skb);
790 iph->version = 4;
791 iph->ihl = sizeof(struct iphdr) >> 2;
792 iph->frag_off = df;
793 iph->protocol = IPPROTO_GRE;
794 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
795 iph->daddr = rt->rt_dst;
796 iph->saddr = rt->rt_src;
797
798 if ((iph->ttl = tiph->ttl) == 0) {
799 if (skb->protocol == htons(ETH_P_IP))
800 iph->ttl = old_iph->ttl;
801 #ifdef CONFIG_IPV6
802 else if (skb->protocol == htons(ETH_P_IPV6))
803 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
804 #endif
805 else
806 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
807 }
808
809 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
810 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
811 htons(ETH_P_TEB) : skb->protocol;
812
813 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
814 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
815
816 if (tunnel->parms.o_flags&GRE_SEQ) {
817 ++tunnel->o_seqno;
818 *ptr = htonl(tunnel->o_seqno);
819 ptr--;
820 }
821 if (tunnel->parms.o_flags&GRE_KEY) {
822 *ptr = tunnel->parms.o_key;
823 ptr--;
824 }
825 if (tunnel->parms.o_flags&GRE_CSUM) {
826 *ptr = 0;
827 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
828 }
829 }
830
831 nf_reset(skb);
832
833 IPTUNNEL_XMIT();
834 tunnel->recursion--;
835 return 0;
836
837 tx_error_icmp:
838 dst_link_failure(skb);
839
840 tx_error:
841 stats->tx_errors++;
842 dev_kfree_skb(skb);
843 tunnel->recursion--;
844 return 0;
845 }
846
847 static int ipgre_tunnel_bind_dev(struct net_device *dev)
848 {
849 struct net_device *tdev = NULL;
850 struct ip_tunnel *tunnel;
851 struct iphdr *iph;
852 int hlen = LL_MAX_HEADER;
853 int mtu = ETH_DATA_LEN;
854 int addend = sizeof(struct iphdr) + 4;
855
856 tunnel = netdev_priv(dev);
857 iph = &tunnel->parms.iph;
858
859 /* Guess output device to choose reasonable mtu and needed_headroom */
860
861 if (iph->daddr) {
862 struct flowi fl = { .oif = tunnel->parms.link,
863 .nl_u = { .ip4_u =
864 { .daddr = iph->daddr,
865 .saddr = iph->saddr,
866 .tos = RT_TOS(iph->tos) } },
867 .proto = IPPROTO_GRE };
868 struct rtable *rt;
869 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
870 tdev = rt->u.dst.dev;
871 ip_rt_put(rt);
872 }
873
874 if (dev->type != ARPHRD_ETHER)
875 dev->flags |= IFF_POINTOPOINT;
876 }
877
878 if (!tdev && tunnel->parms.link)
879 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
880
881 if (tdev) {
882 hlen = tdev->hard_header_len + tdev->needed_headroom;
883 mtu = tdev->mtu;
884 }
885 dev->iflink = tunnel->parms.link;
886
887 /* Precalculate GRE options length */
888 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
889 if (tunnel->parms.o_flags&GRE_CSUM)
890 addend += 4;
891 if (tunnel->parms.o_flags&GRE_KEY)
892 addend += 4;
893 if (tunnel->parms.o_flags&GRE_SEQ)
894 addend += 4;
895 }
896 dev->needed_headroom = addend + hlen;
897 mtu -= dev->hard_header_len - addend;
898
899 if (mtu < 68)
900 mtu = 68;
901
902 tunnel->hlen = addend;
903
904 return mtu;
905 }
906
907 static int
908 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
909 {
910 int err = 0;
911 struct ip_tunnel_parm p;
912 struct ip_tunnel *t;
913 struct net *net = dev_net(dev);
914 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
915
916 switch (cmd) {
917 case SIOCGETTUNNEL:
918 t = NULL;
919 if (dev == ign->fb_tunnel_dev) {
920 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
921 err = -EFAULT;
922 break;
923 }
924 t = ipgre_tunnel_locate(net, &p, 0);
925 }
926 if (t == NULL)
927 t = netdev_priv(dev);
928 memcpy(&p, &t->parms, sizeof(p));
929 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
930 err = -EFAULT;
931 break;
932
933 case SIOCADDTUNNEL:
934 case SIOCCHGTUNNEL:
935 err = -EPERM;
936 if (!capable(CAP_NET_ADMIN))
937 goto done;
938
939 err = -EFAULT;
940 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
941 goto done;
942
943 err = -EINVAL;
944 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
945 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
946 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
947 goto done;
948 if (p.iph.ttl)
949 p.iph.frag_off |= htons(IP_DF);
950
951 if (!(p.i_flags&GRE_KEY))
952 p.i_key = 0;
953 if (!(p.o_flags&GRE_KEY))
954 p.o_key = 0;
955
956 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
957
958 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
959 if (t != NULL) {
960 if (t->dev != dev) {
961 err = -EEXIST;
962 break;
963 }
964 } else {
965 unsigned nflags=0;
966
967 t = netdev_priv(dev);
968
969 if (ipv4_is_multicast(p.iph.daddr))
970 nflags = IFF_BROADCAST;
971 else if (p.iph.daddr)
972 nflags = IFF_POINTOPOINT;
973
974 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
975 err = -EINVAL;
976 break;
977 }
978 ipgre_tunnel_unlink(ign, t);
979 t->parms.iph.saddr = p.iph.saddr;
980 t->parms.iph.daddr = p.iph.daddr;
981 t->parms.i_key = p.i_key;
982 t->parms.o_key = p.o_key;
983 memcpy(dev->dev_addr, &p.iph.saddr, 4);
984 memcpy(dev->broadcast, &p.iph.daddr, 4);
985 ipgre_tunnel_link(ign, t);
986 netdev_state_change(dev);
987 }
988 }
989
990 if (t) {
991 err = 0;
992 if (cmd == SIOCCHGTUNNEL) {
993 t->parms.iph.ttl = p.iph.ttl;
994 t->parms.iph.tos = p.iph.tos;
995 t->parms.iph.frag_off = p.iph.frag_off;
996 if (t->parms.link != p.link) {
997 t->parms.link = p.link;
998 dev->mtu = ipgre_tunnel_bind_dev(dev);
999 netdev_state_change(dev);
1000 }
1001 }
1002 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1003 err = -EFAULT;
1004 } else
1005 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1006 break;
1007
1008 case SIOCDELTUNNEL:
1009 err = -EPERM;
1010 if (!capable(CAP_NET_ADMIN))
1011 goto done;
1012
1013 if (dev == ign->fb_tunnel_dev) {
1014 err = -EFAULT;
1015 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1016 goto done;
1017 err = -ENOENT;
1018 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1019 goto done;
1020 err = -EPERM;
1021 if (t == netdev_priv(ign->fb_tunnel_dev))
1022 goto done;
1023 dev = t->dev;
1024 }
1025 unregister_netdevice(dev);
1026 err = 0;
1027 break;
1028
1029 default:
1030 err = -EINVAL;
1031 }
1032
1033 done:
1034 return err;
1035 }
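/*
 * Illustrative userspace sketch (not part of this file): the ioctls
 * handled above are what iproute2-style tools use. A minimal caller,
 * assuming the fallback device "gre0" exists, might look like the
 * following (error handling elided; the tunnel name and the 192.0.2.x
 * documentation addresses are hypothetical; assumes <sys/ioctl.h>,
 * <arpa/inet.h>, <net/if.h> and <linux/if_tunnel.h>):
 */
#if 0
	struct ip_tunnel_parm p;
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&p, 0, sizeof(p));
	memset(&ifr, 0, sizeof(ifr));

	strcpy(p.name, "mygre");		/* hypothetical tunnel name */
	p.iph.version = 4;			/* required by the ioctl checks */
	p.iph.ihl = 5;
	p.iph.protocol = IPPROTO_GRE;
	p.iph.saddr = inet_addr("192.0.2.1");
	p.iph.daddr = inet_addr("192.0.2.2");

	strcpy(ifr.ifr_name, "gre0");		/* talk to the fallback device */
	ifr.ifr_ifru.ifru_data = (void *)&p;
	ioctl(fd, SIOCADDTUNNEL, &ifr);		/* creates "mygre" */
#endif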
1036
1037 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1038 {
1039 struct ip_tunnel *tunnel = netdev_priv(dev);
1040 if (new_mtu < 68 ||
1041 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1042 return -EINVAL;
1043 dev->mtu = new_mtu;
1044 return 0;
1045 }
1046
1047 /* Nice toy. Unfortunately, useless in real life :-)
1048 It allows one to construct a virtual multiprotocol broadcast "LAN"
1049 over the Internet, provided multicast routing is tuned.
1050
1051
1052 I have no idea whether this bicycle was invented before me,
1053 so I had to set ARPHRD_IPGRE to a random value.
1054 I have the impression that Cisco could have made something similar,
1055 but this feature is apparently missing in IOS<=11.2(8).
1056
1057 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1058 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1059
1060 ping -t 255 224.66.66.66
1061
1062 If nobody answers, mbone does not work.
1063
1064 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1065 ip addr add 10.66.66.<somewhat>/24 dev Universe
1066 ifconfig Universe up
1067 ifconfig Universe add fe80::<Your_real_addr>/10
1068 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1069 ftp 10.66.66.66
1070 ...
1071 ftp fec0:6666:6666::193.233.7.65
1072 ...
1073
1074 */
1075
1076 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1077 unsigned short type,
1078 const void *daddr, const void *saddr, unsigned len)
1079 {
1080 struct ip_tunnel *t = netdev_priv(dev);
1081 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1082 __be16 *p = (__be16*)(iph+1);
1083
1084 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1085 p[0] = t->parms.o_flags;
1086 p[1] = htons(type);
1087
1088 /*
1089 * Set the source hardware address.
1090 */
1091
1092 if (saddr)
1093 memcpy(&iph->saddr, saddr, 4);
1094
1095 if (daddr) {
1096 memcpy(&iph->daddr, daddr, 4);
1097 return t->hlen;
1098 }
1099 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1100 return t->hlen;
1101
1102 return -t->hlen;
1103 }
1104
1105 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1106 {
1107 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1108 memcpy(haddr, &iph->saddr, 4);
1109 return 4;
1110 }
1111
1112 static const struct header_ops ipgre_header_ops = {
1113 .create = ipgre_header,
1114 .parse = ipgre_header_parse,
1115 };
1116
1117 #ifdef CONFIG_NET_IPGRE_BROADCAST
1118 static int ipgre_open(struct net_device *dev)
1119 {
1120 struct ip_tunnel *t = netdev_priv(dev);
1121
1122 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1123 struct flowi fl = { .oif = t->parms.link,
1124 .nl_u = { .ip4_u =
1125 { .daddr = t->parms.iph.daddr,
1126 .saddr = t->parms.iph.saddr,
1127 .tos = RT_TOS(t->parms.iph.tos) } },
1128 .proto = IPPROTO_GRE };
1129 struct rtable *rt;
1130 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1131 return -EADDRNOTAVAIL;
1132 dev = rt->u.dst.dev;
1133 ip_rt_put(rt);
1134 if (__in_dev_get_rtnl(dev) == NULL)
1135 return -EADDRNOTAVAIL;
1136 t->mlink = dev->ifindex;
1137 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1138 }
1139 return 0;
1140 }
1141
1142 static int ipgre_close(struct net_device *dev)
1143 {
1144 struct ip_tunnel *t = netdev_priv(dev);
1145 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1146 struct in_device *in_dev;
1147 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1148 if (in_dev) {
1149 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1150 in_dev_put(in_dev);
1151 }
1152 }
1153 return 0;
1154 }
1155
1156 #endif
1157
1158 static void ipgre_tunnel_setup(struct net_device *dev)
1159 {
1160 dev->init = ipgre_tunnel_init;
1161 dev->uninit = ipgre_tunnel_uninit;
1162 dev->destructor = free_netdev;
1163 dev->hard_start_xmit = ipgre_tunnel_xmit;
1164 dev->do_ioctl = ipgre_tunnel_ioctl;
1165 dev->change_mtu = ipgre_tunnel_change_mtu;
1166
1167 dev->type = ARPHRD_IPGRE;
1168 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1169 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1170 dev->flags = IFF_NOARP;
1171 dev->iflink = 0;
1172 dev->addr_len = 4;
1173 dev->features |= NETIF_F_NETNS_LOCAL;
1174 }
1175
1176 static int ipgre_tunnel_init(struct net_device *dev)
1177 {
1178 struct ip_tunnel *tunnel;
1179 struct iphdr *iph;
1180
1181 tunnel = netdev_priv(dev);
1182 iph = &tunnel->parms.iph;
1183
1184 tunnel->dev = dev;
1185 strcpy(tunnel->parms.name, dev->name);
1186
1187 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1188 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1189
1190 if (iph->daddr) {
1191 #ifdef CONFIG_NET_IPGRE_BROADCAST
1192 if (ipv4_is_multicast(iph->daddr)) {
1193 if (!iph->saddr)
1194 return -EINVAL;
1195 dev->flags = IFF_BROADCAST;
1196 dev->header_ops = &ipgre_header_ops;
1197 dev->open = ipgre_open;
1198 dev->stop = ipgre_close;
1199 }
1200 #endif
1201 } else
1202 dev->header_ops = &ipgre_header_ops;
1203
1204 return 0;
1205 }
1206
1207 static int ipgre_fb_tunnel_init(struct net_device *dev)
1208 {
1209 struct ip_tunnel *tunnel = netdev_priv(dev);
1210 struct iphdr *iph = &tunnel->parms.iph;
1211 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1212
1213 tunnel->dev = dev;
1214 strcpy(tunnel->parms.name, dev->name);
1215
1216 iph->version = 4;
1217 iph->protocol = IPPROTO_GRE;
1218 iph->ihl = 5;
1219 tunnel->hlen = sizeof(struct iphdr) + 4;
1220
1221 dev_hold(dev);
1222 ign->tunnels_wc[0] = tunnel;
1223 return 0;
1224 }
1225
1226
1227 static struct net_protocol ipgre_protocol = {
1228 .handler = ipgre_rcv,
1229 .err_handler = ipgre_err,
1230 .netns_ok = 1,
1231 };
1232
1233 static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1234 {
1235 int prio;
1236
1237 for (prio = 0; prio < 4; prio++) {
1238 int h;
1239 for (h = 0; h < HASH_SIZE; h++) {
1240 struct ip_tunnel *t;
1241 while ((t = ign->tunnels[prio][h]) != NULL)
1242 unregister_netdevice(t->dev);
1243 }
1244 }
1245 }
1246
1247 static int ipgre_init_net(struct net *net)
1248 {
1249 int err;
1250 struct ipgre_net *ign;
1251
1252 err = -ENOMEM;
1253 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1254 if (ign == NULL)
1255 goto err_alloc;
1256
1257 err = net_assign_generic(net, ipgre_net_id, ign);
1258 if (err < 0)
1259 goto err_assign;
1260
1261 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1262 ipgre_tunnel_setup);
1263 if (!ign->fb_tunnel_dev) {
1264 err = -ENOMEM;
1265 goto err_alloc_dev;
1266 }
1267
1268 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1269 dev_net_set(ign->fb_tunnel_dev, net);
1270 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1271
1272 if ((err = register_netdev(ign->fb_tunnel_dev)))
1273 goto err_reg_dev;
1274
1275 return 0;
1276
1277 err_reg_dev:
1278 free_netdev(ign->fb_tunnel_dev);
1279 err_alloc_dev:
1280 /* nothing */
1281 err_assign:
1282 kfree(ign);
1283 err_alloc:
1284 return err;
1285 }
1286
1287 static void ipgre_exit_net(struct net *net)
1288 {
1289 struct ipgre_net *ign;
1290
1291 ign = net_generic(net, ipgre_net_id);
1292 rtnl_lock();
1293 ipgre_destroy_tunnels(ign);
1294 rtnl_unlock();
1295 kfree(ign);
1296 }
1297
1298 static struct pernet_operations ipgre_net_ops = {
1299 .init = ipgre_init_net,
1300 .exit = ipgre_exit_net,
1301 };
1302
1303 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1304 {
1305 __be16 flags;
1306
1307 if (!data)
1308 return 0;
1309
1310 flags = 0;
1311 if (data[IFLA_GRE_IFLAGS])
1312 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1313 if (data[IFLA_GRE_OFLAGS])
1314 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1315 if (flags & (GRE_VERSION|GRE_ROUTING))
1316 return -EINVAL;
1317
1318 return 0;
1319 }
1320
1321 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1322 {
1323 __be32 daddr;
1324
1325 if (tb[IFLA_ADDRESS]) {
1326 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1327 return -EINVAL;
1328 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1329 return -EADDRNOTAVAIL;
1330 }
1331
1332 if (!data)
1333 goto out;
1334
1335 if (data[IFLA_GRE_REMOTE]) {
1336 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1337 if (!daddr)
1338 return -EINVAL;
1339 }
1340
1341 out:
1342 return ipgre_tunnel_validate(tb, data);
1343 }
1344
1345 static void ipgre_netlink_parms(struct nlattr *data[],
1346 struct ip_tunnel_parm *parms)
1347 {
1348 memset(parms, 0, sizeof(*parms));
1349
1350 parms->iph.protocol = IPPROTO_GRE;
1351
1352 if (!data)
1353 return;
1354
1355 if (data[IFLA_GRE_LINK])
1356 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1357
1358 if (data[IFLA_GRE_IFLAGS])
1359 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1360
1361 if (data[IFLA_GRE_OFLAGS])
1362 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1363
1364 if (data[IFLA_GRE_IKEY])
1365 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1366
1367 if (data[IFLA_GRE_OKEY])
1368 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1369
1370 if (data[IFLA_GRE_LOCAL])
1371 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1372
1373 if (data[IFLA_GRE_REMOTE])
1374 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1375
1376 if (data[IFLA_GRE_TTL])
1377 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1378
1379 if (data[IFLA_GRE_TOS])
1380 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1381
1382 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1383 parms->iph.frag_off = htons(IP_DF);
1384 }
1385
1386 static int ipgre_tap_init(struct net_device *dev)
1387 {
1388 struct ip_tunnel *tunnel;
1389
1390 tunnel = netdev_priv(dev);
1391
1392 tunnel->dev = dev;
1393 strcpy(tunnel->parms.name, dev->name);
1394
1395 ipgre_tunnel_bind_dev(dev);
1396
1397 return 0;
1398 }
1399
1400 static void ipgre_tap_setup(struct net_device *dev)
1401 {
1402
1403 ether_setup(dev);
1404
1405 dev->init = ipgre_tap_init;
1406 dev->uninit = ipgre_tunnel_uninit;
1407 dev->destructor = free_netdev;
1408 dev->hard_start_xmit = ipgre_tunnel_xmit;
1409 dev->change_mtu = ipgre_tunnel_change_mtu;
1410
1411 dev->iflink = 0;
1412 dev->features |= NETIF_F_NETNS_LOCAL;
1413 }
1414
1415 static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1416 struct nlattr *data[])
1417 {
1418 struct ip_tunnel *nt;
1419 struct net *net = dev_net(dev);
1420 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1421 int mtu;
1422 int err;
1423
1424 nt = netdev_priv(dev);
1425 ipgre_netlink_parms(data, &nt->parms);
1426
1427 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1428 return -EEXIST;
1429
1430 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1431 random_ether_addr(dev->dev_addr);
1432
1433 mtu = ipgre_tunnel_bind_dev(dev);
1434 if (!tb[IFLA_MTU])
1435 dev->mtu = mtu;
1436
1437 err = register_netdevice(dev);
1438 if (err)
1439 goto out;
1440
1441 dev_hold(dev);
1442 ipgre_tunnel_link(ign, nt);
1443
1444 out:
1445 return err;
1446 }
1447
1448 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1449 struct nlattr *data[])
1450 {
1451 struct ip_tunnel *t, *nt;
1452 struct net *net = dev_net(dev);
1453 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1454 struct ip_tunnel_parm p;
1455 int mtu;
1456
1457 if (dev == ign->fb_tunnel_dev)
1458 return -EINVAL;
1459
1460 nt = netdev_priv(dev);
1461 ipgre_netlink_parms(data, &p);
1462
1463 t = ipgre_tunnel_locate(net, &p, 0);
1464
1465 if (t) {
1466 if (t->dev != dev)
1467 return -EEXIST;
1468 } else {
1469 unsigned nflags = 0;
1470
1471 t = nt;
1472
1473 if (ipv4_is_multicast(p.iph.daddr))
1474 nflags = IFF_BROADCAST;
1475 else if (p.iph.daddr)
1476 nflags = IFF_POINTOPOINT;
1477
1478 if ((dev->flags ^ nflags) &
1479 (IFF_POINTOPOINT | IFF_BROADCAST))
1480 return -EINVAL;
1481
1482 ipgre_tunnel_unlink(ign, t);
1483 t->parms.iph.saddr = p.iph.saddr;
1484 t->parms.iph.daddr = p.iph.daddr;
1485 t->parms.i_key = p.i_key;
1486 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1487 memcpy(dev->broadcast, &p.iph.daddr, 4);
1488 ipgre_tunnel_link(ign, t);
1489 netdev_state_change(dev);
1490 }
1491
1492 t->parms.o_key = p.o_key;
1493 t->parms.iph.ttl = p.iph.ttl;
1494 t->parms.iph.tos = p.iph.tos;
1495 t->parms.iph.frag_off = p.iph.frag_off;
1496
1497 if (t->parms.link != p.link) {
1498 t->parms.link = p.link;
1499 mtu = ipgre_tunnel_bind_dev(dev);
1500 if (!tb[IFLA_MTU])
1501 dev->mtu = mtu;
1502 netdev_state_change(dev);
1503 }
1504
1505 return 0;
1506 }
1507
1508 static size_t ipgre_get_size(const struct net_device *dev)
1509 {
1510 return
1511 /* IFLA_GRE_LINK */
1512 nla_total_size(4) +
1513 /* IFLA_GRE_IFLAGS */
1514 nla_total_size(2) +
1515 /* IFLA_GRE_OFLAGS */
1516 nla_total_size(2) +
1517 /* IFLA_GRE_IKEY */
1518 nla_total_size(4) +
1519 /* IFLA_GRE_OKEY */
1520 nla_total_size(4) +
1521 /* IFLA_GRE_LOCAL */
1522 nla_total_size(4) +
1523 /* IFLA_GRE_REMOTE */
1524 nla_total_size(4) +
1525 /* IFLA_GRE_TTL */
1526 nla_total_size(1) +
1527 /* IFLA_GRE_TOS */
1528 nla_total_size(1) +
1529 /* IFLA_GRE_PMTUDISC */
1530 nla_total_size(1) +
1531 0;
1532 }
1533
1534 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1535 {
1536 struct ip_tunnel *t = netdev_priv(dev);
1537 struct ip_tunnel_parm *p = &t->parms;
1538
1539 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1540 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1541 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1542 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1543 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
1544 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1545 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1546 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1547 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1548 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1549
1550 return 0;
1551
1552 nla_put_failure:
1553 return -EMSGSIZE;
1554 }
1555
1556 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1557 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1558 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1559 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1560 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1561 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1562 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1563 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1564 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1565 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1566 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1567 };
1568
1569 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1570 .kind = "gre",
1571 .maxtype = IFLA_GRE_MAX,
1572 .policy = ipgre_policy,
1573 .priv_size = sizeof(struct ip_tunnel),
1574 .setup = ipgre_tunnel_setup,
1575 .validate = ipgre_tunnel_validate,
1576 .newlink = ipgre_newlink,
1577 .changelink = ipgre_changelink,
1578 .get_size = ipgre_get_size,
1579 .fill_info = ipgre_fill_info,
1580 };
1581
1582 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1583 .kind = "gretap",
1584 .maxtype = IFLA_GRE_MAX,
1585 .policy = ipgre_policy,
1586 .priv_size = sizeof(struct ip_tunnel),
1587 .setup = ipgre_tap_setup,
1588 .validate = ipgre_tap_validate,
1589 .newlink = ipgre_newlink,
1590 .changelink = ipgre_changelink,
1591 .get_size = ipgre_get_size,
1592 .fill_info = ipgre_fill_info,
1593 };
1594
1595 /*
1596 * And now the module's code and kernel interface.
1597 */
1598
1599 static int __init ipgre_init(void)
1600 {
1601 int err;
1602
1603 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1604
1605 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1606 printk(KERN_INFO "ipgre init: can't add protocol\n");
1607 return -EAGAIN;
1608 }
1609
1610 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1611 if (err < 0)
1612 goto gen_device_failed;
1613
1614 err = rtnl_link_register(&ipgre_link_ops);
1615 if (err < 0)
1616 goto rtnl_link_failed;
1617
1618 err = rtnl_link_register(&ipgre_tap_ops);
1619 if (err < 0)
1620 goto tap_ops_failed;
1621
1622 out:
1623 return err;
1624
1625 tap_ops_failed:
1626 rtnl_link_unregister(&ipgre_link_ops);
1627 rtnl_link_failed:
1628 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1629 gen_device_failed:
1630 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1631 goto out;
1632 }
1633
1634 static void __exit ipgre_fini(void)
1635 {
1636 rtnl_link_unregister(&ipgre_tap_ops);
1637 rtnl_link_unregister(&ipgre_link_ops);
1638 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1639 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1640 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1641 }
1642
1643 module_init(ipgre_init);
1644 module_exit(ipgre_fini);
1645 MODULE_LICENSE("GPL");
1646 MODULE_ALIAS_RTNL_LINK("gre");
1647 MODULE_ALIAS_RTNL_LINK("gretap");