/*
 *	Linux NET3: IP/IP protocol decoder modified to support
 *		    virtual tunnel interface
 *
 *	Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */
/*
   This version of net/ipv4/ip_vti.c is cloned from net/ipv4/ipip.c

   For comments look at net/ipv4/ip_gre.c --ANK
 */
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/ip_tunnels.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
47 static struct rtnl_link_ops vti_link_ops __read_mostly
;
49 static int vti_net_id __read_mostly
;
50 static int vti_tunnel_init(struct net_device
*dev
);
52 static int vti_err(struct sk_buff
*skb
, u32 info
)
55 /* All the routers (except for Linux) return only
56 * 8 bytes of packet payload. It means, that precise relaying of
57 * ICMP in the real Internet is absolutely infeasible.
59 struct net
*net
= dev_net(skb
->dev
);
60 struct ip_tunnel_net
*itn
= net_generic(net
, vti_net_id
);
61 struct iphdr
*iph
= (struct iphdr
*)skb
->data
;
62 const int type
= icmp_hdr(skb
)->type
;
63 const int code
= icmp_hdr(skb
)->code
;
69 case ICMP_PARAMETERPROB
:
72 case ICMP_DEST_UNREACH
:
75 case ICMP_PORT_UNREACH
:
76 /* Impossible event. */
79 /* All others are translated to HOST_UNREACH. */
83 case ICMP_TIME_EXCEEDED
:
84 if (code
!= ICMP_EXC_TTL
)
91 t
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
, TUNNEL_NO_KEY
,
92 iph
->daddr
, iph
->saddr
, 0);
96 if (type
== ICMP_DEST_UNREACH
&& code
== ICMP_FRAG_NEEDED
) {
97 ipv4_update_pmtu(skb
, dev_net(skb
->dev
), info
,
98 t
->parms
.link
, 0, IPPROTO_IPIP
, 0);
104 if (t
->parms
.iph
.ttl
== 0 && type
== ICMP_TIME_EXCEEDED
)
107 if (time_before(jiffies
, t
->err_time
+ IPTUNNEL_ERR_TIMEO
))
111 t
->err_time
= jiffies
;
116 /* We dont digest the packet therefore let the packet pass */
117 static int vti_rcv(struct sk_buff
*skb
)
119 struct ip_tunnel
*tunnel
;
120 const struct iphdr
*iph
= ip_hdr(skb
);
121 struct net
*net
= dev_net(skb
->dev
);
122 struct ip_tunnel_net
*itn
= net_generic(net
, vti_net_id
);
124 tunnel
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
, TUNNEL_NO_KEY
,
125 iph
->saddr
, iph
->daddr
, 0);
126 if (tunnel
!= NULL
) {
127 struct pcpu_tstats
*tstats
;
128 u32 oldmark
= skb
->mark
;
132 /* temporarily mark the skb with the tunnel o_key, to
133 * only match policies with this mark.
135 skb
->mark
= be32_to_cpu(tunnel
->parms
.o_key
);
136 ret
= xfrm4_policy_check(NULL
, XFRM_POLICY_IN
, skb
);
141 tstats
= this_cpu_ptr(tunnel
->dev
->tstats
);
142 u64_stats_update_begin(&tstats
->syncp
);
143 tstats
->rx_packets
++;
144 tstats
->rx_bytes
+= skb
->len
;
145 u64_stats_update_end(&tstats
->syncp
);
148 skb
->dev
= tunnel
->dev
;
155 /* This function assumes it is being called from dev_queue_xmit()
156 * and that skb is filled properly by that function.
159 static netdev_tx_t
vti_tunnel_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
161 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
162 struct iphdr
*tiph
= &tunnel
->parms
.iph
;
164 struct rtable
*rt
; /* Route to the other host */
165 struct net_device
*tdev
; /* Device to other host */
166 struct iphdr
*old_iph
= ip_hdr(skb
);
167 __be32 dst
= tiph
->daddr
;
171 if (skb
->protocol
!= htons(ETH_P_IP
))
176 memset(&fl4
, 0, sizeof(fl4
));
177 flowi4_init_output(&fl4
, tunnel
->parms
.link
,
178 be32_to_cpu(tunnel
->parms
.o_key
), RT_TOS(tos
),
181 dst
, tiph
->saddr
, 0, 0);
182 rt
= ip_route_output_key(dev_net(dev
), &fl4
);
184 dev
->stats
.tx_carrier_errors
++;
187 /* if there is no transform then this tunnel is not functional.
188 * Or if the xfrm is not mode tunnel.
191 rt
->dst
.xfrm
->props
.mode
!= XFRM_MODE_TUNNEL
) {
192 dev
->stats
.tx_carrier_errors
++;
199 dev
->stats
.collisions
++;
203 if (tunnel
->err_count
> 0) {
204 if (time_before(jiffies
,
205 tunnel
->err_time
+ IPTUNNEL_ERR_TIMEO
)) {
207 dst_link_failure(skb
);
209 tunnel
->err_count
= 0;
212 memset(IPCB(skb
), 0, sizeof(*IPCB(skb
)));
214 skb_dst_set(skb
, &rt
->dst
);
216 skb
->dev
= skb_dst(skb
)->dev
;
218 err
= dst_output(skb
);
219 if (net_xmit_eval(err
) == 0)
221 iptunnel_xmit_stats(err
, &dev
->stats
, dev
->tstats
);
225 dst_link_failure(skb
);
227 dev
->stats
.tx_errors
++;
233 vti_tunnel_ioctl(struct net_device
*dev
, struct ifreq
*ifr
, int cmd
)
236 struct ip_tunnel_parm p
;
238 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
241 if (cmd
== SIOCADDTUNNEL
|| cmd
== SIOCCHGTUNNEL
) {
242 if (p
.iph
.version
!= 4 || p
.iph
.protocol
!= IPPROTO_IPIP
||
247 err
= ip_tunnel_ioctl(dev
, &p
, cmd
);
251 if (cmd
!= SIOCDELTUNNEL
) {
252 p
.i_flags
|= GRE_KEY
| VTI_ISVTI
;
253 p
.o_flags
|= GRE_KEY
;
256 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &p
, sizeof(p
)))
261 static const struct net_device_ops vti_netdev_ops
= {
262 .ndo_init
= vti_tunnel_init
,
263 .ndo_uninit
= ip_tunnel_uninit
,
264 .ndo_start_xmit
= vti_tunnel_xmit
,
265 .ndo_do_ioctl
= vti_tunnel_ioctl
,
266 .ndo_change_mtu
= ip_tunnel_change_mtu
,
267 .ndo_get_stats64
= ip_tunnel_get_stats64
,
270 static void vti_tunnel_setup(struct net_device
*dev
)
272 dev
->netdev_ops
= &vti_netdev_ops
;
273 ip_tunnel_setup(dev
, vti_net_id
);
276 static int vti_tunnel_init(struct net_device
*dev
)
278 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
279 struct iphdr
*iph
= &tunnel
->parms
.iph
;
281 memcpy(dev
->dev_addr
, &iph
->saddr
, 4);
282 memcpy(dev
->broadcast
, &iph
->daddr
, 4);
284 dev
->type
= ARPHRD_TUNNEL
;
285 dev
->hard_header_len
= LL_MAX_HEADER
+ sizeof(struct iphdr
);
286 dev
->mtu
= ETH_DATA_LEN
;
287 dev
->flags
= IFF_NOARP
;
290 dev
->features
|= NETIF_F_NETNS_LOCAL
;
291 dev
->features
|= NETIF_F_LLTX
;
292 dev
->priv_flags
&= ~IFF_XMIT_DST_RELEASE
;
294 return ip_tunnel_init(dev
);
297 static void __net_init
vti_fb_tunnel_init(struct net_device
*dev
)
299 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
300 struct iphdr
*iph
= &tunnel
->parms
.iph
;
303 iph
->protocol
= IPPROTO_IPIP
;
307 static struct xfrm_tunnel vti_handler __read_mostly
= {
309 .err_handler
= vti_err
,
313 static int __net_init
vti_init_net(struct net
*net
)
316 struct ip_tunnel_net
*itn
;
318 err
= ip_tunnel_init_net(net
, vti_net_id
, &vti_link_ops
, "ip_vti0");
321 itn
= net_generic(net
, vti_net_id
);
322 vti_fb_tunnel_init(itn
->fb_tunnel_dev
);
326 static void __net_exit
vti_exit_net(struct net
*net
)
328 struct ip_tunnel_net
*itn
= net_generic(net
, vti_net_id
);
329 ip_tunnel_delete_net(itn
, &vti_link_ops
);
332 static struct pernet_operations vti_net_ops
= {
333 .init
= vti_init_net
,
334 .exit
= vti_exit_net
,
336 .size
= sizeof(struct ip_tunnel_net
),
/*
 * rtnl_link_ops .validate hook.
 *
 * NOTE(review): the body was lost in extraction; the gap leaves room for a
 * single statement, reconstructed here as "accept everything" - confirm
 * against the upstream file.
 */
static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	return 0;
}
344 static void vti_netlink_parms(struct nlattr
*data
[],
345 struct ip_tunnel_parm
*parms
)
347 memset(parms
, 0, sizeof(*parms
));
349 parms
->iph
.protocol
= IPPROTO_IPIP
;
354 if (data
[IFLA_VTI_LINK
])
355 parms
->link
= nla_get_u32(data
[IFLA_VTI_LINK
]);
357 if (data
[IFLA_VTI_IKEY
])
358 parms
->i_key
= nla_get_be32(data
[IFLA_VTI_IKEY
]);
360 if (data
[IFLA_VTI_OKEY
])
361 parms
->o_key
= nla_get_be32(data
[IFLA_VTI_OKEY
]);
363 if (data
[IFLA_VTI_LOCAL
])
364 parms
->iph
.saddr
= nla_get_be32(data
[IFLA_VTI_LOCAL
]);
366 if (data
[IFLA_VTI_REMOTE
])
367 parms
->iph
.daddr
= nla_get_be32(data
[IFLA_VTI_REMOTE
]);
371 static int vti_newlink(struct net
*src_net
, struct net_device
*dev
,
372 struct nlattr
*tb
[], struct nlattr
*data
[])
374 struct ip_tunnel_parm parms
;
376 vti_netlink_parms(data
, &parms
);
377 return ip_tunnel_newlink(dev
, tb
, &parms
);
380 static int vti_changelink(struct net_device
*dev
, struct nlattr
*tb
[],
381 struct nlattr
*data
[])
383 struct ip_tunnel_parm p
;
385 vti_netlink_parms(data
, &p
);
386 return ip_tunnel_changelink(dev
, tb
, &p
);
389 static size_t vti_get_size(const struct net_device
*dev
)
400 /* IFLA_VTI_REMOTE */
405 static int vti_fill_info(struct sk_buff
*skb
, const struct net_device
*dev
)
407 struct ip_tunnel
*t
= netdev_priv(dev
);
408 struct ip_tunnel_parm
*p
= &t
->parms
;
410 nla_put_u32(skb
, IFLA_VTI_LINK
, p
->link
);
411 nla_put_be32(skb
, IFLA_VTI_IKEY
, p
->i_key
);
412 nla_put_be32(skb
, IFLA_VTI_OKEY
, p
->o_key
);
413 nla_put_be32(skb
, IFLA_VTI_LOCAL
, p
->iph
.saddr
);
414 nla_put_be32(skb
, IFLA_VTI_REMOTE
, p
->iph
.daddr
);
419 static const struct nla_policy vti_policy
[IFLA_VTI_MAX
+ 1] = {
420 [IFLA_VTI_LINK
] = { .type
= NLA_U32
},
421 [IFLA_VTI_IKEY
] = { .type
= NLA_U32
},
422 [IFLA_VTI_OKEY
] = { .type
= NLA_U32
},
423 [IFLA_VTI_LOCAL
] = { .len
= FIELD_SIZEOF(struct iphdr
, saddr
) },
424 [IFLA_VTI_REMOTE
] = { .len
= FIELD_SIZEOF(struct iphdr
, daddr
) },
427 static struct rtnl_link_ops vti_link_ops __read_mostly
= {
429 .maxtype
= IFLA_VTI_MAX
,
430 .policy
= vti_policy
,
431 .priv_size
= sizeof(struct ip_tunnel
),
432 .setup
= vti_tunnel_setup
,
433 .validate
= vti_tunnel_validate
,
434 .newlink
= vti_newlink
,
435 .changelink
= vti_changelink
,
436 .get_size
= vti_get_size
,
437 .fill_info
= vti_fill_info
,
440 static int __init
vti_init(void)
444 pr_info("IPv4 over IPSec tunneling driver\n");
446 err
= register_pernet_device(&vti_net_ops
);
449 err
= xfrm4_mode_tunnel_input_register(&vti_handler
);
451 unregister_pernet_device(&vti_net_ops
);
452 pr_info("vti init: can't register tunnel\n");
455 err
= rtnl_link_register(&vti_link_ops
);
457 goto rtnl_link_failed
;
462 xfrm4_mode_tunnel_input_deregister(&vti_handler
);
463 unregister_pernet_device(&vti_net_ops
);
467 static void __exit
vti_fini(void)
469 rtnl_link_unregister(&vti_link_ops
);
470 if (xfrm4_mode_tunnel_input_deregister(&vti_handler
))
471 pr_info("vti close: can't deregister tunnel\n");
473 unregister_pernet_device(&vti_net_ops
);
476 module_init(vti_init
);
477 module_exit(vti_fini
);
478 MODULE_LICENSE("GPL");
479 MODULE_ALIAS_RTNL_LINK("vti");
480 MODULE_ALIAS_NETDEV("ip_vti0");