1 #include <linux/module.h>
2 #include <linux/errno.h>
3 #include <linux/socket.h>
4 #include <linux/skbuff.h>
7 #include <linux/types.h>
8 #include <linux/kernel.h>
9 #include <net/genetlink.h>
12 #include <net/protocol.h>
14 #include <net/udp_tunnel.h>
16 #include <uapi/linux/fou.h>
17 #include <uapi/linux/genetlink.h>
/* Held around the fou_list walks and updates in fou_add_to_port_list(),
 * fou_destroy() and fou_fini().
 */
static DEFINE_SPINLOCK(fou_lock);

/* FOU port instances, linked through their 'list' member. */
static LIST_HEAD(fou_list);
/* Members of struct fou as they appear in this listing; the enclosing
 * "struct fou {" line and the remaining members are not visible here.
 * NOTE(review): code in this file also uses fou->sock, fou->port and
 * fou->protocol, so those members presumably exist - confirm against the
 * full definition.
 */
/* Offload descriptor (GRO callbacks, port, ipproto) passed to
 * udp_add_offload() / udp_del_offload().
 */
26 struct udp_offload udp_offloads
;
/* Linkage on the global fou_list. */
27 struct list_head list
;
/* Member of struct fou_cfg (accessed as cfg->udp_config throughout this
 * file); the enclosing struct header is not visible in this listing.
 * NOTE(review): cfg->type and cfg->protocol are also used elsewhere, so
 * those members presumably exist - confirm against the full definition.
 */
33 struct udp_port_cfg udp_config
;
36 static inline struct fou
*fou_from_sock(struct sock
*sk
)
38 return sk
->sk_user_data
;
/* fou_recv_pull - account for 'len' bytes of encapsulation header that are
 * being removed on receive.
 *
 * Visible steps: reduce the IPv4 header's tot_len by 'len', call
 * skb_postpull_rcsum() over 'len' bytes starting at the UDP header, and
 * reset the transport header.
 *
 * NOTE(review): the embedded numbering skips line 49 between the tot_len
 * update and skb_postpull_rcsum(); a statement (presumably the actual
 * pull of the data pointer) may be missing from this listing - confirm
 * against the full source. The block comment below has also lost its
 * terminator in this listing.
 */
41 static void fou_recv_pull(struct sk_buff
*skb
, size_t len
)
43 struct iphdr
*iph
= ip_hdr(skb
);
45 /* Remove 'len' bytes from the packet (UDP header and
46 * FOU header if present).
48 iph
->tot_len
= htons(ntohs(iph
->tot_len
) - len
);
50 skb_postpull_rcsum(skb
, udp_hdr(skb
), len
);
51 skb_reset_transport_header(skb
);
/* fou_udp_recv - UDP encap_rcv handler for direct FOU (installed by
 * fou_encap_init()).
 *
 * Looks up the FOU instance from the socket, strips the UDP header via
 * fou_recv_pull() and returns the negated inner protocol number held in
 * fou->protocol.
 *
 * NOTE(review): the negative return presumably asks the UDP layer to
 * resubmit the packet as that IP protocol - confirm. Embedded lines 57-60
 * are absent from this listing, so any check on 'fou' is not visible.
 */
54 static int fou_udp_recv(struct sock
*sk
, struct sk_buff
*skb
)
56 struct fou
*fou
= fou_from_sock(sk
);
/* Only the UDP header is removed; direct FOU has no header of its own. */
61 fou_recv_pull(skb
, sizeof(struct udphdr
));
63 return -fou
->protocol
;
/* gue_remcsum - apply the GUE remote checksum offload option on the
 * regular (non-GRO) receive path.
 *
 * @skb:     packet being received
 * @guehdr:  current GUE header pointer
 * @data:    pointer handed in by the caller (gue_udp_recv() passes
 *           data + doffset)
 * @hdrlen:  GUE header length including options
 * @ipproto: inner protocol number (not used in the visible lines)
 *
 * Reads two 16-bit network-order values (start, offset) through 'pd',
 * makes sure hdrlen + max(offset + 2, start) bytes can be pulled,
 * completes the checksum if the skb is not CHECKSUM_COMPLETE, calls
 * remcsum_adjust() on the payload following the GUE header and folds the
 * resulting delta into skb->csum.
 *
 * The caller assigns the return value back to its guehdr pointer.
 *
 * NOTE(review): the declarations of 'pd' and 'delta', the early return
 * in the remcsum_offload branch, the failure path of pskb_may_pull() and
 * the final return are not visible in this listing.
 */
66 static struct guehdr
*gue_remcsum(struct sk_buff
*skb
, struct guehdr
*guehdr
,
67 void *data
, size_t hdrlen
, u8 ipproto
)
/* start/offset are converted from network byte order. */
70 size_t start
= ntohs(pd
[0]);
71 size_t offset
= ntohs(pd
[1]);
/* Bytes needed: GUE header plus whichever of (offset + 2) or start
 * reaches further into the payload.
 */
72 size_t plen
= hdrlen
+ max_t(size_t, offset
+ sizeof(u16
), start
);
75 if (skb
->remcsum_offload
) {
76 /* Already processed in GRO path */
77 skb
->remcsum_offload
= 0;
81 if (!pskb_may_pull(skb
, plen
))
/* Re-derive guehdr: it sits immediately after the UDP header. */
83 guehdr
= (struct guehdr
*)&udp_hdr(skb
)[1];
85 if (unlikely(skb
->ip_summed
!= CHECKSUM_COMPLETE
))
86 __skb_checksum_complete(skb
);
88 delta
= remcsum_adjust((void *)guehdr
+ hdrlen
,
89 skb
->csum
, start
, offset
);
91 /* Adjust skb->csum since we changed the packet */
92 skb
->csum
= csum_add(skb
->csum
, delta
);
/* gue_control_message - handle a GUE control message; called from
 * gue_udp_recv() when guehdr->control is set, and its return value is
 * returned directly to the UDP layer.
 *
 * NOTE(review): only the signature is visible in this listing; the body
 * must be checked in the full source.
 */
97 static int gue_control_message(struct sk_buff
*skb
, struct guehdr
*guehdr
)
/* gue_udp_recv - UDP encap_rcv handler for GUE (installed by
 * gue_encap_init()).
 *
 * Visible flow:
 *  - require the UDP header plus the base GUE header to be pullable;
 *  - take the option length from guehdr->hlen (counted in 4-byte units)
 *    and run a second pskb_may_pull(), then re-read guehdr because the
 *    pull may have moved the data;
 *  - test version == 0 and validate_gue_flags();
 *  - subtract 'len' from the IPv4 tot_len and call skb_postpull_rcsum()
 *    over 'len' bytes from the UDP header;
 *  - when GUE_FLAG_PRIV is set, read the private flags word at
 *    data + doffset and, for GUE_PFLAG_REMCSUM, call gue_remcsum();
 *  - hand control messages to gue_control_message();
 *  - otherwise pull the UDP and GUE headers, reset the transport header
 *    and return the negated inner protocol (guehdr->proto_ctype).
 *
 * NOTE(review): 'data' and 'doffset' are declared on lines missing from
 * this listing, as are the error/drop paths after each failed check and
 * (presumably) the "len += optlen" step before the second pull - confirm
 * against the full source. hdrlen is assigned the same expression twice
 * (embedded lines 130 and 135); the second assignment looks redundant.
 * The second block comment below has lost its terminator in this listing.
 */
104 static int gue_udp_recv(struct sock
*sk
, struct sk_buff
*skb
)
106 struct fou
*fou
= fou_from_sock(sk
);
107 size_t len
, optlen
, hdrlen
;
108 struct guehdr
*guehdr
;
115 len
= sizeof(struct udphdr
) + sizeof(struct guehdr
);
116 if (!pskb_may_pull(skb
, len
))
119 guehdr
= (struct guehdr
*)&udp_hdr(skb
)[1];
121 optlen
= guehdr
->hlen
<< 2;
124 if (!pskb_may_pull(skb
, len
))
127 /* guehdr may change after pull */
128 guehdr
= (struct guehdr
*)&udp_hdr(skb
)[1];
130 hdrlen
= sizeof(struct guehdr
) + optlen
;
132 if (guehdr
->version
!= 0 || validate_gue_flags(guehdr
, optlen
))
135 hdrlen
= sizeof(struct guehdr
) + optlen
;
137 ip_hdr(skb
)->tot_len
= htons(ntohs(ip_hdr(skb
)->tot_len
) - len
);
139 /* Pull csum through the guehdr now . This can be used if
140 * there is a remote checksum offload.
142 skb_postpull_rcsum(skb
, udp_hdr(skb
), len
);
146 if (guehdr
->flags
& GUE_FLAG_PRIV
) {
147 __be32 flags
= *(__be32
*)(data
+ doffset
);
149 doffset
+= GUE_LEN_PRIV
;
151 if (flags
& GUE_PFLAG_REMCSUM
) {
152 guehdr
= gue_remcsum(skb
, guehdr
, data
+ doffset
,
153 hdrlen
, guehdr
->proto_ctype
);
159 doffset
+= GUE_PLEN_REMCSUM
;
163 if (unlikely(guehdr
->control
))
164 return gue_control_message(skb
, guehdr
);
166 __skb_pull(skb
, sizeof(struct udphdr
) + hdrlen
);
167 skb_reset_transport_header(skb
);
169 return -guehdr
->proto_ctype
;
/* fou_gro_receive - GRO receive callback for direct FOU (installed by
 * fou_encap_init()).
 *
 * Picks the IPv4 or IPv6 offload table according to
 * NAPI_GRO_CB(skb)->is_ipv6, looks up the inner protocol's offload ops
 * by NAPI_GRO_CB(skb)->proto and delegates to that protocol's
 * gro_receive callback, storing its result in 'pp'.
 *
 * NOTE(review): the rest of the parameter list (the 'skb' argument), the
 * RCU read-side section around rcu_dereference(), the branch taken when
 * no callback exists and the return statement are on lines missing from
 * this listing.
 */
176 static struct sk_buff
**fou_gro_receive(struct sk_buff
**head
,
179 const struct net_offload
*ops
;
180 struct sk_buff
**pp
= NULL
;
181 u8 proto
= NAPI_GRO_CB(skb
)->proto
;
182 const struct net_offload
**offloads
;
185 offloads
= NAPI_GRO_CB(skb
)->is_ipv6
? inet6_offloads
: inet_offloads
;
186 ops
= rcu_dereference(offloads
[proto
]);
187 if (!ops
|| !ops
->callbacks
.gro_receive
)
190 pp
= ops
->callbacks
.gro_receive(head
, skb
);
/* fou_gro_complete - GRO complete callback for direct FOU (installed by
 * fou_encap_init()).
 *
 * Calls udp_tunnel_gro_complete() for the outer UDP tunnel, then looks
 * up the inner protocol's offload ops (IPv4 or IPv6 table chosen by
 * NAPI_GRO_CB(skb)->is_ipv6, indexed by NAPI_GRO_CB(skb)->proto) and
 * delegates to its gro_complete callback with the same nhoff. A missing
 * callback triggers WARN_ON().
 *
 * NOTE(review): the declaration and initial value of 'err', the RCU
 * read-side section, the branch after WARN_ON() and the return statement
 * are on lines missing from this listing.
 */
198 static int fou_gro_complete(struct sk_buff
*skb
, int nhoff
)
200 const struct net_offload
*ops
;
201 u8 proto
= NAPI_GRO_CB(skb
)->proto
;
203 const struct net_offload
**offloads
;
205 udp_tunnel_gro_complete(skb
, nhoff
);
208 offloads
= NAPI_GRO_CB(skb
)->is_ipv6
? inet6_offloads
: inet_offloads
;
209 ops
= rcu_dereference(offloads
[proto
]);
210 if (WARN_ON(!ops
|| !ops
->callbacks
.gro_complete
))
213 err
= ops
->callbacks
.gro_complete(skb
, nhoff
);
/* gue_gro_remcsum - apply the GUE remote checksum offload option on the
 * GRO path.
 *
 * @skb:     packet being aggregated
 * @off:     GRO offset of the GUE header
 * @guehdr:  current GUE header pointer
 * @data:    pointer handed in by the caller (gue_gro_receive() passes
 *           data + doffset)
 * @hdrlen:  GUE header length including options
 * @ipproto: inner protocol number (not used in the visible lines)
 *
 * Reads start/offset through 'pd', makes sure off + plen bytes of header
 * are reachable (falling back to skb_gro_header_slow()), calls
 * remcsum_adjust() with the GRO checksum, adds the delta to both
 * skb->csum and NAPI_GRO_CB(skb)->csum, and sets skb->remcsum_offload so
 * that gue_remcsum() later sees the work as already done.
 *
 * The caller assigns the return value back to its guehdr pointer.
 *
 * NOTE(review): the declarations of 'pd' and 'delta', the statements
 * guarded by the remcsum_offload and csum_valid checks, the NULL check
 * after skb_gro_header_slow() and the return are on lines missing from
 * this listing.
 */
221 static struct guehdr
*gue_gro_remcsum(struct sk_buff
*skb
, unsigned int off
,
222 struct guehdr
*guehdr
, void *data
,
223 size_t hdrlen
, u8 ipproto
)
/* start/offset are converted from network byte order. */
226 size_t start
= ntohs(pd
[0]);
227 size_t offset
= ntohs(pd
[1]);
228 size_t plen
= hdrlen
+ max_t(size_t, offset
+ sizeof(u16
), start
);
231 if (skb
->remcsum_offload
)
234 if (!NAPI_GRO_CB(skb
)->csum_valid
)
237 /* Pull checksum that will be written */
238 if (skb_gro_header_hard(skb
, off
+ plen
)) {
239 guehdr
= skb_gro_header_slow(skb
, off
+ plen
, off
);
244 delta
= remcsum_adjust((void *)guehdr
+ hdrlen
,
245 NAPI_GRO_CB(skb
)->csum
, start
, offset
);
247 /* Adjust skb->csum since we changed the packet */
248 skb
->csum
= csum_add(skb
->csum
, delta
);
249 NAPI_GRO_CB(skb
)->csum
= csum_add(NAPI_GRO_CB(skb
)->csum
, delta
);
/* Flag the skb so the non-GRO receive path does not adjust it again. */
251 skb
->remcsum_offload
= 1;
/* gue_gro_receive - GRO receive callback for GUE (installed by
 * gue_encap_init()).
 *
 * Visible flow:
 *  - locate the GUE header at the current GRO offset, using
 *    skb_gro_header_slow() when skb_gro_header_hard() reports that the
 *    fast pointer does not cover the requested length; this is done once
 *    for the base header and once more after reading guehdr->hlen
 *    (option length in 4-byte units);
 *  - test for control messages, a non-zero version, or flags rejected by
 *    validate_gue_flags();
 *  - call skb_gro_postpull_rcsum() over the GUE header;
 *  - when GUE_FLAG_PRIV is set, read the private flags at data + doffset
 *    and, for GUE_PFLAG_REMCSUM, call gue_gro_remcsum();
 *  - skb_gro_pull() past the GUE header;
 *  - walk the held packets starting at *head and clear same_flow on any
 *    whose base GUE word or optional fields differ from this packet;
 *  - look up the inner protocol's offload ops by guehdr->proto_ctype and
 *    delegate to its gro_receive callback (WARN_ON() if absent);
 *  - OR 'flush' into NAPI_GRO_CB(skb)->flush.
 *
 * NOTE(review): the rest of the parameter list, the declarations of
 * 'p', 'data', 'doffset' and 'flush', every goto/continue target, the
 * "len += optlen" step (presumed), the RCU read-side section and the
 * return are on lines missing from this listing. Two block comments
 * below have lost their terminators in this listing.
 */
256 static struct sk_buff
**gue_gro_receive(struct sk_buff
**head
,
259 const struct net_offload
**offloads
;
260 const struct net_offload
*ops
;
261 struct sk_buff
**pp
= NULL
;
263 struct guehdr
*guehdr
;
264 size_t len
, optlen
, hdrlen
, off
;
269 off
= skb_gro_offset(skb
);
270 len
= off
+ sizeof(*guehdr
);
272 guehdr
= skb_gro_header_fast(skb
, off
);
273 if (skb_gro_header_hard(skb
, len
)) {
274 guehdr
= skb_gro_header_slow(skb
, len
, off
);
275 if (unlikely(!guehdr
))
279 optlen
= guehdr
->hlen
<< 2;
282 if (skb_gro_header_hard(skb
, len
)) {
283 guehdr
= skb_gro_header_slow(skb
, len
, off
);
284 if (unlikely(!guehdr
))
288 if (unlikely(guehdr
->control
) || guehdr
->version
!= 0 ||
289 validate_gue_flags(guehdr
, optlen
))
292 hdrlen
= sizeof(*guehdr
) + optlen
;
294 /* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr,
295 * this is needed if there is a remote checkcsum offload.
297 skb_gro_postpull_rcsum(skb
, guehdr
, hdrlen
);
301 if (guehdr
->flags
& GUE_FLAG_PRIV
) {
302 __be32 flags
= *(__be32
*)(data
+ doffset
);
304 doffset
+= GUE_LEN_PRIV
;
306 if (flags
& GUE_PFLAG_REMCSUM
) {
307 guehdr
= gue_gro_remcsum(skb
, off
, guehdr
,
308 data
+ doffset
, hdrlen
,
309 guehdr
->proto_ctype
);
315 doffset
+= GUE_PLEN_REMCSUM
;
319 skb_gro_pull(skb
, hdrlen
);
323 for (p
= *head
; p
; p
= p
->next
) {
324 const struct guehdr
*guehdr2
;
326 if (!NAPI_GRO_CB(p
)->same_flow
)
329 guehdr2
= (struct guehdr
*)(p
->data
+ off
);
331 /* Compare base GUE header to be equal (covers
332 * hlen, version, proto_ctype, and flags.
334 if (guehdr
->word
!= guehdr2
->word
) {
335 NAPI_GRO_CB(p
)->same_flow
= 0;
339 /* Compare optional fields are the same. */
340 if (guehdr
->hlen
&& memcmp(&guehdr
[1], &guehdr2
[1],
341 guehdr
->hlen
<< 2)) {
342 NAPI_GRO_CB(p
)->same_flow
= 0;
348 offloads
= NAPI_GRO_CB(skb
)->is_ipv6
? inet6_offloads
: inet_offloads
;
349 ops
= rcu_dereference(offloads
[guehdr
->proto_ctype
]);
350 if (WARN_ON(!ops
|| !ops
->callbacks
.gro_receive
))
353 pp
= ops
->callbacks
.gro_receive(head
, skb
);
358 NAPI_GRO_CB(skb
)->flush
|= flush
;
/* gue_gro_complete - GRO complete callback for GUE (installed by
 * gue_encap_init()).
 *
 * Reads the GUE header at skb->data + nhoff, takes the inner protocol
 * from proto_ctype, computes the full GUE header length and delegates to
 * the inner protocol's gro_complete callback at nhoff + guehlen. A
 * missing callback triggers WARN_ON().
 *
 * NOTE(review): the declarations of 'proto' and 'err', the RCU
 * read-side section, the branch after WARN_ON() and the return statement
 * are on lines missing from this listing.
 */
363 static int gue_gro_complete(struct sk_buff
*skb
, int nhoff
)
365 const struct net_offload
**offloads
;
366 struct guehdr
*guehdr
= (struct guehdr
*)(skb
->data
+ nhoff
);
367 const struct net_offload
*ops
;
368 unsigned int guehlen
;
372 proto
= guehdr
->proto_ctype
;
/* hlen counts 4-byte units of options following the base header. */
374 guehlen
= sizeof(*guehdr
) + (guehdr
->hlen
<< 2);
377 offloads
= NAPI_GRO_CB(skb
)->is_ipv6
? inet6_offloads
: inet_offloads
;
378 ops
= rcu_dereference(offloads
[proto
]);
379 if (WARN_ON(!ops
|| !ops
->callbacks
.gro_complete
))
/* The inner header starts right after the GUE header. */
382 err
= ops
->callbacks
.gro_complete(skb
, nhoff
+ guehlen
);
/* fou_add_to_port_list - insert a FOU instance into the global fou_list.
 *
 * Under fou_lock, scans fou_list for an existing entry with the same
 * port. If one is found the lock is dropped without inserting; otherwise
 * the new instance is added and the lock released.
 *
 * NOTE(review): the declaration of the iterator 'fout' and both return
 * statements are on lines missing from this listing; the duplicate-port
 * branch presumably returns an error - confirm.
 */
389 static int fou_add_to_port_list(struct fou
*fou
)
393 spin_lock(&fou_lock
);
394 list_for_each_entry(fout
, &fou_list
, list
) {
395 if (fou
->port
== fout
->port
) {
/* Duplicate port: release the lock before leaving the loop. */
396 spin_unlock(&fou_lock
);
401 list_add(&fou
->list
, &fou_list
);
402 spin_unlock(&fou_lock
);
/* fou_release - tear down a FOU instance.
 *
 * Visible steps: remove the instance's UDP offload registration, unlink
 * it from fou_list and clear the socket's sk_user_data so that
 * fou_from_sock() no longer yields this instance.
 *
 * NOTE(review): this function does not take fou_lock around list_del();
 * callers presumably hold it - confirm. fou_destroy() also calls
 * udp_del_offload() on the matching instance; if it then calls this
 * function the offload would be removed twice - verify against the full
 * source. Lines after the sk_user_data reset (presumably releasing the
 * socket and freeing the instance) are missing from this listing.
 */
407 static void fou_release(struct fou
*fou
)
409 struct socket
*sock
= fou
->sock
;
410 struct sock
*sk
= sock
->sk
;
412 udp_del_offload(&fou
->udp_offloads
);
414 list_del(&fou
->list
);
416 /* Remove hooks into tunnel socket */
417 sk
->sk_user_data
= NULL
;
/* fou_encap_init - configure a socket and FOU instance for direct FOU.
 *
 * Installs fou_udp_recv() as the socket's encap_rcv handler, records the
 * inner protocol from the configuration, and fills in the UDP offload
 * descriptor: FOU GRO callbacks, the local UDP port and the inner IP
 * protocol.
 *
 * NOTE(review): the return statement is on a line missing from this
 * listing.
 */
424 static int fou_encap_init(struct sock
*sk
, struct fou
*fou
, struct fou_cfg
*cfg
)
426 udp_sk(sk
)->encap_rcv
= fou_udp_recv
;
427 fou
->protocol
= cfg
->protocol
;
428 fou
->udp_offloads
.callbacks
.gro_receive
= fou_gro_receive
;
429 fou
->udp_offloads
.callbacks
.gro_complete
= fou_gro_complete
;
430 fou
->udp_offloads
.port
= cfg
->udp_config
.local_udp_port
;
431 fou
->udp_offloads
.ipproto
= cfg
->protocol
;
/* gue_encap_init - configure a socket and FOU instance for GUE.
 *
 * Installs gue_udp_recv() as the socket's encap_rcv handler and fills in
 * the UDP offload descriptor with the GUE GRO callbacks and the local
 * UDP port. Unlike fou_encap_init(), no protocol is recorded here: the
 * GUE receive paths take the inner protocol from the GUE header's
 * proto_ctype field.
 *
 * NOTE(review): the return statement is on a line missing from this
 * listing.
 */
436 static int gue_encap_init(struct sock
*sk
, struct fou
*fou
, struct fou_cfg
*cfg
)
438 udp_sk(sk
)->encap_rcv
= gue_udp_recv
;
439 fou
->udp_offloads
.callbacks
.gro_receive
= gue_gro_receive
;
440 fou
->udp_offloads
.callbacks
.gro_complete
= gue_gro_complete
;
441 fou
->udp_offloads
.port
= cfg
->udp_config
.local_udp_port
;
/* fou_create - create a FOU/GUE receive port.
 *
 * @net:   network namespace for the UDP socket
 * @cfg:   parsed configuration (UDP settings, type, protocol)
 * @sockp: not referenced in the visible lines
 *
 * Visible flow: create the UDP socket from cfg->udp_config, allocate a
 * zeroed struct fou, record the port, run the type-specific init
 * (fou_encap_init() or gue_encap_init()), mark the socket as an
 * encapsulation socket, attach the instance through sk_user_data, enable
 * checksum conversion, switch socket allocations to GFP_ATOMIC, register
 * the UDP offload when the family is AF_INET, and finally add the
 * instance to the port list.
 *
 * NOTE(review): the declarations of 'err' and 'sk', the switch statement
 * header and its other case labels, all error checks and the cleanup /
 * return paths are on lines missing from this listing.
 */
446 static int fou_create(struct net
*net
, struct fou_cfg
*cfg
,
447 struct socket
**sockp
)
449 struct fou
*fou
= NULL
;
451 struct socket
*sock
= NULL
;
454 /* Open UDP socket */
455 err
= udp_sock_create(net
, &cfg
->udp_config
, &sock
);
459 /* Allocate FOU port structure */
460 fou
= kzalloc(sizeof(*fou
), GFP_KERNEL
);
468 fou
->port
= cfg
->udp_config
.local_udp_port
;
470 /* Initialise according to the FOU encapsulation type */
472 case FOU_ENCAP_DIRECT
:
473 err
= fou_encap_init(sk
, fou
, cfg
);
478 err
= gue_encap_init(sk
, fou
, cfg
);
487 udp_sk(sk
)->encap_type
= 1;
/* Lets fou_from_sock() find the instance from the receive handlers. */
490 sk
->sk_user_data
= fou
;
493 udp_set_convert_csum(sk
, true);
495 sk
->sk_allocation
= GFP_ATOMIC
;
/* The offload is only registered for IPv4 sockets here. */
497 if (cfg
->udp_config
.family
== AF_INET
) {
498 err
= udp_add_offload(&fou
->udp_offloads
);
503 err
= fou_add_to_port_list(fou
);
/* fou_destroy - remove the FOU port matching cfg's local UDP port.
 *
 * @net: network namespace (not referenced in the visible lines)
 * @cfg: configuration carrying the port to remove
 *
 * Under fou_lock, walks fou_list and, for the entry whose port matches,
 * removes its UDP offload registration.
 *
 * NOTE(review): the declarations of 'fou' and the result variable, the
 * remaining statements of the matching branch (presumably releasing the
 * instance and leaving the loop) and the return are on lines missing
 * from this listing. fou_release() also calls udp_del_offload(); check
 * for a double removal in the full source.
 */
520 static int fou_destroy(struct net
*net
, struct fou_cfg
*cfg
)
523 u16 port
= cfg
->udp_config
.local_udp_port
;
526 spin_lock(&fou_lock
);
527 list_for_each_entry(fou
, &fou_list
, list
) {
528 if (fou
->port
== port
) {
529 udp_del_offload(&fou
->udp_offloads
);
535 spin_unlock(&fou_lock
);
/* Generic netlink family for FOU configuration; registered in fou_init()
 * and unregistered in fou_fini(). The family id is allocated dynamically
 * (GENL_ID_GENERATE).
 *
 * NOTE(review): embedded lines 542 and 546 and the closing brace are
 * missing from this listing, so further initialisers may exist.
 */
540 static struct genl_family fou_nl_family
= {
541 .id
= GENL_ID_GENERATE
,
543 .name
= FOU_GENL_NAME
,
544 .version
= FOU_GENL_VERSION
,
545 .maxattr
= FOU_ATTR_MAX
,
549 static struct nla_policy fou_nl_policy
[FOU_ATTR_MAX
+ 1] = {
550 [FOU_ATTR_PORT
] = { .type
= NLA_U16
, },
551 [FOU_ATTR_AF
] = { .type
= NLA_U8
, },
552 [FOU_ATTR_IPPROTO
] = { .type
= NLA_U8
, },
553 [FOU_ATTR_TYPE
] = { .type
= NLA_U8
, },
/* parse_nl_config - translate FOU netlink attributes into a fou_cfg.
 *
 * Zeroes *cfg, defaults the address family to AF_INET, then copies each
 * attribute that is present: FOU_ATTR_AF (only AF_INET or AF_INET6 pass
 * the check), FOU_ATTR_PORT into udp_config.local_udp_port,
 * FOU_ATTR_IPPROTO into protocol and FOU_ATTR_TYPE into type.
 *
 * The caller fou_nl_cmd_add_port() stores the return value in 'err'.
 *
 * NOTE(review): the second parameter (the 'cfg' pointer), the statement
 * executed for an unsupported family (presumably an error return) and
 * the final return are on lines missing from this listing.
 */
556 static int parse_nl_config(struct genl_info
*info
,
559 memset(cfg
, 0, sizeof(*cfg
));
/* IPv4 unless FOU_ATTR_AF says otherwise. */
561 cfg
->udp_config
.family
= AF_INET
;
563 if (info
->attrs
[FOU_ATTR_AF
]) {
564 u8 family
= nla_get_u8(info
->attrs
[FOU_ATTR_AF
]);
566 if (family
!= AF_INET
&& family
!= AF_INET6
)
569 cfg
->udp_config
.family
= family
;
572 if (info
->attrs
[FOU_ATTR_PORT
]) {
573 u16 port
= nla_get_u16(info
->attrs
[FOU_ATTR_PORT
]);
575 cfg
->udp_config
.local_udp_port
= port
;
578 if (info
->attrs
[FOU_ATTR_IPPROTO
])
579 cfg
->protocol
= nla_get_u8(info
->attrs
[FOU_ATTR_IPPROTO
]);
581 if (info
->attrs
[FOU_ATTR_TYPE
])
582 cfg
->type
= nla_get_u8(info
->attrs
[FOU_ATTR_TYPE
]);
/* fou_nl_cmd_add_port - generic netlink handler that adds a FOU port.
 *
 * Parses the request into a fou_cfg and creates the port in init_net via
 * fou_create(), passing NULL for the optional socket out-parameter.
 *
 * NOTE(review): the declarations of 'cfg' and 'err' and the check on
 * 'err' after parsing are on lines missing from this listing.
 */
587 static int fou_nl_cmd_add_port(struct sk_buff
*skb
, struct genl_info
*info
)
592 err
= parse_nl_config(info
, &cfg
);
596 return fou_create(&init_net
, &cfg
, NULL
);
599 static int fou_nl_cmd_rm_port(struct sk_buff
*skb
, struct genl_info
*info
)
603 parse_nl_config(info
, &cfg
);
605 return fou_destroy(&init_net
, &cfg
);
/* Generic netlink operations for the FOU family: one entry handled by
 * fou_nl_cmd_add_port() and one by fou_nl_cmd_rm_port(). Both validate
 * attributes with fou_nl_policy and carry GENL_ADMIN_PERM.
 *
 * NOTE(review): the per-entry braces and the .cmd initialisers are on
 * lines missing from this listing.
 */
608 static const struct genl_ops fou_nl_ops
[] = {
611 .doit
= fou_nl_cmd_add_port
,
612 .policy
= fou_nl_policy
,
613 .flags
= GENL_ADMIN_PERM
,
617 .doit
= fou_nl_cmd_rm_port
,
618 .policy
= fou_nl_policy
,
619 .flags
= GENL_ADMIN_PERM
,
623 size_t fou_encap_hlen(struct ip_tunnel_encap
*e
)
625 return sizeof(struct udphdr
);
627 EXPORT_SYMBOL(fou_encap_hlen
);
/* gue_encap_hlen - encapsulation overhead for GUE.
 *
 * Starts from the UDP header plus the base GUE header, adds
 * GUE_PLEN_REMCSUM when TUNNEL_ENCAP_FLAG_REMCSUM is set in e->flags,
 * and adds GUE_LEN_PRIV when a private-flags word is needed.
 *
 * NOTE(review): the declaration of 'len', the statement that sets
 * need_priv inside the REMCSUM branch (presumed) and the return are on
 * lines missing from this listing.
 */
629 size_t gue_encap_hlen(struct ip_tunnel_encap
*e
)
632 bool need_priv
= false;
634 len
= sizeof(struct udphdr
) + sizeof(struct guehdr
);
636 if (e
->flags
& TUNNEL_ENCAP_FLAG_REMCSUM
) {
637 len
+= GUE_PLEN_REMCSUM
;
641 len
+= need_priv
? GUE_LEN_PRIV
: 0;
645 EXPORT_SYMBOL(gue_encap_hlen
);
/* fou_build_udp - prepend and fill the outer UDP header on transmit.
 *
 * Pushes room for a UDP header, makes it the transport header, sets the
 * UDP length to the resulting skb length, sets the UDP checksum through
 * udp_set_csum() (its first argument is true when TUNNEL_ENCAP_FLAG_CSUM
 * is NOT set in e->flags) and reports IPPROTO_UDP to the caller through
 * *protocol.
 *
 * NOTE(review): the declaration of 'uh' and the assignments of the
 * source/destination ports (the 'sport' argument is not used in the
 * visible lines) are on lines missing from this listing.
 */
647 static void fou_build_udp(struct sk_buff
*skb
, struct ip_tunnel_encap
*e
,
648 struct flowi4
*fl4
, u8
*protocol
, __be16 sport
)
652 skb_push(skb
, sizeof(struct udphdr
));
653 skb_reset_transport_header(skb
);
659 uh
->len
= htons(skb
->len
);
661 udp_set_csum(!(e
->flags
& TUNNEL_ENCAP_FLAG_CSUM
), skb
,
662 fl4
->saddr
, fl4
->daddr
, skb
->len
);
/* The outer IP header must now carry UDP as its protocol. */
664 *protocol
= IPPROTO_UDP
;
/* fou_build_header - build the transmit encapsulation for plain FOU.
 *
 * Chooses the GSO tunnel type from TUNNEL_ENCAP_FLAG_CSUM, lets
 * iptunnel_handle_offloads() prepare the skb, picks the source port
 * (e->sport when non-zero, otherwise a flow-derived port from
 * udp_flow_src_port()) and then adds the UDP header via fou_build_udp().
 *
 * NOTE(review): the declaration of 'sport', the error check on the skb
 * returned by iptunnel_handle_offloads(), the remaining arguments of
 * udp_flow_src_port() and the return are on lines missing from this
 * listing.
 */
667 int fou_build_header(struct sk_buff
*skb
, struct ip_tunnel_encap
*e
,
668 u8
*protocol
, struct flowi4
*fl4
)
670 bool csum
= !!(e
->flags
& TUNNEL_ENCAP_FLAG_CSUM
);
671 int type
= csum
? SKB_GSO_UDP_TUNNEL_CSUM
: SKB_GSO_UDP_TUNNEL
;
674 skb
= iptunnel_handle_offloads(skb
, csum
, type
);
679 sport
= e
->sport
? : udp_flow_src_port(dev_net(skb
->dev
),
681 fou_build_udp(skb
, e
, fl4
, protocol
, sport
);
685 EXPORT_SYMBOL(fou_build_header
);
/* gue_build_header - build the transmit encapsulation for GUE.
 *
 * Visible flow:
 *  - choose the GSO tunnel type from TUNNEL_ENCAP_FLAG_CSUM;
 *  - when remote checksum offload is requested and the skb is
 *    CHECKSUM_PARTIAL, reserve GUE_PLEN_REMCSUM of option space and add
 *    SKB_GSO_TUNNEL_REMCSUM to the type;
 *  - reserve GUE_LEN_PRIV when a private-flags word is needed;
 *  - run iptunnel_handle_offloads(), pick the source port, push the GUE
 *    header and fill in hlen (option length in 4-byte units) and
 *    proto_ctype (the inner protocol passed in through *protocol);
 *  - for the private-flags case set GUE_FLAG_PRIV, and for remote
 *    checksum write the checksum start and start + csum_offset, both
 *    relative to the end of the GUE header, into 'pd'; non-GSO packets
 *    are switched to CHECKSUM_NONE with encapsulation cleared;
 *  - finish with fou_build_udp().
 *
 * NOTE(review): the declarations of 'sport', 'data' and 'pd', the
 * statement that sets need_priv (presumed), the remaining guehdr field
 * initialisers, the enclosing "if (need_priv)" (presumed), every error
 * return and the final return are on lines missing from this listing.
 */
687 int gue_build_header(struct sk_buff
*skb
, struct ip_tunnel_encap
*e
,
688 u8
*protocol
, struct flowi4
*fl4
)
690 bool csum
= !!(e
->flags
& TUNNEL_ENCAP_FLAG_CSUM
);
691 int type
= csum
? SKB_GSO_UDP_TUNNEL_CSUM
: SKB_GSO_UDP_TUNNEL
;
692 struct guehdr
*guehdr
;
693 size_t hdrlen
, optlen
= 0;
696 bool need_priv
= false;
698 if ((e
->flags
& TUNNEL_ENCAP_FLAG_REMCSUM
) &&
699 skb
->ip_summed
== CHECKSUM_PARTIAL
) {
701 optlen
+= GUE_PLEN_REMCSUM
;
702 type
|= SKB_GSO_TUNNEL_REMCSUM
;
706 optlen
+= need_priv
? GUE_LEN_PRIV
: 0;
708 skb
= iptunnel_handle_offloads(skb
, csum
, type
);
713 /* Get source port (based on flow hash) before skb_push */
714 sport
= e
->sport
? : udp_flow_src_port(dev_net(skb
->dev
),
717 hdrlen
= sizeof(struct guehdr
) + optlen
;
719 skb_push(skb
, hdrlen
);
721 guehdr
= (struct guehdr
*)skb
->data
;
/* hlen is expressed in 4-byte units. */
725 guehdr
->hlen
= optlen
>> 2;
727 guehdr
->proto_ctype
= *protocol
;
732 __be32
*flags
= data
;
734 guehdr
->flags
|= GUE_FLAG_PRIV
;
736 data
+= GUE_LEN_PRIV
;
738 if (type
& SKB_GSO_TUNNEL_REMCSUM
) {
739 u16 csum_start
= skb_checksum_start_offset(skb
);
742 if (csum_start
< hdrlen
)
/* Make the offsets relative to the end of the GUE header. */
745 csum_start
-= hdrlen
;
746 pd
[0] = htons(csum_start
);
747 pd
[1] = htons(csum_start
+ skb
->csum_offset
);
749 if (!skb_is_gso(skb
)) {
750 skb
->ip_summed
= CHECKSUM_NONE
;
751 skb
->encapsulation
= 0;
754 *flags
|= GUE_PFLAG_REMCSUM
;
755 data
+= GUE_PLEN_REMCSUM
;
760 fou_build_udp(skb
, e
, fl4
, protocol
, sport
);
764 EXPORT_SYMBOL(gue_build_header
);
766 #ifdef CONFIG_NET_FOU_IP_TUNNELS
768 static const struct ip_tunnel_encap_ops __read_mostly fou_iptun_ops
= {
769 .encap_hlen
= fou_encap_hlen
,
770 .build_header
= fou_build_header
,
773 static const struct ip_tunnel_encap_ops __read_mostly gue_iptun_ops
= {
774 .encap_hlen
= gue_encap_hlen
,
775 .build_header
= gue_build_header
,
/* ip_tunnel_encap_add_fou_ops - register the FOU and GUE encapsulation
 * ops with the ip_tunnel core (CONFIG_NET_FOU_IP_TUNNELS build).
 *
 * Registers fou_iptun_ops first, then gue_iptun_ops. The FOU
 * registration is removed again next to the "can't add gue ops" message,
 * i.e. the GUE failure path rolls back the first step.
 *
 * NOTE(review): the declaration of 'ret', the "if (ret < 0)" checks
 * (presumed) and all return statements are on lines missing from this
 * listing.
 */
778 static int ip_tunnel_encap_add_fou_ops(void)
782 ret
= ip_tunnel_encap_add_ops(&fou_iptun_ops
, TUNNEL_ENCAP_FOU
);
784 pr_err("can't add fou ops\n");
788 ret
= ip_tunnel_encap_add_ops(&gue_iptun_ops
, TUNNEL_ENCAP_GUE
);
790 pr_err("can't add gue ops\n");
791 ip_tunnel_encap_del_ops(&fou_iptun_ops
, TUNNEL_ENCAP_FOU
);
798 static void ip_tunnel_encap_del_fou_ops(void)
800 ip_tunnel_encap_del_ops(&fou_iptun_ops
, TUNNEL_ENCAP_FOU
);
801 ip_tunnel_encap_del_ops(&gue_iptun_ops
, TUNNEL_ENCAP_GUE
);
/* Second definition of ip_tunnel_encap_add_fou_ops(), for builds without
 * CONFIG_NET_FOU_IP_TUNNELS.
 * NOTE(review): neither the #else line nor the body is visible in this
 * listing; presumably a stub that reports success - confirm.
 */
806 static int ip_tunnel_encap_add_fou_ops(void)
/* Second definition of ip_tunnel_encap_del_fou_ops(), for builds without
 * CONFIG_NET_FOU_IP_TUNNELS.
 * NOTE(review): the body and the closing #endif are not visible in this
 * listing; presumably an empty stub - confirm.
 */
811 static void ip_tunnel_encap_del_fou_ops(void)
/* fou_init - module initialisation.
 *
 * Registers the FOU generic netlink family with its operations, then
 * registers the ip_tunnel encapsulation ops. The netlink family is
 * unregistered again after the second step, presumably only when that
 * step fails.
 *
 * NOTE(review): the declaration of 'ret', the remaining arguments of
 * genl_register_family_with_ops(), the error checks and the return are
 * on lines missing from this listing.
 */
817 static int __init
fou_init(void)
821 ret
= genl_register_family_with_ops(&fou_nl_family
,
827 ret
= ip_tunnel_encap_add_fou_ops();
829 genl_unregister_family(&fou_nl_family
);
/* fou_fini - module teardown.
 *
 * Unregisters the ip_tunnel encapsulation ops and the generic netlink
 * family, then walks fou_list under fou_lock using the _safe iterator,
 * which allows entries to be unlinked during the walk.
 *
 * NOTE(review): the loop body (presumably a call releasing each
 * instance) is on a line missing from this listing.
 */
835 static void __exit
fou_fini(void)
837 struct fou
*fou
, *next
;
839 ip_tunnel_encap_del_fou_ops();
841 genl_unregister_family(&fou_nl_family
);
843 /* Close all the FOU sockets */
845 spin_lock(&fou_lock
);
846 list_for_each_entry_safe(fou
, next
, &fou_list
, list
)
848 spin_unlock(&fou_lock
);
851 module_init(fou_init
);
852 module_exit(fou_fini
);
853 MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
854 MODULE_LICENSE("GPL");