Commit | Line | Data |
---|---|---|
da5bab07 DB |
1 | /* |
2 | * IPV4 GSO/GRO offload support | |
3 | * Linux INET implementation | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU General Public License | |
7 | * as published by the Free Software Foundation; either version | |
8 | * 2 of the License, or (at your option) any later version. | |
9 | * | |
10 | * UDPv4 GSO support | |
11 | */ | |
12 | ||
13 | #include <linux/skbuff.h> | |
14 | #include <net/udp.h> | |
15 | #include <net/protocol.h> | |
16 | ||
b582ef09 | 17 | static DEFINE_SPINLOCK(udp_offload_lock); |
a1d0cd8e | 18 | static struct udp_offload_priv __rcu *udp_offload_base __read_mostly; |
b582ef09 | 19 | |
a664a4f7 SP |
20 | #define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock)) |
21 | ||
b582ef09 OG |
22 | struct udp_offload_priv { |
23 | struct udp_offload *offload; | |
24 | struct rcu_head rcu; | |
25 | struct udp_offload_priv __rcu *next; | |
26 | }; | |
27 | ||
8bce6d7d TH |
28 | static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, |
29 | netdev_features_t features, | |
30 | struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb, | |
31 | netdev_features_t features), | |
4bcb877d | 32 | __be16 new_protocol, bool is_ipv6) |
155e010e TH |
33 | { |
34 | struct sk_buff *segs = ERR_PTR(-EINVAL); | |
35 | u16 mac_offset = skb->mac_header; | |
36 | int mac_len = skb->mac_len; | |
37 | int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); | |
38 | __be16 protocol = skb->protocol; | |
39 | netdev_features_t enc_features; | |
40 | int udp_offset, outer_hlen; | |
41 | unsigned int oldlen; | |
4bcb877d TH |
42 | bool need_csum = !!(skb_shinfo(skb)->gso_type & |
43 | SKB_GSO_UDP_TUNNEL_CSUM); | |
e585f236 TH |
44 | bool remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); |
45 | bool offload_csum = false, dont_encap = (need_csum || remcsum); | |
155e010e TH |
46 | |
47 | oldlen = (u16)~skb->len; | |
48 | ||
49 | if (unlikely(!pskb_may_pull(skb, tnl_hlen))) | |
50 | goto out; | |
51 | ||
52 | skb->encapsulation = 0; | |
53 | __skb_pull(skb, tnl_hlen); | |
54 | skb_reset_mac_header(skb); | |
55 | skb_set_network_header(skb, skb_inner_network_offset(skb)); | |
56 | skb->mac_len = skb_inner_network_offset(skb); | |
8bce6d7d | 57 | skb->protocol = new_protocol; |
4bcb877d | 58 | skb->encap_hdr_csum = need_csum; |
e585f236 | 59 | skb->remcsum_offload = remcsum; |
155e010e | 60 | |
4bcb877d TH |
61 | /* Try to offload checksum if possible */ |
62 | offload_csum = !!(need_csum && | |
63 | (skb->dev->features & | |
64 | (is_ipv6 ? NETIF_F_V6_CSUM : NETIF_F_V4_CSUM))); | |
155e010e TH |
65 | |
66 | /* segment inner packet. */ | |
1e16aa3d | 67 | enc_features = skb->dev->hw_enc_features & features; |
8bce6d7d | 68 | segs = gso_inner_segment(skb, enc_features); |
27446442 | 69 | if (IS_ERR_OR_NULL(segs)) { |
155e010e TH |
70 | skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, |
71 | mac_len); | |
72 | goto out; | |
73 | } | |
74 | ||
75 | outer_hlen = skb_tnl_header_len(skb); | |
76 | udp_offset = outer_hlen - tnl_hlen; | |
77 | skb = segs; | |
78 | do { | |
79 | struct udphdr *uh; | |
80 | int len; | |
4bcb877d TH |
81 | __be32 delta; |
82 | ||
83 | if (dont_encap) { | |
84 | skb->encapsulation = 0; | |
85 | skb->ip_summed = CHECKSUM_NONE; | |
86 | } else { | |
87 | /* Only set up inner headers if we might be offloading | |
88 | * inner checksum. | |
89 | */ | |
90 | skb_reset_inner_headers(skb); | |
91 | skb->encapsulation = 1; | |
92 | } | |
155e010e TH |
93 | |
94 | skb->mac_len = mac_len; | |
4bcb877d | 95 | skb->protocol = protocol; |
155e010e TH |
96 | |
97 | skb_push(skb, outer_hlen); | |
98 | skb_reset_mac_header(skb); | |
99 | skb_set_network_header(skb, mac_len); | |
100 | skb_set_transport_header(skb, udp_offset); | |
101 | len = skb->len - udp_offset; | |
102 | uh = udp_hdr(skb); | |
103 | uh->len = htons(len); | |
104 | ||
4bcb877d TH |
105 | if (!need_csum) |
106 | continue; | |
107 | ||
108 | delta = htonl(oldlen + len); | |
155e010e | 109 | |
4bcb877d TH |
110 | uh->check = ~csum_fold((__force __wsum) |
111 | ((__force u32)uh->check + | |
112 | (__force u32)delta)); | |
4bcb877d TH |
113 | if (offload_csum) { |
114 | skb->ip_summed = CHECKSUM_PARTIAL; | |
115 | skb->csum_start = skb_transport_header(skb) - skb->head; | |
116 | skb->csum_offset = offsetof(struct udphdr, check); | |
e585f236 TH |
117 | } else if (remcsum) { |
118 | /* Need to calculate checksum from scratch, | |
119 | * inner checksums are never when doing | |
120 | * remote_checksum_offload. | |
121 | */ | |
122 | ||
123 | skb->csum = skb_checksum(skb, udp_offset, | |
124 | skb->len - udp_offset, | |
125 | 0); | |
126 | uh->check = csum_fold(skb->csum); | |
127 | if (uh->check == 0) | |
128 | uh->check = CSUM_MANGLED_0; | |
4bcb877d | 129 | } else { |
155e010e TH |
130 | uh->check = gso_make_checksum(skb, ~uh->check); |
131 | ||
132 | if (uh->check == 0) | |
133 | uh->check = CSUM_MANGLED_0; | |
134 | } | |
155e010e TH |
135 | } while ((skb = skb->next)); |
136 | out: | |
137 | return segs; | |
138 | } | |
139 | ||
8bce6d7d TH |
140 | struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, |
141 | netdev_features_t features, | |
142 | bool is_ipv6) | |
143 | { | |
144 | __be16 protocol = skb->protocol; | |
145 | const struct net_offload **offloads; | |
146 | const struct net_offload *ops; | |
147 | struct sk_buff *segs = ERR_PTR(-EINVAL); | |
148 | struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb, | |
149 | netdev_features_t features); | |
150 | ||
151 | rcu_read_lock(); | |
152 | ||
153 | switch (skb->inner_protocol_type) { | |
154 | case ENCAP_TYPE_ETHER: | |
155 | protocol = skb->inner_protocol; | |
156 | gso_inner_segment = skb_mac_gso_segment; | |
157 | break; | |
158 | case ENCAP_TYPE_IPPROTO: | |
159 | offloads = is_ipv6 ? inet6_offloads : inet_offloads; | |
160 | ops = rcu_dereference(offloads[skb->inner_ipproto]); | |
161 | if (!ops || !ops->callbacks.gso_segment) | |
162 | goto out_unlock; | |
163 | gso_inner_segment = ops->callbacks.gso_segment; | |
164 | break; | |
165 | default: | |
166 | goto out_unlock; | |
167 | } | |
168 | ||
169 | segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment, | |
4bcb877d | 170 | protocol, is_ipv6); |
8bce6d7d TH |
171 | |
172 | out_unlock: | |
173 | rcu_read_unlock(); | |
174 | ||
175 | return segs; | |
176 | } | |
177 | ||
da5bab07 DB |
178 | static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, |
179 | netdev_features_t features) | |
180 | { | |
181 | struct sk_buff *segs = ERR_PTR(-EINVAL); | |
182 | unsigned int mss; | |
7a7ffbab | 183 | __wsum csum; |
f71470b3 TH |
184 | struct udphdr *uh; |
185 | struct iphdr *iph; | |
7a7ffbab WCC |
186 | |
187 | if (skb->encapsulation && | |
0f4f4ffa TH |
188 | (skb_shinfo(skb)->gso_type & |
189 | (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { | |
8bce6d7d | 190 | segs = skb_udp_tunnel_segment(skb, features, false); |
7a7ffbab WCC |
191 | goto out; |
192 | } | |
da5bab07 | 193 | |
f71470b3 TH |
194 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) |
195 | goto out; | |
196 | ||
da5bab07 DB |
197 | mss = skb_shinfo(skb)->gso_size; |
198 | if (unlikely(skb->len <= mss)) | |
199 | goto out; | |
200 | ||
201 | if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { | |
202 | /* Packet is from an untrusted source, reset gso_segs. */ | |
203 | int type = skb_shinfo(skb)->gso_type; | |
204 | ||
205 | if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | | |
206 | SKB_GSO_UDP_TUNNEL | | |
0f4f4ffa | 207 | SKB_GSO_UDP_TUNNEL_CSUM | |
e585f236 | 208 | SKB_GSO_TUNNEL_REMCSUM | |
cb32f511 | 209 | SKB_GSO_IPIP | |
59b93b41 | 210 | SKB_GSO_GRE | SKB_GSO_GRE_CSUM) || |
da5bab07 DB |
211 | !(type & (SKB_GSO_UDP)))) |
212 | goto out; | |
213 | ||
214 | skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); | |
215 | ||
216 | segs = NULL; | |
217 | goto out; | |
218 | } | |
219 | ||
7a7ffbab WCC |
220 | /* Do software UFO. Complete and fill in the UDP checksum as |
221 | * HW cannot do checksum of UDP packets sent as multiple | |
222 | * IP fragments. | |
223 | */ | |
f71470b3 TH |
224 | |
225 | uh = udp_hdr(skb); | |
226 | iph = ip_hdr(skb); | |
227 | ||
228 | uh->check = 0; | |
229 | csum = skb_checksum(skb, 0, skb->len, 0); | |
230 | uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum); | |
231 | if (uh->check == 0) | |
232 | uh->check = CSUM_MANGLED_0; | |
233 | ||
7a7ffbab WCC |
234 | skb->ip_summed = CHECKSUM_NONE; |
235 | ||
da5bab07 DB |
236 | /* Fragment the skb. IP headers of the fragments are updated in |
237 | * inet_gso_segment() | |
238 | */ | |
7a7ffbab | 239 | segs = skb_segment(skb, features); |
da5bab07 DB |
240 | out: |
241 | return segs; | |
242 | } | |
243 | ||
b582ef09 OG |
244 | int udp_add_offload(struct udp_offload *uo) |
245 | { | |
b5aaab12 | 246 | struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC); |
b582ef09 OG |
247 | |
248 | if (!new_offload) | |
249 | return -ENOMEM; | |
250 | ||
251 | new_offload->offload = uo; | |
252 | ||
253 | spin_lock(&udp_offload_lock); | |
a664a4f7 SP |
254 | new_offload->next = udp_offload_base; |
255 | rcu_assign_pointer(udp_offload_base, new_offload); | |
b582ef09 OG |
256 | spin_unlock(&udp_offload_lock); |
257 | ||
258 | return 0; | |
259 | } | |
260 | EXPORT_SYMBOL(udp_add_offload); | |
261 | ||
262 | static void udp_offload_free_routine(struct rcu_head *head) | |
263 | { | |
264 | struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu); | |
265 | kfree(ou_priv); | |
266 | } | |
267 | ||
268 | void udp_del_offload(struct udp_offload *uo) | |
269 | { | |
270 | struct udp_offload_priv __rcu **head = &udp_offload_base; | |
271 | struct udp_offload_priv *uo_priv; | |
272 | ||
273 | spin_lock(&udp_offload_lock); | |
274 | ||
a664a4f7 | 275 | uo_priv = udp_deref_protected(*head); |
b582ef09 | 276 | for (; uo_priv != NULL; |
a664a4f7 | 277 | uo_priv = udp_deref_protected(*head)) { |
b582ef09 | 278 | if (uo_priv->offload == uo) { |
a664a4f7 SP |
279 | rcu_assign_pointer(*head, |
280 | udp_deref_protected(uo_priv->next)); | |
b582ef09 OG |
281 | goto unlock; |
282 | } | |
283 | head = &uo_priv->next; | |
284 | } | |
a1d0cd8e | 285 | pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port)); |
b582ef09 OG |
286 | unlock: |
287 | spin_unlock(&udp_offload_lock); | |
00db4124 | 288 | if (uo_priv) |
b582ef09 OG |
289 | call_rcu(&uo_priv->rcu, udp_offload_free_routine); |
290 | } | |
291 | EXPORT_SYMBOL(udp_del_offload); | |
292 | ||
57c67ff4 TH |
293 | struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, |
294 | struct udphdr *uh) | |
b582ef09 OG |
295 | { |
296 | struct udp_offload_priv *uo_priv; | |
297 | struct sk_buff *p, **pp = NULL; | |
57c67ff4 TH |
298 | struct udphdr *uh2; |
299 | unsigned int off = skb_gro_offset(skb); | |
b582ef09 OG |
300 | int flush = 1; |
301 | ||
302 | if (NAPI_GRO_CB(skb)->udp_mark || | |
662880f4 TH |
303 | (skb->ip_summed != CHECKSUM_PARTIAL && |
304 | NAPI_GRO_CB(skb)->csum_cnt == 0 && | |
305 | !NAPI_GRO_CB(skb)->csum_valid)) | |
b582ef09 OG |
306 | goto out; |
307 | ||
308 | /* mark that this skb passed once through the udp gro layer */ | |
309 | NAPI_GRO_CB(skb)->udp_mark = 1; | |
b582ef09 OG |
310 | |
311 | rcu_read_lock(); | |
312 | uo_priv = rcu_dereference(udp_offload_base); | |
313 | for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { | |
314 | if (uo_priv->offload->port == uh->dest && | |
315 | uo_priv->offload->callbacks.gro_receive) | |
316 | goto unflush; | |
317 | } | |
318 | goto out_unlock; | |
319 | ||
320 | unflush: | |
321 | flush = 0; | |
322 | ||
323 | for (p = *head; p; p = p->next) { | |
324 | if (!NAPI_GRO_CB(p)->same_flow) | |
325 | continue; | |
326 | ||
327 | uh2 = (struct udphdr *)(p->data + off); | |
57c67ff4 TH |
328 | |
329 | /* Match ports and either checksums are either both zero | |
330 | * or nonzero. | |
331 | */ | |
332 | if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) || | |
333 | (!uh->check ^ !uh2->check)) { | |
b582ef09 OG |
334 | NAPI_GRO_CB(p)->same_flow = 0; |
335 | continue; | |
336 | } | |
337 | } | |
338 | ||
339 | skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ | |
6bae1d4c | 340 | skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); |
afe93325 | 341 | NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; |
a2b12f3c TH |
342 | pp = uo_priv->offload->callbacks.gro_receive(head, skb, |
343 | uo_priv->offload); | |
b582ef09 OG |
344 | |
345 | out_unlock: | |
346 | rcu_read_unlock(); | |
347 | out: | |
348 | NAPI_GRO_CB(skb)->flush |= flush; | |
349 | return pp; | |
350 | } | |
351 | ||
57c67ff4 TH |
352 | static struct sk_buff **udp4_gro_receive(struct sk_buff **head, |
353 | struct sk_buff *skb) | |
354 | { | |
355 | struct udphdr *uh = udp_gro_udphdr(skb); | |
356 | ||
2abb7cdc TH |
357 | if (unlikely(!uh)) |
358 | goto flush; | |
57c67ff4 | 359 | |
2abb7cdc | 360 | /* Don't bother verifying checksum if we're going to flush anyway. */ |
2d8f7e2c | 361 | if (NAPI_GRO_CB(skb)->flush) |
2abb7cdc TH |
362 | goto skip; |
363 | ||
364 | if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, | |
365 | inet_gro_compute_pseudo)) | |
366 | goto flush; | |
367 | else if (uh->check) | |
368 | skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, | |
369 | inet_gro_compute_pseudo); | |
370 | skip: | |
efc98d08 | 371 | NAPI_GRO_CB(skb)->is_ipv6 = 0; |
57c67ff4 | 372 | return udp_gro_receive(head, skb, uh); |
2abb7cdc TH |
373 | |
374 | flush: | |
375 | NAPI_GRO_CB(skb)->flush = 1; | |
376 | return NULL; | |
57c67ff4 TH |
377 | } |
378 | ||
379 | int udp_gro_complete(struct sk_buff *skb, int nhoff) | |
b582ef09 OG |
380 | { |
381 | struct udp_offload_priv *uo_priv; | |
382 | __be16 newlen = htons(skb->len - nhoff); | |
383 | struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); | |
384 | int err = -ENOSYS; | |
385 | ||
386 | uh->len = newlen; | |
387 | ||
388 | rcu_read_lock(); | |
389 | ||
390 | uo_priv = rcu_dereference(udp_offload_base); | |
391 | for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { | |
392 | if (uo_priv->offload->port == uh->dest && | |
393 | uo_priv->offload->callbacks.gro_complete) | |
394 | break; | |
395 | } | |
396 | ||
00db4124 | 397 | if (uo_priv) { |
afe93325 | 398 | NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; |
a2b12f3c TH |
399 | err = uo_priv->offload->callbacks.gro_complete(skb, |
400 | nhoff + sizeof(struct udphdr), | |
401 | uo_priv->offload); | |
afe93325 | 402 | } |
b582ef09 OG |
403 | |
404 | rcu_read_unlock(); | |
6db93ea1 TH |
405 | |
406 | if (skb->remcsum_offload) | |
407 | skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM; | |
408 | ||
409 | skb->encapsulation = 1; | |
410 | skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr)); | |
411 | ||
b582ef09 OG |
412 | return err; |
413 | } | |
414 | ||
72bb17b3 | 415 | static int udp4_gro_complete(struct sk_buff *skb, int nhoff) |
57c67ff4 TH |
416 | { |
417 | const struct iphdr *iph = ip_hdr(skb); | |
418 | struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); | |
419 | ||
6db93ea1 TH |
420 | if (uh->check) { |
421 | skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; | |
57c67ff4 TH |
422 | uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr, |
423 | iph->daddr, 0); | |
6db93ea1 TH |
424 | } else { |
425 | skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; | |
426 | } | |
57c67ff4 TH |
427 | |
428 | return udp_gro_complete(skb, nhoff); | |
429 | } | |
430 | ||
da5bab07 DB |
431 | static const struct net_offload udpv4_offload = { |
432 | .callbacks = { | |
da5bab07 | 433 | .gso_segment = udp4_ufo_fragment, |
57c67ff4 TH |
434 | .gro_receive = udp4_gro_receive, |
435 | .gro_complete = udp4_gro_complete, | |
da5bab07 DB |
436 | }, |
437 | }; | |
438 | ||
439 | int __init udpv4_offload_init(void) | |
440 | { | |
441 | return inet_add_offload(&udpv4_offload, IPPROTO_UDP); | |
442 | } |