net/ipv6/ip6_output.c
1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv6.h>
43
44 #include <net/sock.h>
45 #include <net/snmp.h>
46
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/protocol.h>
50 #include <net/ip6_route.h>
51 #include <net/addrconf.h>
52 #include <net/rawv6.h>
53 #include <net/icmp.h>
54 #include <net/xfrm.h>
55 #include <net/checksum.h>
56 #include <linux/mroute6.h>
57
58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
59
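/*
 * Pick the Identification value for a fragment header from a single
 * global counter, serialized by a spinlock; the counter wraps but
 * skips zero.
 */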
60 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
61 {
62 static u32 ipv6_fragmentation_id = 1;
63 static DEFINE_SPINLOCK(ip6_id_lock);
64
65 spin_lock_bh(&ip6_id_lock);
66 fhdr->identification = htonl(ipv6_fragmentation_id);
67 if (++ipv6_fragmentation_id == 0)
68 ipv6_fragmentation_id = 1;
69 spin_unlock_bh(&ip6_id_lock);
70 }
71
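/*
 * Fill in the IPv6 payload length (0 if it would not fit in the
 * 16-bit field) and run the NF_INET_LOCAL_OUT netfilter hook; the
 * caller passes the packet on to dst_output() when the hook verdict
 * allows it.
 */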
72 int __ip6_local_out(struct sk_buff *skb)
73 {
74 int len;
75
76 len = skb->len - sizeof(struct ipv6hdr);
77 if (len > IPV6_MAXPLEN)
78 len = 0;
79 ipv6_hdr(skb)->payload_len = htons(len);
80
81 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
82 dst_output);
83 }
84
85 int ip6_local_out(struct sk_buff *skb)
86 {
87 int err;
88
89 err = __ip6_local_out(skb);
90 if (likely(err == 1))
91 err = dst_output(skb);
92
93 return err;
94 }
95 EXPORT_SYMBOL_GPL(ip6_local_out);
96
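/*
 * Final transmit step: use the cached hardware header if the route
 * has one, otherwise go through the neighbour output function; with
 * no usable neighbour the packet is counted as OUTNOROUTES and dropped.
 */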
97 static int ip6_output_finish(struct sk_buff *skb)
98 {
99 struct dst_entry *dst = skb->dst;
100
101 if (dst->hh)
102 return neigh_hh_output(dst->hh, skb);
103 else if (dst->neighbour)
104 return dst->neighbour->output(skb);
105
106 IP6_INC_STATS_BH(dev_net(dst->dev),
107 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
108 kfree_skb(skb);
109 return -EINVAL;
110
111 }
112
113 /* dev_loopback_xmit for use with netfilter. */
114 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
115 {
116 skb_reset_mac_header(newskb);
117 __skb_pull(newskb, skb_network_offset(newskb));
118 newskb->pkt_type = PACKET_LOOPBACK;
119 newskb->ip_summed = CHECKSUM_UNNECESSARY;
120 WARN_ON(!newskb->dst);
121
122 netif_rx(newskb);
123 return 0;
124 }
125
126
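/*
 * Second stage of output.  For multicast destinations a clone may be
 * looped back to the local stack (when this host listens on the group
 * or multicast routing needs a copy), and such a packet is discarded
 * if its hop limit is already zero.  Everything else continues through
 * the POST_ROUTING netfilter hook towards ip6_output_finish().
 */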
127 static int ip6_output2(struct sk_buff *skb)
128 {
129 struct dst_entry *dst = skb->dst;
130 struct net_device *dev = dst->dev;
131
132 skb->protocol = htons(ETH_P_IPV6);
133 skb->dev = dev;
134
135 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
136 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
137 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
138
139 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
140 ((mroute6_socket(dev_net(dev)) &&
141 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
142 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
143 &ipv6_hdr(skb)->saddr))) {
144 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
145
146 /* Do not check for IFF_ALLMULTI; multicast routing
147 is not supported in any case.
148 */
149 if (newskb)
150 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
151 NULL, newskb->dev,
152 ip6_dev_loopback_xmit);
153
154 if (ipv6_hdr(skb)->hop_limit == 0) {
155 IP6_INC_STATS(dev_net(dev), idev,
156 IPSTATS_MIB_OUTDISCARDS);
157 kfree_skb(skb);
158 return 0;
159 }
160 }
161
162 IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCASTPKTS);
163 }
164
165 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
166 ip6_output_finish);
167 }
168
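/* MTU to apply to this skb: the device MTU when the sending socket does
 * its own PMTU probing (IPV6_PMTUDISC_PROBE), otherwise the route MTU. */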
169 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
170 {
171 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
172
173 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
174 skb->dst->dev->mtu : dst_mtu(skb->dst);
175 }
176
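/*
 * dst_output() entry point.  Discard the packet if IPv6 is disabled on
 * the outgoing device; fragment it when it exceeds the path MTU (and is
 * not GSO) or when the route requires all packets to be fragmented;
 * otherwise hand it straight to ip6_output2().
 */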
177 int ip6_output(struct sk_buff *skb)
178 {
179 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
180 if (unlikely(idev->cnf.disable_ipv6)) {
181 IP6_INC_STATS(dev_net(skb->dst->dev), idev,
182 IPSTATS_MIB_OUTDISCARDS);
183 kfree_skb(skb);
184 return 0;
185 }
186
187 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
188 dst_allfrag(skb->dst))
189 return ip6_fragment(skb, ip6_output2);
190 else
191 return ip6_output2(skb);
192 }
193
194 /*
195 * xmit an sk_buff (used by TCP)
196 */
197
198 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
199 struct ipv6_txoptions *opt, int ipfragok)
200 {
201 struct net *net = sock_net(sk);
202 struct ipv6_pinfo *np = inet6_sk(sk);
203 struct in6_addr *first_hop = &fl->fl6_dst;
204 struct dst_entry *dst = skb->dst;
205 struct ipv6hdr *hdr;
206 u8 proto = fl->proto;
207 int seg_len = skb->len;
208 int hlimit, tclass;
209 u32 mtu;
210
211 if (opt) {
212 unsigned int head_room;
213
214 /* First: exthdrs may take lots of space (~8K for now);
215 MAX_HEADER is not enough.
216 */
217 head_room = opt->opt_nflen + opt->opt_flen;
218 seg_len += head_room;
219 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
220
221 if (skb_headroom(skb) < head_room) {
222 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
223 if (skb2 == NULL) {
224 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
225 IPSTATS_MIB_OUTDISCARDS);
226 kfree_skb(skb);
227 return -ENOBUFS;
228 }
229 kfree_skb(skb);
230 skb = skb2;
231 if (sk)
232 skb_set_owner_w(skb, sk);
233 }
234 if (opt->opt_flen)
235 ipv6_push_frag_opts(skb, opt, &proto);
236 if (opt->opt_nflen)
237 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
238 }
239
240 skb_push(skb, sizeof(struct ipv6hdr));
241 skb_reset_network_header(skb);
242 hdr = ipv6_hdr(skb);
243
244 /* Allow local fragmentation. */
245 if (ipfragok)
246 skb->local_df = 1;
247
248 /*
249 * Fill in the IPv6 header
250 */
251
252 hlimit = -1;
253 if (np)
254 hlimit = np->hop_limit;
255 if (hlimit < 0)
256 hlimit = ip6_dst_hoplimit(dst);
257
258 tclass = -1;
259 if (np)
260 tclass = np->tclass;
261 if (tclass < 0)
262 tclass = 0;
263
264 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
265
266 hdr->payload_len = htons(seg_len);
267 hdr->nexthdr = proto;
268 hdr->hop_limit = hlimit;
269
270 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
271 ipv6_addr_copy(&hdr->daddr, first_hop);
272
273 skb->priority = sk->sk_priority;
274 skb->mark = sk->sk_mark;
275
276 mtu = dst_mtu(dst);
277 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
278 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
279 IPSTATS_MIB_OUTREQUESTS);
280 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
281 dst_output);
282 }
283
284 if (net_ratelimit())
285 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
286 skb->dev = dst->dev;
287 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
288 IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
289 kfree_skb(skb);
290 return -EMSGSIZE;
291 }
292
293 EXPORT_SYMBOL(ip6_xmit);
294
295 /*
296 * To avoid extra problems, ND packets are sent through this
297 * routine. It's code duplication, but I really want to avoid
298 * extra checks since ipv6_build_header is used by TCP (which
299 * is performance critical for us).
300 */
301
302 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
303 const struct in6_addr *saddr, const struct in6_addr *daddr,
304 int proto, int len)
305 {
306 struct ipv6_pinfo *np = inet6_sk(sk);
307 struct ipv6hdr *hdr;
308 int totlen;
309
310 skb->protocol = htons(ETH_P_IPV6);
311 skb->dev = dev;
312
313 totlen = len + sizeof(struct ipv6hdr);
314
315 skb_reset_network_header(skb);
316 skb_put(skb, sizeof(struct ipv6hdr));
317 hdr = ipv6_hdr(skb);
318
319 *(__be32*)hdr = htonl(0x60000000);
320
321 hdr->payload_len = htons(len);
322 hdr->nexthdr = proto;
323 hdr->hop_limit = np->hop_limit;
324
325 ipv6_addr_copy(&hdr->saddr, saddr);
326 ipv6_addr_copy(&hdr->daddr, daddr);
327
328 return 0;
329 }
330
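/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * registered for the matching alert value: clones go to all but the
 * last listener, which receives the original skb.  Returns 1 when the
 * packet was consumed, 0 otherwise.
 */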
331 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
332 {
333 struct ip6_ra_chain *ra;
334 struct sock *last = NULL;
335
336 read_lock(&ip6_ra_lock);
337 for (ra = ip6_ra_chain; ra; ra = ra->next) {
338 struct sock *sk = ra->sk;
339 if (sk && ra->sel == sel &&
340 (!sk->sk_bound_dev_if ||
341 sk->sk_bound_dev_if == skb->dev->ifindex)) {
342 if (last) {
343 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
344 if (skb2)
345 rawv6_rcv(last, skb2);
346 }
347 last = sk;
348 }
349 }
350
351 if (last) {
352 rawv6_rcv(last, skb);
353 read_unlock(&ip6_ra_lock);
354 return 1;
355 }
356 read_unlock(&ip6_ra_lock);
357 return 0;
358 }
359
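/*
 * Decide how to treat a packet addressed to a proxied destination:
 * returns 1 for unicast neighbour discovery messages that should go to
 * the local input path, -1 when the packet must be dropped (link-local
 * destination), and 0 to keep forwarding it.
 */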
360 static int ip6_forward_proxy_check(struct sk_buff *skb)
361 {
362 struct ipv6hdr *hdr = ipv6_hdr(skb);
363 u8 nexthdr = hdr->nexthdr;
364 int offset;
365
366 if (ipv6_ext_hdr(nexthdr)) {
367 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
368 if (offset < 0)
369 return 0;
370 } else
371 offset = sizeof(struct ipv6hdr);
372
373 if (nexthdr == IPPROTO_ICMPV6) {
374 struct icmp6hdr *icmp6;
375
376 if (!pskb_may_pull(skb, (skb_network_header(skb) +
377 offset + 1 - skb->data)))
378 return 0;
379
380 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
381
382 switch (icmp6->icmp6_type) {
383 case NDISC_ROUTER_SOLICITATION:
384 case NDISC_ROUTER_ADVERTISEMENT:
385 case NDISC_NEIGHBOUR_SOLICITATION:
386 case NDISC_NEIGHBOUR_ADVERTISEMENT:
387 case NDISC_REDIRECT:
388 /* For a unicast neighbor discovery message destined
389 * to the proxied address, pass it to the input
390 * function.
391 */
392 return 1;
393 default:
394 break;
395 }
396 }
397
398 /*
399 * The proxying router can't forward traffic sent to a link-local
400 * address, so signal the sender and discard the packet. This
401 * behavior is clarified by the MIPv6 specification.
402 */
403 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
404 dst_link_failure(skb);
405 return -1;
406 }
407
408 return 0;
409 }
410
411 static inline int ip6_forward_finish(struct sk_buff *skb)
412 {
413 return dst_output(skb);
414 }
415
416 int ip6_forward(struct sk_buff *skb)
417 {
418 struct dst_entry *dst = skb->dst;
419 struct ipv6hdr *hdr = ipv6_hdr(skb);
420 struct inet6_skb_parm *opt = IP6CB(skb);
421 struct net *net = dev_net(dst->dev);
422
423 if (net->ipv6.devconf_all->forwarding == 0)
424 goto error;
425
426 if (skb_warn_if_lro(skb))
427 goto drop;
428
429 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
430 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
431 goto drop;
432 }
433
434 skb_forward_csum(skb);
435
436 /*
437 * We DO NOT do any processing on
438 * RA packets, pushing them to user level AS IS
439 * without any WARRANTY that the application will be able
440 * to interpret them. The reason is that we
441 * cannot do anything clever here.
442 *
443 * We are not an end node, so if the packet contains
444 * AH/ESP we cannot do anything.
445 * Defragmentation would also be a mistake; RA packets
446 * cannot be fragmented, because there is no warranty
447 * that different fragments will go along one path. --ANK
448 */
449 if (opt->ra) {
450 u8 *ptr = skb_network_header(skb) + opt->ra;
451 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
452 return 0;
453 }
454
455 /*
456 * check and decrement hop limit
457 */
458 if (hdr->hop_limit <= 1) {
459 /* Force the OUTPUT device to be used for the source address of the ICMP error */
460 skb->dev = dst->dev;
461 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
462 0, skb->dev);
463 IP6_INC_STATS_BH(net,
464 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
465
466 kfree_skb(skb);
467 return -ETIMEDOUT;
468 }
469
470 /* XXX: idev->cnf.proxy_ndp? */
471 if (net->ipv6.devconf_all->proxy_ndp &&
472 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
473 int proxied = ip6_forward_proxy_check(skb);
474 if (proxied > 0)
475 return ip6_input(skb);
476 else if (proxied < 0) {
477 IP6_INC_STATS(net, ip6_dst_idev(dst),
478 IPSTATS_MIB_INDISCARDS);
479 goto drop;
480 }
481 }
482
483 if (!xfrm6_route_forward(skb)) {
484 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
485 goto drop;
486 }
487 dst = skb->dst;
488
489 /* IPv6 specs say nothing about it, but it is clear that we cannot
490 send redirects to source routed frames.
491 We don't send redirects to frames decapsulated from IPsec.
492 */
493 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
494 !skb_sec_path(skb)) {
495 struct in6_addr *target = NULL;
496 struct rt6_info *rt;
497 struct neighbour *n = dst->neighbour;
498
499 /*
500 * incoming and outgoing devices are the same;
501 * send a redirect.
502 */
503
504 rt = (struct rt6_info *) dst;
505 if ((rt->rt6i_flags & RTF_GATEWAY))
506 target = (struct in6_addr*)&n->primary_key;
507 else
508 target = &hdr->daddr;
509
510 /* Limit redirects both by destination (here)
511 and by source (inside ndisc_send_redirect)
512 */
513 if (xrlim_allow(dst, 1*HZ))
514 ndisc_send_redirect(skb, n, target);
515 } else {
516 int addrtype = ipv6_addr_type(&hdr->saddr);
517
518 /* This check is security critical. */
519 if (addrtype == IPV6_ADDR_ANY ||
520 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
521 goto error;
522 if (addrtype & IPV6_ADDR_LINKLOCAL) {
523 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
524 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
525 goto error;
526 }
527 }
528
529 if (skb->len > dst_mtu(dst)) {
530 /* Again, force the OUTPUT device to be used for the source address */
531 skb->dev = dst->dev;
532 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
533 IP6_INC_STATS_BH(net,
534 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
535 IP6_INC_STATS_BH(net,
536 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
537 kfree_skb(skb);
538 return -EMSGSIZE;
539 }
540
541 if (skb_cow(skb, dst->dev->hard_header_len)) {
542 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
543 goto drop;
544 }
545
546 hdr = ipv6_hdr(skb);
547
548 /* Mangling of the hop limit is deferred until after the skb COW */
549
550 hdr->hop_limit--;
551
552 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
553 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
554 ip6_forward_finish);
555
556 error:
557 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
558 drop:
559 kfree_skb(skb);
560 return -EINVAL;
561 }
562
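/* Copy per-packet metadata (packet type, priority, protocol, dst, device,
 * mark, netfilter and security state) from the original skb to a fragment. */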
563 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
564 {
565 to->pkt_type = from->pkt_type;
566 to->priority = from->priority;
567 to->protocol = from->protocol;
568 dst_release(to->dst);
569 to->dst = dst_clone(from->dst);
570 to->dev = from->dev;
571 to->mark = from->mark;
572
573 #ifdef CONFIG_NET_SCHED
574 to->tc_index = from->tc_index;
575 #endif
576 nf_copy(to, from);
577 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
578 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
579 to->nf_trace = from->nf_trace;
580 #endif
581 skb_copy_secmark(to, from);
582 }
583
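/*
 * Walk the extension header chain and return the offset at which a
 * Fragment header must be inserted: after Hop-by-Hop, Routing and any
 * Destination Options header carrying a Home Address option.  *nexthdr
 * is left pointing at the Next Header byte the caller will overwrite
 * with NEXTHDR_FRAGMENT.
 */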
584 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
585 {
586 u16 offset = sizeof(struct ipv6hdr);
587 struct ipv6_opt_hdr *exthdr =
588 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
589 unsigned int packet_len = skb->tail - skb->network_header;
590 int found_rhdr = 0;
591 *nexthdr = &ipv6_hdr(skb)->nexthdr;
592
593 while (offset + 1 <= packet_len) {
594
595 switch (**nexthdr) {
596
597 case NEXTHDR_HOP:
598 break;
599 case NEXTHDR_ROUTING:
600 found_rhdr = 1;
601 break;
602 case NEXTHDR_DEST:
603 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
604 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
605 break;
606 #endif
607 if (found_rhdr)
608 return offset;
609 break;
610 default :
611 return offset;
612 }
613
614 offset += ipv6_optlen(exthdr);
615 *nexthdr = &exthdr->nexthdr;
616 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
617 offset);
618 }
619
620 return offset;
621 }
622
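/*
 * Fragment an oversized packet.  If the skb already carries a suitably
 * sized frag_list, each list member becomes one fragment (fast path);
 * otherwise new skbs are allocated and the payload is copied into them
 * chunk by chunk (slow path).  Packets that may not be fragmented
 * locally get an ICMPv6 Packet Too Big error instead.
 */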
623 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
624 {
625 struct sk_buff *frag;
626 struct rt6_info *rt = (struct rt6_info*)skb->dst;
627 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
628 struct ipv6hdr *tmp_hdr;
629 struct frag_hdr *fh;
630 unsigned int mtu, hlen, left, len;
631 __be32 frag_id = 0;
632 int ptr, offset = 0, err=0;
633 u8 *prevhdr, nexthdr = 0;
634 struct net *net = dev_net(skb->dst->dev);
635
636 hlen = ip6_find_1stfragopt(skb, &prevhdr);
637 nexthdr = *prevhdr;
638
639 mtu = ip6_skb_dst_mtu(skb);
640
641 /* We must not fragment if the socket is set to force MTU discovery
642 * or if the skb is not generated by a local socket. (This last
643 * check should be redundant, but it's free.)
644 */
645 if (!skb->local_df) {
646 skb->dev = skb->dst->dev;
647 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
648 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
649 IPSTATS_MIB_FRAGFAILS);
650 kfree_skb(skb);
651 return -EMSGSIZE;
652 }
653
654 if (np && np->frag_size < mtu) {
655 if (np->frag_size)
656 mtu = np->frag_size;
657 }
658 mtu -= hlen + sizeof(struct frag_hdr);
659
660 if (skb_shinfo(skb)->frag_list) {
661 int first_len = skb_pagelen(skb);
662 int truesizes = 0;
663
664 if (first_len - hlen > mtu ||
665 ((first_len - hlen) & 7) ||
666 skb_cloned(skb))
667 goto slow_path;
668
669 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
670 /* Correct geometry. */
671 if (frag->len > mtu ||
672 ((frag->len & 7) && frag->next) ||
673 skb_headroom(frag) < hlen)
674 goto slow_path;
675
676 /* Partially cloned skb? */
677 if (skb_shared(frag))
678 goto slow_path;
679
680 BUG_ON(frag->sk);
681 if (skb->sk) {
682 sock_hold(skb->sk);
683 frag->sk = skb->sk;
684 frag->destructor = sock_wfree;
685 truesizes += frag->truesize;
686 }
687 }
688
689 err = 0;
690 offset = 0;
691 frag = skb_shinfo(skb)->frag_list;
692 skb_shinfo(skb)->frag_list = NULL;
693 /* BUILD HEADER */
694
695 *prevhdr = NEXTHDR_FRAGMENT;
696 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
697 if (!tmp_hdr) {
698 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
699 IPSTATS_MIB_FRAGFAILS);
700 return -ENOMEM;
701 }
702
703 __skb_pull(skb, hlen);
704 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
705 __skb_push(skb, hlen);
706 skb_reset_network_header(skb);
707 memcpy(skb_network_header(skb), tmp_hdr, hlen);
708
709 ipv6_select_ident(skb, fh);
710 fh->nexthdr = nexthdr;
711 fh->reserved = 0;
712 fh->frag_off = htons(IP6_MF);
713 frag_id = fh->identification;
714
715 first_len = skb_pagelen(skb);
716 skb->data_len = first_len - skb_headlen(skb);
717 skb->truesize -= truesizes;
718 skb->len = first_len;
719 ipv6_hdr(skb)->payload_len = htons(first_len -
720 sizeof(struct ipv6hdr));
721
722 dst_hold(&rt->u.dst);
723
724 for (;;) {
725 /* Prepare header of the next frame,
726 * before the previous one goes down. */
727 if (frag) {
728 frag->ip_summed = CHECKSUM_NONE;
729 skb_reset_transport_header(frag);
730 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
731 __skb_push(frag, hlen);
732 skb_reset_network_header(frag);
733 memcpy(skb_network_header(frag), tmp_hdr,
734 hlen);
735 offset += skb->len - hlen - sizeof(struct frag_hdr);
736 fh->nexthdr = nexthdr;
737 fh->reserved = 0;
738 fh->frag_off = htons(offset);
739 if (frag->next != NULL)
740 fh->frag_off |= htons(IP6_MF);
741 fh->identification = frag_id;
742 ipv6_hdr(frag)->payload_len =
743 htons(frag->len -
744 sizeof(struct ipv6hdr));
745 ip6_copy_metadata(frag, skb);
746 }
747
748 err = output(skb);
749 if(!err)
750 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
751 IPSTATS_MIB_FRAGCREATES);
752
753 if (err || !frag)
754 break;
755
756 skb = frag;
757 frag = skb->next;
758 skb->next = NULL;
759 }
760
761 kfree(tmp_hdr);
762
763 if (err == 0) {
764 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
765 IPSTATS_MIB_FRAGOKS);
766 dst_release(&rt->u.dst);
767 return 0;
768 }
769
770 while (frag) {
771 skb = frag->next;
772 kfree_skb(frag);
773 frag = skb;
774 }
775
776 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
777 IPSTATS_MIB_FRAGFAILS);
778 dst_release(&rt->u.dst);
779 return err;
780 }
781
782 slow_path:
783 left = skb->len - hlen; /* Space per frame */
784 ptr = hlen; /* Where to start from */
785
786 /*
787 * Fragment the datagram.
788 */
789
790 *prevhdr = NEXTHDR_FRAGMENT;
791
792 /*
793 * Keep copying data until we run out.
794 */
795 while(left > 0) {
796 len = left;
797 /* IF: it doesn't fit, use 'mtu' - the data space left */
798 if (len > mtu)
799 len = mtu;
800 /* IF: we are not sending up to and including the packet end,
801 then align the next start on an eight-byte boundary */
802 if (len < left) {
803 len &= ~7;
804 }
805 /*
806 * Allocate buffer.
807 */
808
809 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
810 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
811 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
812 IPSTATS_MIB_FRAGFAILS);
813 err = -ENOMEM;
814 goto fail;
815 }
816
817 /*
818 * Set up data on packet
819 */
820
821 ip6_copy_metadata(frag, skb);
822 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
823 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
824 skb_reset_network_header(frag);
825 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
826 frag->transport_header = (frag->network_header + hlen +
827 sizeof(struct frag_hdr));
828
829 /*
830 * Charge the memory for the fragment to any owner
831 * it might possess
832 */
833 if (skb->sk)
834 skb_set_owner_w(frag, skb->sk);
835
836 /*
837 * Copy the packet header into the new buffer.
838 */
839 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
840
841 /*
842 * Build fragment header.
843 */
844 fh->nexthdr = nexthdr;
845 fh->reserved = 0;
846 if (!frag_id) {
847 ipv6_select_ident(skb, fh);
848 frag_id = fh->identification;
849 } else
850 fh->identification = frag_id;
851
852 /*
853 * Copy a block of the IP datagram.
854 */
855 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
856 BUG();
857 left -= len;
858
859 fh->frag_off = htons(offset);
860 if (left > 0)
861 fh->frag_off |= htons(IP6_MF);
862 ipv6_hdr(frag)->payload_len = htons(frag->len -
863 sizeof(struct ipv6hdr));
864
865 ptr += len;
866 offset += len;
867
868 /*
869 * Put this fragment into the sending queue.
870 */
871 err = output(frag);
872 if (err)
873 goto fail;
874
875 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
876 IPSTATS_MIB_FRAGCREATES);
877 }
878 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
879 IPSTATS_MIB_FRAGOKS);
880 kfree_skb(skb);
881 return err;
882
883 fail:
884 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
885 IPSTATS_MIB_FRAGFAILS);
886 kfree_skb(skb);
887 return err;
888 }
889
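/*
 * Helper for ip6_sk_dst_check(): nonzero means neither the cached host
 * route nor the socket's cached address matches the address from the
 * flow, so the cached route cannot be reused as-is.
 */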
890 static inline int ip6_rt_check(struct rt6key *rt_key,
891 struct in6_addr *fl_addr,
892 struct in6_addr *addr_cache)
893 {
894 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
895 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
896 }
897
898 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
899 struct dst_entry *dst,
900 struct flowi *fl)
901 {
902 struct ipv6_pinfo *np = inet6_sk(sk);
903 struct rt6_info *rt = (struct rt6_info *)dst;
904
905 if (!dst)
906 goto out;
907
908 /* Yes, checking route validity in the unconnected
909 * case is not very simple. Take into account
910 * that we do not support routing by source, TOS,
911 * or MSG_DONTROUTE --ANK (980726)
912 *
913 * 1. ip6_rt_check(): If the route was a host route,
914 * check that the cached destination is current.
915 * If it is a network route, we may still
916 * check its validity using a saved pointer
917 * to the last used address: daddr_cache.
918 * We do not want to save the whole address now
919 * (because the main consumer of this service
920 * is TCP, which does not have this problem),
921 * so this last trick works only on connected
922 * sockets.
923 * 2. The oif must also be the same.
924 */
925 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
926 #ifdef CONFIG_IPV6_SUBTREES
927 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
928 #endif
929 (fl->oif && fl->oif != dst->dev->ifindex)) {
930 dst_release(dst);
931 dst = NULL;
932 }
933
934 out:
935 return dst;
936 }
937
938 static int ip6_dst_lookup_tail(struct sock *sk,
939 struct dst_entry **dst, struct flowi *fl)
940 {
941 int err;
942 struct net *net = sock_net(sk);
943
944 if (*dst == NULL)
945 *dst = ip6_route_output(net, sk, fl);
946
947 if ((err = (*dst)->error))
948 goto out_err_release;
949
950 if (ipv6_addr_any(&fl->fl6_src)) {
951 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
952 &fl->fl6_dst,
953 sk ? inet6_sk(sk)->srcprefs : 0,
954 &fl->fl6_src);
955 if (err)
956 goto out_err_release;
957 }
958
959 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
960 /*
961 * Here, if the dst entry we've looked up
962 * has a neighbour entry that is in the INCOMPLETE
963 * state and the src address from the flow is
964 * marked as OPTIMISTIC, we release the found
965 * dst entry and replace it with the dst entry
966 * of the nexthop router.
967 */
968 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
969 struct inet6_ifaddr *ifp;
970 struct flowi fl_gw;
971 int redirect;
972
973 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
974 (*dst)->dev, 1);
975
976 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
977 if (ifp)
978 in6_ifa_put(ifp);
979
980 if (redirect) {
981 /*
982 * We need to get the dst entry for the
983 * default router instead
984 */
985 dst_release(*dst);
986 memcpy(&fl_gw, fl, sizeof(struct flowi));
987 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
988 *dst = ip6_route_output(net, sk, &fl_gw);
989 if ((err = (*dst)->error))
990 goto out_err_release;
991 }
992 }
993 #endif
994
995 return 0;
996
997 out_err_release:
998 if (err == -ENETUNREACH)
999 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1000 dst_release(*dst);
1001 *dst = NULL;
1002 return err;
1003 }
1004
1005 /**
1006 * ip6_dst_lookup - perform route lookup on flow
1007 * @sk: socket which provides route info
1008 * @dst: pointer to dst_entry * for result
1009 * @fl: flow to lookup
1010 *
1011 * This function performs a route lookup on the given flow.
1012 *
1013 * It returns zero on success, or a standard errno code on error.
1014 */
1015 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1016 {
1017 *dst = NULL;
1018 return ip6_dst_lookup_tail(sk, dst, fl);
1019 }
1020 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1021
1022 /**
1023 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
1024 * @sk: socket which provides the dst cache and route info
1025 * @dst: pointer to dst_entry * for result
1026 * @fl: flow to lookup
1027 *
1028 * This function performs a route lookup on the given flow with the
1029 * possibility of using the cached route in the socket if it is valid.
1030 * It will take the socket dst lock when operating on the dst cache.
1031 * As a result, this function can only be used in process context.
1032 *
1033 * It returns zero on success, or a standard errno code on error.
1034 */
1035 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1036 {
1037 *dst = NULL;
1038 if (sk) {
1039 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1040 *dst = ip6_sk_dst_check(sk, *dst, fl);
1041 }
1042
1043 return ip6_dst_lookup_tail(sk, dst, fl);
1044 }
1045 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1046
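/*
 * UFO path for ip6_append_data(): build (or extend) one large skb that
 * carries the whole UDP datagram and let the device segment it, with
 * gso_size set to the payload that fits in each fragment and the
 * fragment identification chosen up front.
 */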
1047 static inline int ip6_ufo_append_data(struct sock *sk,
1048 int getfrag(void *from, char *to, int offset, int len,
1049 int odd, struct sk_buff *skb),
1050 void *from, int length, int hh_len, int fragheaderlen,
1051 int transhdrlen, int mtu,unsigned int flags)
1052
1053 {
1054 struct sk_buff *skb;
1055 int err;
1056
1057 /* The network device supports UDP large send offload, so
1058 * create one single skb packet containing the complete
1059 * UDP datagram
1060 */
1061 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1062 skb = sock_alloc_send_skb(sk,
1063 hh_len + fragheaderlen + transhdrlen + 20,
1064 (flags & MSG_DONTWAIT), &err);
1065 if (skb == NULL)
1066 return -ENOMEM;
1067
1068 /* reserve space for Hardware header */
1069 skb_reserve(skb, hh_len);
1070
1071 /* create space for UDP/IP header */
1072 skb_put(skb,fragheaderlen + transhdrlen);
1073
1074 /* initialize network header pointer */
1075 skb_reset_network_header(skb);
1076
1077 /* initialize protocol header pointer */
1078 skb->transport_header = skb->network_header + fragheaderlen;
1079
1080 skb->ip_summed = CHECKSUM_PARTIAL;
1081 skb->csum = 0;
1082 sk->sk_sndmsg_off = 0;
1083 }
1084
1085 err = skb_append_datato_frags(sk,skb, getfrag, from,
1086 (length - transhdrlen));
1087 if (!err) {
1088 struct frag_hdr fhdr;
1089
1090 /* specify the length of each IP datagram fragment*/
1091 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1092 sizeof(struct frag_hdr);
1093 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1094 ipv6_select_ident(skb, &fhdr);
1095 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1096 __skb_queue_tail(&sk->sk_write_queue, skb);
1097
1098 return 0;
1099 }
1100 /* There is not enough support to do UDP LSO,
1101 * so follow the normal path
1102 */
1103 kfree_skb(skb);
1104
1105 return err;
1106 }
1107
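/*
 * Append data to the pending (corked) output queue.  The first call
 * records the transmit parameters (options, route, hop limit, traffic
 * class, MTU) in the cork state; this and later calls then carve the
 * data into MTU-sized skbs on sk->sk_write_queue, which are turned into
 * real packets by ip6_push_pending_frames().
 */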
1108 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1109 int offset, int len, int odd, struct sk_buff *skb),
1110 void *from, int length, int transhdrlen,
1111 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1112 struct rt6_info *rt, unsigned int flags)
1113 {
1114 struct inet_sock *inet = inet_sk(sk);
1115 struct ipv6_pinfo *np = inet6_sk(sk);
1116 struct sk_buff *skb;
1117 unsigned int maxfraglen, fragheaderlen;
1118 int exthdrlen;
1119 int hh_len;
1120 int mtu;
1121 int copy;
1122 int err;
1123 int offset = 0;
1124 int csummode = CHECKSUM_NONE;
1125
1126 if (flags&MSG_PROBE)
1127 return 0;
1128 if (skb_queue_empty(&sk->sk_write_queue)) {
1129 /*
1130 * setup for corking
1131 */
1132 if (opt) {
1133 if (np->cork.opt == NULL) {
1134 np->cork.opt = kmalloc(opt->tot_len,
1135 sk->sk_allocation);
1136 if (unlikely(np->cork.opt == NULL))
1137 return -ENOBUFS;
1138 } else if (np->cork.opt->tot_len < opt->tot_len) {
1139 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
1140 return -EINVAL;
1141 }
1142 memcpy(np->cork.opt, opt, opt->tot_len);
1143 inet->cork.flags |= IPCORK_OPT;
1144 /* need source address above miyazawa*/
1145 }
1146 dst_hold(&rt->u.dst);
1147 inet->cork.dst = &rt->u.dst;
1148 inet->cork.fl = *fl;
1149 np->cork.hop_limit = hlimit;
1150 np->cork.tclass = tclass;
1151 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1152 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1153 if (np->frag_size < mtu) {
1154 if (np->frag_size)
1155 mtu = np->frag_size;
1156 }
1157 inet->cork.fragsize = mtu;
1158 if (dst_allfrag(rt->u.dst.path))
1159 inet->cork.flags |= IPCORK_ALLFRAG;
1160 inet->cork.length = 0;
1161 sk->sk_sndmsg_page = NULL;
1162 sk->sk_sndmsg_off = 0;
1163 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1164 rt->rt6i_nfheader_len;
1165 length += exthdrlen;
1166 transhdrlen += exthdrlen;
1167 } else {
1168 rt = (struct rt6_info *)inet->cork.dst;
1169 fl = &inet->cork.fl;
1170 if (inet->cork.flags & IPCORK_OPT)
1171 opt = np->cork.opt;
1172 transhdrlen = 0;
1173 exthdrlen = 0;
1174 mtu = inet->cork.fragsize;
1175 }
1176
1177 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1178
1179 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1180 (opt ? opt->opt_nflen : 0);
1181 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1182
1183 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1184 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1185 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1186 return -EMSGSIZE;
1187 }
1188 }
1189
1190 /*
1191 * Let's try using as much space as possible.
1192 * Use the MTU if the total length of the message fits into the MTU.
1193 * Otherwise, we need to reserve fragment header and
1194 * fragment alignment (= 8-15 octets, in total).
1195 *
1196 * Note that we may need to "move" the data from the tail
1197 * of the buffer to the new fragment when we split
1198 * the message.
1199 *
1200 * FIXME: It may be fragmented into multiple chunks
1201 * at once if non-fragmentable extension headers
1202 * are too large.
1203 * --yoshfuji
1204 */
1205
1206 inet->cork.length += length;
1207 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1208 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1209
1210 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1211 fragheaderlen, transhdrlen, mtu,
1212 flags);
1213 if (err)
1214 goto error;
1215 return 0;
1216 }
1217
1218 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1219 goto alloc_new_skb;
1220
1221 while (length > 0) {
1222 /* Check if the remaining data fits into current packet. */
1223 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1224 if (copy < length)
1225 copy = maxfraglen - skb->len;
1226
1227 if (copy <= 0) {
1228 char *data;
1229 unsigned int datalen;
1230 unsigned int fraglen;
1231 unsigned int fraggap;
1232 unsigned int alloclen;
1233 struct sk_buff *skb_prev;
1234 alloc_new_skb:
1235 skb_prev = skb;
1236
1237 /* There's no room in the current skb */
1238 if (skb_prev)
1239 fraggap = skb_prev->len - maxfraglen;
1240 else
1241 fraggap = 0;
1242
1243 /*
1244 * If remaining data exceeds the mtu,
1245 * we know we need more fragment(s).
1246 */
1247 datalen = length + fraggap;
1248 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1249 datalen = maxfraglen - fragheaderlen;
1250
1251 fraglen = datalen + fragheaderlen;
1252 if ((flags & MSG_MORE) &&
1253 !(rt->u.dst.dev->features&NETIF_F_SG))
1254 alloclen = mtu;
1255 else
1256 alloclen = datalen + fragheaderlen;
1257
1258 /*
1259 * The last fragment gets additional space at tail.
1260 * Note: we overallocate on fragments with MSG_MORE
1261 * because we have no idea if we're the last one.
1262 */
1263 if (datalen == length + fraggap)
1264 alloclen += rt->u.dst.trailer_len;
1265
1266 /*
1267 * We just reserve space for the fragment header.
1268 * Note: this may be an overallocation if the message
1269 * (without MSG_MORE) fits into the MTU.
1270 */
1271 alloclen += sizeof(struct frag_hdr);
1272
1273 if (transhdrlen) {
1274 skb = sock_alloc_send_skb(sk,
1275 alloclen + hh_len,
1276 (flags & MSG_DONTWAIT), &err);
1277 } else {
1278 skb = NULL;
1279 if (atomic_read(&sk->sk_wmem_alloc) <=
1280 2 * sk->sk_sndbuf)
1281 skb = sock_wmalloc(sk,
1282 alloclen + hh_len, 1,
1283 sk->sk_allocation);
1284 if (unlikely(skb == NULL))
1285 err = -ENOBUFS;
1286 }
1287 if (skb == NULL)
1288 goto error;
1289 /*
1290 * Fill in the control structures
1291 */
1292 skb->ip_summed = csummode;
1293 skb->csum = 0;
1294 /* reserve for fragmentation */
1295 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1296
1297 /*
1298 * Find where to start putting bytes
1299 */
1300 data = skb_put(skb, fraglen);
1301 skb_set_network_header(skb, exthdrlen);
1302 data += fragheaderlen;
1303 skb->transport_header = (skb->network_header +
1304 fragheaderlen);
1305 if (fraggap) {
1306 skb->csum = skb_copy_and_csum_bits(
1307 skb_prev, maxfraglen,
1308 data + transhdrlen, fraggap, 0);
1309 skb_prev->csum = csum_sub(skb_prev->csum,
1310 skb->csum);
1311 data += fraggap;
1312 pskb_trim_unique(skb_prev, maxfraglen);
1313 }
1314 copy = datalen - transhdrlen - fraggap;
1315 if (copy < 0) {
1316 err = -EINVAL;
1317 kfree_skb(skb);
1318 goto error;
1319 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1320 err = -EFAULT;
1321 kfree_skb(skb);
1322 goto error;
1323 }
1324
1325 offset += copy;
1326 length -= datalen - fraggap;
1327 transhdrlen = 0;
1328 exthdrlen = 0;
1329 csummode = CHECKSUM_NONE;
1330
1331 /*
1332 * Put the packet on the pending queue
1333 */
1334 __skb_queue_tail(&sk->sk_write_queue, skb);
1335 continue;
1336 }
1337
1338 if (copy > length)
1339 copy = length;
1340
1341 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1342 unsigned int off;
1343
1344 off = skb->len;
1345 if (getfrag(from, skb_put(skb, copy),
1346 offset, copy, off, skb) < 0) {
1347 __skb_trim(skb, off);
1348 err = -EFAULT;
1349 goto error;
1350 }
1351 } else {
1352 int i = skb_shinfo(skb)->nr_frags;
1353 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1354 struct page *page = sk->sk_sndmsg_page;
1355 int off = sk->sk_sndmsg_off;
1356 unsigned int left;
1357
1358 if (page && (left = PAGE_SIZE - off) > 0) {
1359 if (copy >= left)
1360 copy = left;
1361 if (page != frag->page) {
1362 if (i == MAX_SKB_FRAGS) {
1363 err = -EMSGSIZE;
1364 goto error;
1365 }
1366 get_page(page);
1367 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1368 frag = &skb_shinfo(skb)->frags[i];
1369 }
1370 } else if(i < MAX_SKB_FRAGS) {
1371 if (copy > PAGE_SIZE)
1372 copy = PAGE_SIZE;
1373 page = alloc_pages(sk->sk_allocation, 0);
1374 if (page == NULL) {
1375 err = -ENOMEM;
1376 goto error;
1377 }
1378 sk->sk_sndmsg_page = page;
1379 sk->sk_sndmsg_off = 0;
1380
1381 skb_fill_page_desc(skb, i, page, 0, 0);
1382 frag = &skb_shinfo(skb)->frags[i];
1383 } else {
1384 err = -EMSGSIZE;
1385 goto error;
1386 }
1387 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1388 err = -EFAULT;
1389 goto error;
1390 }
1391 sk->sk_sndmsg_off += copy;
1392 frag->size += copy;
1393 skb->len += copy;
1394 skb->data_len += copy;
1395 skb->truesize += copy;
1396 atomic_add(copy, &sk->sk_wmem_alloc);
1397 }
1398 offset += copy;
1399 length -= copy;
1400 }
1401 return 0;
1402 error:
1403 inet->cork.length -= length;
1404 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1405 return err;
1406 }
1407
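/* Release the cork state: cached options, the held route and the saved
 * flow. */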
1408 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1409 {
1410 inet->cork.flags &= ~IPCORK_OPT;
1411 kfree(np->cork.opt);
1412 np->cork.opt = NULL;
1413 if (inet->cork.dst) {
1414 dst_release(inet->cork.dst);
1415 inet->cork.dst = NULL;
1416 inet->cork.flags &= ~IPCORK_ALLFRAG;
1417 }
1418 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1419 }
1420
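/*
 * Turn the skbs queued by ip6_append_data() into one packet: chain the
 * extra skbs onto the first skb's frag_list, push the extension headers
 * and the IPv6 header, then send the result via ip6_local_out().
 */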
1421 int ip6_push_pending_frames(struct sock *sk)
1422 {
1423 struct sk_buff *skb, *tmp_skb;
1424 struct sk_buff **tail_skb;
1425 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1426 struct inet_sock *inet = inet_sk(sk);
1427 struct ipv6_pinfo *np = inet6_sk(sk);
1428 struct net *net = sock_net(sk);
1429 struct ipv6hdr *hdr;
1430 struct ipv6_txoptions *opt = np->cork.opt;
1431 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1432 struct flowi *fl = &inet->cork.fl;
1433 unsigned char proto = fl->proto;
1434 int err = 0;
1435
1436 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1437 goto out;
1438 tail_skb = &(skb_shinfo(skb)->frag_list);
1439
1440 /* move skb->data to ip header from ext header */
1441 if (skb->data < skb_network_header(skb))
1442 __skb_pull(skb, skb_network_offset(skb));
1443 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1444 __skb_pull(tmp_skb, skb_network_header_len(skb));
1445 *tail_skb = tmp_skb;
1446 tail_skb = &(tmp_skb->next);
1447 skb->len += tmp_skb->len;
1448 skb->data_len += tmp_skb->len;
1449 skb->truesize += tmp_skb->truesize;
1450 __sock_put(tmp_skb->sk);
1451 tmp_skb->destructor = NULL;
1452 tmp_skb->sk = NULL;
1453 }
1454
1455 /* Allow local fragmentation. */
1456 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1457 skb->local_df = 1;
1458
1459 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1460 __skb_pull(skb, skb_network_header_len(skb));
1461 if (opt && opt->opt_flen)
1462 ipv6_push_frag_opts(skb, opt, &proto);
1463 if (opt && opt->opt_nflen)
1464 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1465
1466 skb_push(skb, sizeof(struct ipv6hdr));
1467 skb_reset_network_header(skb);
1468 hdr = ipv6_hdr(skb);
1469
1470 *(__be32*)hdr = fl->fl6_flowlabel |
1471 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1472
1473 hdr->hop_limit = np->cork.hop_limit;
1474 hdr->nexthdr = proto;
1475 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1476 ipv6_addr_copy(&hdr->daddr, final_dst);
1477
1478 skb->priority = sk->sk_priority;
1479 skb->mark = sk->sk_mark;
1480
1481 skb->dst = dst_clone(&rt->u.dst);
1482 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1483 if (proto == IPPROTO_ICMPV6) {
1484 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1485
1486 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1487 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1488 }
1489
1490 err = ip6_local_out(skb);
1491 if (err) {
1492 if (err > 0)
1493 err = np->recverr ? net_xmit_errno(err) : 0;
1494 if (err)
1495 goto error;
1496 }
1497
1498 out:
1499 ip6_cork_release(inet, np);
1500 return err;
1501 error:
1502 goto out;
1503 }
1504
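/* Discard any data still queued on the socket without sending it and
 * release the cork state. */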
1505 void ip6_flush_pending_frames(struct sock *sk)
1506 {
1507 struct sk_buff *skb;
1508
1509 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1510 if (skb->dst)
1511 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb->dst),
1512 IPSTATS_MIB_OUTDISCARDS);
1513 kfree_skb(skb);
1514 }
1515
1516 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1517 }