[IPV4,IPV6]: Share cork.rt between IPv4 and IPv6.
net/ipv6/ip6_output.c
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

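/* Pick the next value from a single global fragment-ID counter, under a
 * spinlock, and store it into the fragment header in network byte order.
 * Zero is skipped, so a valid ID is never confused with "no ID allocated
 * yet" (ip6_fragment's slow path uses frag_id == 0 as that marker).
 */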
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;
	spin_unlock_bh(&ip6_id_lock);
}

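/* Fill in payload_len (clamped to 0 for jumbo-sized packets, where a
 * Jumbo Payload option carries the real length) and run the packet
 * through the LOCAL_OUT netfilter hook.  nf_hook() returns 1 when the
 * verdict is "accept", so ip6_local_out() continues to dst_output()
 * only in that case.
 */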
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
		       dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

static int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	BUG_TRAP(newskb->dst);

	netif_rx(newskb);
	return 0;
}

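/* Post-routing output step.  For multicast destinations: if the group is
 * joined on this device, loop a clone of the packet back to local
 * listeners (honouring the socket's IPV6_MULTICAST_LOOP setting) and, in
 * that case, drop rather than transmit packets whose hop limit is 0.
 */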
static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					&ipv6_hdr(skb)->saddr)) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
	}

	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}

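/* Effective path MTU for this skb: sockets probing path MTU themselves
 * (IPV6_PMTUDISC_PROBE) use the device MTU, everyone else the route's.
 */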
static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}

int ip6_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8 proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(ip6_dst_idev(skb->dst),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */

	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
	if (hlimit < 0)
		hlimit = ipv6_get_hoplimit(dst->dev);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

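	/* First 32 bits of the header: version 6 in the top nibble
	 * (0x60000000), the 8-bit traffic class at bits 20-27, and the
	 * 20-bit flow label, which fl6_flowlabel already stores in
	 * network byte order.
	 */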
	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	for us is performance critical).
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       struct in6_addr *saddr, struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

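/* Deliver a packet carrying a Router Alert option to every raw socket
 * registered (via the IPV6_ROUTER_ALERT sockopt) for this RA value and
 * bound to the right interface.  Each listener but the last gets a
 * clone; the last consumes the original.  Returns 1 if the packet was
 * delivered, 0 if the caller still owns it.
 */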
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

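/* Decide what to do with a packet whose destination is an address we
 * proxy for: 1 = hand it to local input (unicast NDISC aimed at the
 * proxied address), -1 = discard (link-local destination we must not
 * forward), 0 = forward normally.
 */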
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dst->dev->nd_net;

	if (ipv6_devconf.forwarding == 0)
		goto error;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT do any processing on RA packets; we push them up
	 *	to user level AS IS, with no warranty that the application
	 *	will be able to interpret them.  The reason is that we
	 *	cannot do anything clever here.
	 *
	 *	We are not an end node, so if the packet contains AH/ESP
	 *	we cannot do anything with it.  Defragmenting would also
	 *	be a mistake: RA packets must not be fragmented, because
	 *	there is no guarantee that different fragments will travel
	 *	along a single path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement hop limit
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (ipv6_devconf.proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb->dst;

	/* The IPv6 specs say nothing about it, but it is clear that we
	   cannot send redirects for source-routed frames.
	   We also don't send redirects for frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb->sp) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same:
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr *)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Decrementing the hop limit is delayed until after the skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

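/* Copy the per-packet metadata (packet type, priority, mark, dst
 * reference, netfilter and traffic-control state) from the original
 * packet onto each fragment we emit for it.
 */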
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

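/* Walk the extension header chain to find the offset at which the
 * Fragment header must be inserted: after Hop-by-Hop, Routing, and any
 * Destination Options header that either precedes a Routing header or
 * carries a Home Address option.  *nexthdr is left pointing at the
 * "next header" byte that will be rewritten to NEXTHDR_FRAGMENT.
 */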
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

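/* Split an oversized packet into fragments.  The fast path reuses an
 * already-built frag_list (as produced by ip6_append_data) when every
 * chunk has correct geometry, inserting a Fragment header in front of
 * each chunk in place; otherwise the slow path allocates a fresh skb
 * per fragment and copies the data into it.
 */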
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct net_device *dev;
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	dev = rt->u.dst.dev;
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.  (This last
	 * check should be redundant, but it's free.)
	 */
	if (!skb->local_df) {
		skb->dev = skb->dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);
		int truesizes = 0;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				truesizes += frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->truesize -= truesizes;
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

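/* Nonzero when the cached route can no longer be trusted for this flow:
 * the route is not a host route matching fl_addr, and the last
 * destination we validated against (addr_cache) does not match either.
 */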
static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not-connected
	 * case is not very simple. Take into account that
	 * we do not support routing by source, TOS, and
	 * MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route,
	 *    check that the cached destination is current.
	 *    If it is a network route, we still may
	 *    check its validity using the saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save the whole address now,
	 *    (because the main consumer of this service
	 *    is TCP, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

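/* Common tail of the dst lookup helpers below: do the route lookup if
 * the caller supplied no dst, pick a source address when the flow has
 * none, and (with optimistic DAD) fall back to the default router's dst
 * while our tentative source address still lacks a valid neighbour.
 */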
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sk->sk_net;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst, &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
				    int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by the network
	 * device, so create one single skb packet containing the complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
					    sizeof(struct frag_hdr);
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(skb, &fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path
	 */
	kfree_skb(skb);

	return err;
}

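/* Append data to the socket's pending (corked) queue.  On the first
 * call we set up the cork state: the protocol-independent part (dst,
 * flow, fragment size) lives in the shared inet->cork, which this
 * commit makes common between IPv4 and IPv6, while the IPv6-specific
 * options, hop limit and traffic class live in np->cork.  Later calls
 * reuse that state and simply grow the queued packet(s).
 */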
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above miyazawa */
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

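/* Tear down the cork state set up by ip6_append_data(): free the copied
 * options, drop the dst reference, and clear the saved flow.
 */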
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (inet->cork.dst) {
		dst_release(inet->cork.dst);
		inet->cork.dst = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

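/* Turn the queue of pending skbs into one packet: chain the followers
 * onto the head skb's frag_list, prepend the extension headers and the
 * IPv6 header from the cork state, then hand the result to
 * ip6_local_out().  ip6_fragment()'s fast path later re-splits along
 * these frag_list boundaries if the packet exceeds the path MTU.
 */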
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data back to the ip header if it points at an ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl->fl6_flowlabel |
			 htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb->dst)
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}