/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Changes:
 *      A.N.Kuznetsov           :       arithmetic in fragmentation.
 *                                      extension headers are implemented.
 *                                      route changes now work.
 *                                      ip6_forward does not confuse sniffers.
 *                                      etc.
 *
 *      H. von Brand            :       Added missing #include <linux/string.h>
 *      Imran Patel             :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                              :       add ip6_append_data and related functions
 *                                      for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

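/*
 * Fix up the IPv6 payload length, then run the netfilter LOCAL_OUT hook;
 * ip6_local_out() continues with dst_output() when the hook chain lets
 * the packet through (nf_hook() returning 1).
 */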
int __ip6_local_out(struct sk_buff *skb)
{
        int len;

        len = skb->len - sizeof(struct ipv6hdr);
        if (len > IPV6_MAXPLEN)
                len = 0;
        ipv6_hdr(skb)->payload_len = htons(len);

        return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
        int err;

        err = __ip6_local_out(skb);
        if (likely(err == 1))
                err = dst_output(skb);

        return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
        skb_reset_mac_header(newskb);
        __skb_pull(newskb, skb_network_offset(newskb));
        newskb->pkt_type = PACKET_LOOPBACK;
        newskb->ip_summed = CHECKSUM_UNNECESSARY;
        WARN_ON(!skb_dst(newskb));

        netif_rx_ni(newskb);
        return 0;
}

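/*
 * Last step before the device: resolve the neighbour and queue the
 * packet for transmission.  Multicast packets may additionally be
 * looped back to local listeners via ip6_dev_loopback_xmit().
 */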
static int ip6_finish_output2(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        struct neighbour *neigh;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
                    ((mroute6_socket(dev_net(dev), skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                           is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        newskb, NULL, newskb->dev,
                                        ip6_dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(dev_net(dev), idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
                                 skb->len);
        }

        rcu_read_lock();
        neigh = dst_get_neighbour(dst);
        if (neigh) {
                int res = neigh_output(neigh, skb);

                rcu_read_unlock();
                return res;
        }
        rcu_read_unlock();
        IP6_INC_STATS_BH(dev_net(dst->dev),
                         ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

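/* Fragment the packet if it exceeds the path MTU, otherwise send it out. */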
static int ip6_finish_output(struct sk_buff *skb)
{
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)))
                return ip6_fragment(skb, ip6_finish_output2);
        else
                return ip6_finish_output2(skb);
}

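/*
 * Output entry point after routing: drop the packet if IPv6 is disabled
 * on the device, then run the POST_ROUTING hook (skipped for packets
 * flagged IP6SKB_REROUTED) before ip6_finish_output().
 */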
int ip6_output(struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(dev_net(dev), idev,
                              IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *      xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             struct ipv6_txoptions *opt, int tclass)
{
        struct net *net = sock_net(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr;
        u8 proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        if (opt) {
                unsigned int head_room;

                /* First: exthdrs may take lots of space (~8K for now)
                   MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        if (skb2 == NULL) {
                                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return -ENOBUFS;
                        }
                        kfree_skb(skb);
                        skb = skb2;
                        skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
        ipv6_addr_copy(&hdr->daddr, first_hop);

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                 IPSTATS_MIB_OUT, skb->len);
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                               dst->dev, dst_output);
        }

        if (net_ratelimit())
                printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
        skb->dev = dst->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *      To avoid extra problems ND packets are sent through this
 *      routine. It's code duplication but I really want to avoid
 *      extra checks since ipv6_build_header is used by TCP (which
 *      is performance critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
               const struct in6_addr *saddr, const struct in6_addr *daddr,
               int proto, int len)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        skb_reset_network_header(skb);
        skb_put(skb, sizeof(struct ipv6hdr));
        hdr = ipv6_hdr(skb);

        *(__be32 *)hdr = htonl(0x60000000);

        hdr->payload_len = htons(len);
        hdr->nexthdr = proto;
        hdr->hop_limit = np->hop_limit;

        ipv6_addr_copy(&hdr->saddr, saddr);
        ipv6_addr_copy(&hdr->daddr, daddr);

        return 0;
}

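/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * that registered for this alert value (IPV6_ROUTER_ALERT).  Returns 1
 * if at least one socket consumed the packet.
 */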
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

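/*
 * Decide what to do with a packet destined to a proxied address:
 * 1 means hand it to local input (unicast neighbour discovery),
 * 0 means forward it as usual, and -1 means discard it (link-local
 * destinations cannot be proxied).
 */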
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* A unicast neighbour discovery message destined
                         * to the proxied address must be passed to the
                         * input function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
        return dst_output(skb);
}

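/*
 * Forward a packet that is not addressed to us: validate the hop limit,
 * honour Router Alert options and NDISC proxying, generate a redirect
 * when the packet leaves through the interface it arrived on, and
 * enforce the path MTU before handing the packet to the FORWARD hook.
 */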
int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        struct neighbour *n;
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        skb_forward_csum(skb);

        /*
         * We do no processing on RA packets, pushing them to user
         * level as is, without any warranty that the application
         * will be able to interpret them. The reason is that we
         * cannot make anything clever here.
         *
         * We are not an end node, so if the packet contains AH/ESP
         * we cannot do anything. Defragmentation would also be a
         * mistake: RA packets cannot be fragmented, because there is
         * no warranty that different fragments will go along one
         * path. --ANK
         */
        if (opt->ra) {
                u8 *ptr = skb_network_header(skb) + opt->ra;
                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
                        return 0;
        }

        /*
         * check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(dst),
                                      IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* The IPv6 specs say nothing about it, but it is clear that we
           cannot send redirects for source routed frames.
           We don't send redirects for frames decapsulated from IPsec.
         */
        n = dst_get_neighbour(dst);
        if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct rt6_info *rt;

                /*
                 *      incoming and outgoing devices are the same
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if ((rt->rt6i_flags & RTF_GATEWAY))
                        target = (struct in6_addr *)&n->primary_key;
                else
                        target = &hdr->daddr;

                if (!rt->rt6i_peer)
                        rt6_bind_peer(rt, 1);

                /* Limit redirects both by destination (here)
                   and by source (inside ndisc_send_redirect)
                 */
                if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
                        ndisc_send_redirect(skb, n, target);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = dst_mtu(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (skb->len > mtu && !skb_is_gso(skb)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling hops number delayed to point after skb COW */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

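/* Copy per-packet metadata from the original skb to a new fragment. */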
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
        to->nf_trace = from->nf_trace;
#endif
        skb_copy_secmark(to, from);
}

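/*
 * Walk the extension header chain and return the offset at which the
 * Fragment header has to be inserted; *nexthdr is left pointing at the
 * "next header" byte that must be rewritten to NEXTHDR_FRAGMENT.
 */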
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
        u16 offset = sizeof(struct ipv6hdr);
        struct ipv6_opt_hdr *exthdr =
                                (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
        unsigned int packet_len = skb->tail - skb->network_header;
        int found_rhdr = 0;
        *nexthdr = &ipv6_hdr(skb)->nexthdr;

        while (offset + 1 <= packet_len) {

                switch (**nexthdr) {

                case NEXTHDR_HOP:
                        break;
                case NEXTHDR_ROUTING:
                        found_rhdr = 1;
                        break;
                case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
                        if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
                                break;
#endif
                        if (found_rhdr)
                                return offset;
                        break;
                default:
                        return offset;
                }

                offset += ipv6_optlen(exthdr);
                *nexthdr = &exthdr->nexthdr;
                exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
                                                 offset);
        }

        return offset;
}

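/*
 * Choose the fragment identification: use the per-destination inet_peer
 * counter when a route is available, otherwise fall back to a global
 * counter that skips the value zero.
 */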
void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
{
        static atomic_t ipv6_fragmentation_id;
        int old, new;

        if (rt) {
                struct inet_peer *peer;

                if (!rt->rt6i_peer)
                        rt6_bind_peer(rt, 1);
                peer = rt->rt6i_peer;
                if (peer) {
                        fhdr->identification = htonl(inet_getid(peer, 0));
                        return;
                }
        }
        do {
                old = atomic_read(&ipv6_fragmentation_id);
                new = old + 1;
                if (!new)
                        new = 1;
        } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
        fhdr->identification = htonl(new);
}

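/*
 * Split an oversized packet into fragments and feed each one to @output.
 * The fast path reuses an existing frag_list when its geometry already
 * fits; otherwise the slow path copies the data into freshly allocated
 * fragment skbs.
 */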
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        __be32 frag_id = 0;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
        struct net *net = dev_net(skb_dst(skb)->dev);

        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (!skb->local_df && skb->len > mtu) {
                skb->dev = skb_dst(skb)->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        mtu -= hlen + sizeof(struct frag_hdr);

        if (skb_has_frag_list(skb)) {
                int first_len = skb_pagelen(skb);
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < hlen)
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = 0;
                offset = 0;
                frag = skb_shinfo(skb)->frag_list;
                skb_frag_list_init(skb);
                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        return -ENOMEM;
                }

                __skb_pull(skb, hlen);
                fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                ipv6_select_ident(fh, rt);
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                frag_id = fh->identification;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                dst_hold(&rt->dst);

                for (;;) {
                        /* Prepare header of the next frame,
                         * before previous one went down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next != NULL)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                                htons(frag->len -
                                                      sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        dst_release(&rt->dst);
                        return 0;
                }

                while (frag) {
                        skb = frag->next;
                        kfree_skb(frag);
                        frag = skb;
                }

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                dst_release(&rt->dst);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         *      Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;

        /*
         *      Keep copying data until we run out.
         */
        while (left > 0) {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                   then align the next start on an eight byte boundary */
                if (len < left) {
                        len &= ~7;
                }
                /*
                 *      Allocate buffer.
                 */

                if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                      LL_ALLOCATED_SPACE(rt->dst.dev),
                                      GFP_ATOMIC)) == NULL) {
                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *      Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 *      Charge the memory for the fragment to any owner
                 *      it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *      Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                /*
                 *      Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                if (!frag_id) {
                        ipv6_select_ident(fh, rt);
                        frag_id = fh->identification;
                } else
                        fh->identification = frag_id;

                /*
                 *      Copy a block of the IP datagram.
                 */
                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
                        BUG();
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *      Put this fragment into the sending queue.
                 */
                err = output(frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        kfree_skb(skb);
        return err;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

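/*
 * Helpers for validating a socket's cached route: ip6_rt_check()
 * returns nonzero when neither the host-route destination nor the
 * cached peer address shows that the route still matches the flow,
 * and ip6_sk_dst_check() then drops the cached dst so that a fresh
 * lookup is performed.
 */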
static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
               (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt = (struct rt6_info *)dst;

        if (!dst)
                goto out;

        /* Yes, checking route validity in the not-connected case is
         * not very simple. Take into account that we do not support
         * routing by source, TOS, and MSG_DONTROUTE --ANK (980726)
         *
         * 1. ip6_rt_check(): If the route was a host route, check that
         *    the cached destination is current. If it is a network
         *    route, we still may check its validity using the saved
         *    pointer to the last used address: daddr_cache. We do not
         *    want to save the whole address now (because the main
         *    consumer of this service is TCP, which does not have this
         *    problem), so the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
            (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

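/*
 * Core of the dst lookup: resolve the route if none was supplied, pick
 * a source address when the flow has none, and (with optimistic DAD)
 * fall back to the default router while our own address is still
 * tentative.
 */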
static int ip6_dst_lookup_tail(struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
        struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
#endif
        int err;

        if (*dst == NULL)
                *dst = ip6_route_output(net, sk, fl6);

        if ((err = (*dst)->error))
                goto out_err_release;

        if (ipv6_addr_any(&fl6->saddr)) {
                struct rt6_info *rt = (struct rt6_info *) *dst;
                err = ip6_route_get_saddr(net, rt, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                if (err)
                        goto out_err_release;
        }

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router
         */
        rcu_read_lock();
        n = dst_get_neighbour(*dst);
        if (n && !(n->nud_state & NUD_VALID)) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                rcu_read_unlock();
                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        if ((err = (*dst)->error))
                                goto out_err_release;
                }
        } else {
                rcu_read_unlock();
        }
#endif

        return 0;

out_err_release:
        if (err == -ENETUNREACH)
                IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        dst_release(*dst);
        *dst = NULL;
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl6: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *      @sk: socket which provides route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @can_sleep: we are in a sleepable context
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst,
                                      bool can_sleep)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                ipv6_addr_copy(&fl6->daddr, final_dst);
        if (can_sleep)
                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @can_sleep: we are in a sleepable context
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool can_sleep)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
        int err;

        dst = ip6_sk_dst_check(sk, dst, fl6);

        err = ip6_dst_lookup_tail(sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                ipv6_addr_copy(&fl6->daddr, final_dst);
        if (can_sleep)
                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

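/*
 * UFO path of ip6_append_data(): build one large skb covering the whole
 * UDP datagram and let the device segment it; gso_size is the fragment
 * payload size, rounded down to a multiple of 8.
 */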
static inline int ip6_ufo_append_data(struct sock *sk,
                        int getfrag(void *from, char *to, int offset, int len,
                                    int odd, struct sk_buff *skb),
                        void *from, int length, int hh_len, int fragheaderlen,
                        int transhdrlen, int mtu, unsigned int flags,
                        struct rt6_info *rt)

{
        struct sk_buff *skb;
        int err;

        /* There is support for UDP large send offload by the network
         * device, so create one single skb packet containing the
         * complete UDP datagram.
         */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);
                if (skb == NULL)
                        return err;

                /* reserve space for Hardware header */
                skb_reserve(skb, hh_len);

                /* create space for UDP/IP header */
                skb_put(skb, fragheaderlen + transhdrlen);

                /* initialize network header pointer */
                skb_reset_network_header(skb);

                /* initialize protocol header pointer */
                skb->transport_header = skb->network_header + fragheaderlen;

                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
        }

        err = skb_append_datato_frags(sk, skb, getfrag, from,
                                      (length - transhdrlen));
        if (!err) {
                struct frag_hdr fhdr;

                /* Specify the length of each IPv6 datagram fragment.
                 * It has to be a multiple of 8.
                 */
                skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
                                             sizeof(struct frag_hdr)) & ~7;
                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
                ipv6_select_ident(&fhdr, rt);
                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
                __skb_queue_tail(&sk->sk_write_queue, skb);

                return 0;
        }
        /* There is not enough support to do UDP LSO,
         * so follow the normal path.
         */
        kfree_skb(skb);

        return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

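/*
 * Append data to the pending queue of a corked socket.  The first call
 * sets up the cork state (duplicated options, held route, fragment
 * size); later calls reuse it.  Data is packed into MTU-sized skbs,
 * spilling into page fragments when the device supports scatter/gather.
 */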
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
        int offset, int len, int odd, struct sk_buff *skb),
        void *from, int length, int transhdrlen,
        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
        struct rt6_info *rt, unsigned int flags, int dontfrag)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct inet_cork *cork;
        struct sk_buff *skb;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int dst_exthdrlen;
        int hh_len;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        int csummode = CHECKSUM_NONE;
        __u8 tx_flags = 0;

        if (flags & MSG_PROBE)
                return 0;
        cork = &inet->cork.base;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                if (opt) {
                        if (WARN_ON(np->cork.opt))
                                return -EINVAL;

                        np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
                        if (unlikely(np->cork.opt == NULL))
                                return -ENOBUFS;

                        np->cork.opt->tot_len = opt->tot_len;
                        np->cork.opt->opt_flen = opt->opt_flen;
                        np->cork.opt->opt_nflen = opt->opt_nflen;

                        np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                            sk->sk_allocation);
                        if (opt->dst0opt && !np->cork.opt->dst0opt)
                                return -ENOBUFS;

                        np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                            sk->sk_allocation);
                        if (opt->dst1opt && !np->cork.opt->dst1opt)
                                return -ENOBUFS;

                        np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                           sk->sk_allocation);
                        if (opt->hopopt && !np->cork.opt->hopopt)
                                return -ENOBUFS;

                        np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                            sk->sk_allocation);
                        if (opt->srcrt && !np->cork.opt->srcrt)
                                return -ENOBUFS;

                        /* need source address above miyazawa*/
                }
                dst_hold(&rt->dst);
                cork->dst = &rt->dst;
                inet->cork.fl.u.ip6 = *fl6;
                np->cork.hop_limit = hlimit;
                np->cork.tclass = tclass;
                mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                      rt->dst.dev->mtu : dst_mtu(&rt->dst);
                if (np->frag_size < mtu) {
                        if (np->frag_size)
                                mtu = np->frag_size;
                }
                cork->fragsize = mtu;
                if (dst_allfrag(rt->dst.path))
                        cork->flags |= IPCORK_ALLFRAG;
                cork->length = 0;
                sk->sk_sndmsg_page = NULL;
                sk->sk_sndmsg_off = 0;
                exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
                length += exthdrlen;
                transhdrlen += exthdrlen;
                dst_exthdrlen = rt->dst.header_len;
        } else {
                rt = (struct rt6_info *)cork->dst;
                fl6 = &inet->cork.fl.u.ip6;
                opt = np->cork.opt;
                transhdrlen = 0;
                exthdrlen = 0;
                dst_exthdrlen = 0;
                mtu = cork->fragsize;
        }

        hh_len = LL_RESERVED_SPACE(rt->dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
                        (opt ? opt->opt_nflen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
                if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
                        ipv6_local_error(sk, EMSGSIZE, fl6, mtu - exthdrlen);
                        return -EMSGSIZE;
                }
        }

        /* For UDP, check if TX timestamp is enabled */
        if (sk->sk_type == SOCK_DGRAM) {
                err = sock_tx_timestamp(sk, &tx_flags);
                if (err)
                        goto error;
        }

        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        cork->length += length;
        if (length > mtu) {
                int proto = sk->sk_protocol;
                if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
                        ipv6_local_rxpmtu(sk, fl6, mtu - exthdrlen);
                        return -EMSGSIZE;
                }

                if (proto == IPPROTO_UDP &&
                    (rt->dst.dev->features & NETIF_F_UFO)) {

                        err = ip6_ufo_append_data(sk, getfrag, from, length,
                                                  hh_len, fragheaderlen,
                                                  transhdrlen, mtu, flags, rt);
                        if (err)
                                goto error;
                        return 0;
                }
        }

        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
                        struct sk_buff *skb_prev;
alloc_new_skb:
                        skb_prev = skb;

                        /* There's no room in the current skb */
                        if (skb_prev)
                                fraggap = skb_prev->len - maxfraglen;
                        else
                                fraggap = 0;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;
                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen;

                        fraglen = datalen + fragheaderlen;
                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features & NETIF_F_SG))
                                alloclen = mtu;
                        else
                                alloclen = datalen + fragheaderlen;

                        alloclen += dst_exthdrlen;

                        /*
                         * The last fragment gets additional space at tail.
                         * Note: we overallocate on fragments with MSG_MORE
                         * because we have no idea if we're the last one.
                         */
                        if (datalen == length + fraggap)
                                alloclen += rt->dst.trailer_len;

                        /*
                         * We just reserve space for fragment header.
                         * Note: this may be overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (atomic_read(&sk->sk_wmem_alloc) <=
                                    2 * sk->sk_sndbuf)
                                        skb = sock_wmalloc(sk,
                                                           alloclen + hh_len, 1,
                                                           sk->sk_allocation);
                                if (unlikely(skb == NULL))
                                        err = -ENOBUFS;
                                else {
                                        /* Only the initial fragment
                                         * is time stamped.
                                         */
                                        tx_flags = 0;
                                }
                        }
                        if (skb == NULL)
                                goto error;
                        /*
                         * Fill in the control structures
                         */
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        /* reserve for fragmentation */
                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

                        if (sk->sk_type == SOCK_DGRAM)
                                skb_shinfo(skb)->tx_flags = tx_flags;

                        /*
                         * Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen + dst_exthdrlen);
                        skb_set_network_header(skb, exthdrlen + dst_exthdrlen);
                        data += fragheaderlen + dst_exthdrlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        copy = datalen - transhdrlen - fraggap;

                        if (copy < 0) {
                                err = -EINVAL;
                                kfree_skb(skb);
                                goto error;
                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= datalen - fraggap;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        dst_exthdrlen = 0;
                        csummode = CHECKSUM_NONE;

                        /*
                         * Put the packet on the pending queue
                         */
                        __skb_queue_tail(&sk->sk_write_queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->dst.dev->features & NETIF_F_SG)) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                    offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else {
                        int i = skb_shinfo(skb)->nr_frags;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
                        struct page *page = sk->sk_sndmsg_page;
                        int off = sk->sk_sndmsg_off;
                        unsigned int left;

                        if (page && (left = PAGE_SIZE - off) > 0) {
                                if (copy >= left)
                                        copy = left;
                                if (page != skb_frag_page(frag)) {
                                        if (i == MAX_SKB_FRAGS) {
                                                err = -EMSGSIZE;
                                                goto error;
                                        }
                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                                        skb_frag_ref(skb, i);
                                        frag = &skb_shinfo(skb)->frags[i];
                                }
                        } else if (i < MAX_SKB_FRAGS) {
                                if (copy > PAGE_SIZE)
                                        copy = PAGE_SIZE;
                                page = alloc_pages(sk->sk_allocation, 0);
                                if (page == NULL) {
                                        err = -ENOMEM;
                                        goto error;
                                }
                                sk->sk_sndmsg_page = page;
                                sk->sk_sndmsg_off = 0;

                                skb_fill_page_desc(skb, i, page, 0, 0);
                                frag = &skb_shinfo(skb)->frags[i];
                        } else {
                                err = -EMSGSIZE;
                                goto error;
                        }
                        if (getfrag(from,
                                    skb_frag_address(frag) + skb_frag_size(frag),
                                    offset, copy, skb->len, skb) < 0) {
                                err = -EFAULT;
                                goto error;
                        }
                        sk->sk_sndmsg_off += copy;
                        skb_frag_size_add(frag, copy);
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                        atomic_add(copy, &sk->sk_wmem_alloc);
                }
                offset += copy;
                length -= copy;
        }
        return 0;
error:
        cork->length -= length;
        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        return err;
}

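/* Release the cork state: duplicated options, held route and cached flow. */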
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
        if (np->cork.opt) {
                kfree(np->cork.opt->dst0opt);
                kfree(np->cork.opt->dst1opt);
                kfree(np->cork.opt->hopopt);
                kfree(np->cork.opt->srcrt);
                kfree(np->cork.opt);
                np->cork.opt = NULL;
        }

        if (inet->cork.base.dst) {
                dst_release(inet->cork.base.dst);
                inet->cork.base.dst = NULL;
                inet->cork.base.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

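/*
 * Collapse the queued skbs into a single packet (the tail skbs become
 * the frag_list of the first), prepend the IPv6 header and push the
 * result through ip6_local_out().
 */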
int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = np->cork.opt;
        struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
        struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
        unsigned char proto = fl6->flowi6_proto;
        int err = 0;

        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to ip header from ext header */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /* Allow local fragmentation. */
        if (np->pmtudisc < IPV6_PMTUDISC_DO)
                skb->local_df = 1;

        ipv6_addr_copy(final_dst, &fl6->daddr);
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        *(__be32 *)hdr = fl6->flowlabel |
                         htonl(0x60000000 | ((int)np->cork.tclass << 20));

        hdr->hop_limit = np->cork.hop_limit;
        hdr->nexthdr = proto;
        ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
        ipv6_addr_copy(&hdr->daddr, final_dst);

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        skb_dst_set(skb, dst_clone(&rt->dst));
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
        }

        err = ip6_local_out(skb);
        if (err) {
                if (err > 0)
                        err = net_xmit_errno(err);
                if (err)
                        goto error;
        }

out:
        ip6_cork_release(inet, np);
        return err;
error:
        IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        goto out;
}

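/* Drop everything queued on the socket and release the cork state. */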
void ip6_flush_pending_frames(struct sock *sk)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
                if (skb_dst(skb))
                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}