ipv6: Do not assign non-valid address on interface.
[deliverable/linux.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
1da177e4
LT
40
41#include <linux/netfilter.h>
42#include <linux/netfilter_ipv6.h>
43
44#include <net/sock.h>
45#include <net/snmp.h>
46
47#include <net/ipv6.h>
48#include <net/ndisc.h>
49#include <net/protocol.h>
50#include <net/ip6_route.h>
51#include <net/addrconf.h>
52#include <net/rawv6.h>
53#include <net/icmp.h>
54#include <net/xfrm.h>
55#include <net/checksum.h>
7bc570c8 56#include <linux/mroute6.h>
1da177e4
LT
57
58static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
59
60static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
61{
62 static u32 ipv6_fragmentation_id = 1;
63 static DEFINE_SPINLOCK(ip6_id_lock);
64
65 spin_lock_bh(&ip6_id_lock);
66 fhdr->identification = htonl(ipv6_fragmentation_id);
67 if (++ipv6_fragmentation_id == 0)
68 ipv6_fragmentation_id = 1;
69 spin_unlock_bh(&ip6_id_lock);
70}
71
ef76bc23
HX
72int __ip6_local_out(struct sk_buff *skb)
73{
74 int len;
75
76 len = skb->len - sizeof(struct ipv6hdr);
77 if (len > IPV6_MAXPLEN)
78 len = 0;
79 ipv6_hdr(skb)->payload_len = htons(len);
80
6e23ae2a 81 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
ef76bc23
HX
82 dst_output);
83}
84
/*
 * Send a locally generated packet: run __ip6_local_out() and, when it
 * returns 1, continue with dst_output().  Any other value is returned to
 * the caller unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
96
ad643a79 97static int ip6_output_finish(struct sk_buff *skb)
1da177e4 98{
1da177e4 99 struct dst_entry *dst = skb->dst;
1da177e4 100
3644f0ce
SH
101 if (dst->hh)
102 return neigh_hh_output(dst->hh, skb);
103 else if (dst->neighbour)
1da177e4
LT
104 return dst->neighbour->output(skb);
105
a11d206d 106 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
107 kfree_skb(skb);
108 return -EINVAL;
109
110}
111
112/* dev_loopback_xmit for use with netfilter. */
113static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
114{
459a98ed 115 skb_reset_mac_header(newskb);
bbe735e4 116 __skb_pull(newskb, skb_network_offset(newskb));
1da177e4
LT
117 newskb->pkt_type = PACKET_LOOPBACK;
118 newskb->ip_summed = CHECKSUM_UNNECESSARY;
119 BUG_TRAP(newskb->dst);
120
121 netif_rx(newskb);
122 return 0;
123}
124
125
126static int ip6_output2(struct sk_buff *skb)
127{
128 struct dst_entry *dst = skb->dst;
129 struct net_device *dev = dst->dev;
130
131 skb->protocol = htons(ETH_P_IPV6);
132 skb->dev = dev;
133
0660e03f 134 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
1da177e4 135 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
a11d206d 136 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1da177e4
LT
137
138 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
7bc570c8
YH
139 ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
140 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
141 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
142 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
143
144 /* Do not check for IFF_ALLMULTI; multicast routing
145 is not supported in any case.
146 */
147 if (newskb)
6e23ae2a
PM
148 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
149 NULL, newskb->dev,
1da177e4
LT
150 ip6_dev_loopback_xmit);
151
0660e03f 152 if (ipv6_hdr(skb)->hop_limit == 0) {
a11d206d 153 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
154 kfree_skb(skb);
155 return 0;
156 }
157 }
158
a11d206d 159 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
1da177e4
LT
160 }
161
6e23ae2a
PM
162 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
163 ip6_output_finish);
1da177e4
LT
164}
165
628a5c56
JH
166static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
167{
168 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
169
170 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
171 skb->dst->dev->mtu : dst_mtu(skb->dst);
172}
173
1da177e4
LT
174int ip6_output(struct sk_buff *skb)
175{
628a5c56 176 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
e89e9cf5 177 dst_allfrag(skb->dst))
1da177e4
LT
178 return ip6_fragment(skb, ip6_output2);
179 else
180 return ip6_output2(skb);
181}
182
1da177e4
LT
183/*
184 * xmit an sk_buff (used by TCP)
185 */
186
187int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
188 struct ipv6_txoptions *opt, int ipfragok)
189{
b30bd282 190 struct ipv6_pinfo *np = inet6_sk(sk);
1da177e4
LT
191 struct in6_addr *first_hop = &fl->fl6_dst;
192 struct dst_entry *dst = skb->dst;
193 struct ipv6hdr *hdr;
194 u8 proto = fl->proto;
195 int seg_len = skb->len;
41a1f8ea 196 int hlimit, tclass;
1da177e4
LT
197 u32 mtu;
198
199 if (opt) {
c2636b4d 200 unsigned int head_room;
1da177e4
LT
201
202 /* First: exthdrs may take lots of space (~8K for now)
203 MAX_HEADER is not enough.
204 */
205 head_room = opt->opt_nflen + opt->opt_flen;
206 seg_len += head_room;
207 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
208
209 if (skb_headroom(skb) < head_room) {
210 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
a11d206d
YH
211 if (skb2 == NULL) {
212 IP6_INC_STATS(ip6_dst_idev(skb->dst),
213 IPSTATS_MIB_OUTDISCARDS);
214 kfree_skb(skb);
1da177e4
LT
215 return -ENOBUFS;
216 }
a11d206d
YH
217 kfree_skb(skb);
218 skb = skb2;
1da177e4
LT
219 if (sk)
220 skb_set_owner_w(skb, sk);
221 }
222 if (opt->opt_flen)
223 ipv6_push_frag_opts(skb, opt, &proto);
224 if (opt->opt_nflen)
225 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
226 }
227
e2d1bca7
ACM
228 skb_push(skb, sizeof(struct ipv6hdr));
229 skb_reset_network_header(skb);
0660e03f 230 hdr = ipv6_hdr(skb);
1da177e4
LT
231
232 /*
233 * Fill in the IPv6 header
234 */
235
1da177e4
LT
236 hlimit = -1;
237 if (np)
238 hlimit = np->hop_limit;
239 if (hlimit < 0)
6b75d090 240 hlimit = ip6_dst_hoplimit(dst);
1da177e4 241
41a1f8ea
YH
242 tclass = -1;
243 if (np)
244 tclass = np->tclass;
245 if (tclass < 0)
246 tclass = 0;
247
90bcaf7b 248 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
41a1f8ea 249
1da177e4
LT
250 hdr->payload_len = htons(seg_len);
251 hdr->nexthdr = proto;
252 hdr->hop_limit = hlimit;
253
254 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
255 ipv6_addr_copy(&hdr->daddr, first_hop);
256
a2c2064f 257 skb->priority = sk->sk_priority;
4a19ec58 258 skb->mark = sk->sk_mark;
a2c2064f 259
1da177e4 260 mtu = dst_mtu(dst);
89114afd 261 if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
a11d206d
YH
262 IP6_INC_STATS(ip6_dst_idev(skb->dst),
263 IPSTATS_MIB_OUTREQUESTS);
6e23ae2a 264 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
6869c4d8 265 dst_output);
1da177e4
LT
266 }
267
268 if (net_ratelimit())
269 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
270 skb->dev = dst->dev;
271 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
a11d206d 272 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
273 kfree_skb(skb);
274 return -EMSGSIZE;
275}
276
7159039a
YH
277EXPORT_SYMBOL(ip6_xmit);
278
1da177e4
LT
279/*
280 * To avoid extra problems ND packets are send through this
281 * routine. It's code duplication but I really want to avoid
282 * extra checks since ipv6_build_header is used by TCP (which
283 * is for us performance critical)
284 */
285
286int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
9acd9f3a 287 const struct in6_addr *saddr, const struct in6_addr *daddr,
1da177e4
LT
288 int proto, int len)
289{
290 struct ipv6_pinfo *np = inet6_sk(sk);
291 struct ipv6hdr *hdr;
292 int totlen;
293
294 skb->protocol = htons(ETH_P_IPV6);
295 skb->dev = dev;
296
297 totlen = len + sizeof(struct ipv6hdr);
298
55f79cc0
ACM
299 skb_reset_network_header(skb);
300 skb_put(skb, sizeof(struct ipv6hdr));
0660e03f 301 hdr = ipv6_hdr(skb);
1da177e4 302
ae08e1f0 303 *(__be32*)hdr = htonl(0x60000000);
1da177e4
LT
304
305 hdr->payload_len = htons(len);
306 hdr->nexthdr = proto;
307 hdr->hop_limit = np->hop_limit;
308
309 ipv6_addr_copy(&hdr->saddr, saddr);
310 ipv6_addr_copy(&hdr->daddr, daddr);
311
312 return 0;
313}
314
315static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
316{
317 struct ip6_ra_chain *ra;
318 struct sock *last = NULL;
319
320 read_lock(&ip6_ra_lock);
321 for (ra = ip6_ra_chain; ra; ra = ra->next) {
322 struct sock *sk = ra->sk;
0bd1b59b
AM
323 if (sk && ra->sel == sel &&
324 (!sk->sk_bound_dev_if ||
325 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
326 if (last) {
327 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
328 if (skb2)
329 rawv6_rcv(last, skb2);
330 }
331 last = sk;
332 }
333 }
334
335 if (last) {
336 rawv6_rcv(last, skb);
337 read_unlock(&ip6_ra_lock);
338 return 1;
339 }
340 read_unlock(&ip6_ra_lock);
341 return 0;
342}
343
e21e0b5f
VN
344static int ip6_forward_proxy_check(struct sk_buff *skb)
345{
0660e03f 346 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f
VN
347 u8 nexthdr = hdr->nexthdr;
348 int offset;
349
350 if (ipv6_ext_hdr(nexthdr)) {
351 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
352 if (offset < 0)
353 return 0;
354 } else
355 offset = sizeof(struct ipv6hdr);
356
357 if (nexthdr == IPPROTO_ICMPV6) {
358 struct icmp6hdr *icmp6;
359
d56f90a7
ACM
360 if (!pskb_may_pull(skb, (skb_network_header(skb) +
361 offset + 1 - skb->data)))
e21e0b5f
VN
362 return 0;
363
d56f90a7 364 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
365
366 switch (icmp6->icmp6_type) {
367 case NDISC_ROUTER_SOLICITATION:
368 case NDISC_ROUTER_ADVERTISEMENT:
369 case NDISC_NEIGHBOUR_SOLICITATION:
370 case NDISC_NEIGHBOUR_ADVERTISEMENT:
371 case NDISC_REDIRECT:
372 /* For reaction involving unicast neighbor discovery
373 * message destined to the proxied address, pass it to
374 * input function.
375 */
376 return 1;
377 default:
378 break;
379 }
380 }
381
74553b09
VN
382 /*
383 * The proxying router can't forward traffic sent to a link-local
384 * address, so signal the sender and discard the packet. This
385 * behavior is clarified by the MIPv6 specification.
386 */
387 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
388 dst_link_failure(skb);
389 return -1;
390 }
391
e21e0b5f
VN
392 return 0;
393}
394
1da177e4
LT
/*
 * okfn for the NF_INET_FORWARD hook: hand the packet to the output
 * routine of its route and report that routine's result.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
399
400int ip6_forward(struct sk_buff *skb)
401{
402 struct dst_entry *dst = skb->dst;
0660e03f 403 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 404 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 405 struct net *net = dev_net(dst->dev);
1ab1457c 406
1da177e4
LT
407 if (ipv6_devconf.forwarding == 0)
408 goto error;
409
4497b076
BH
410 if (skb_warn_if_lro(skb))
411 goto drop;
412
1da177e4 413 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
a11d206d 414 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
415 goto drop;
416 }
417
35fc92a9 418 skb_forward_csum(skb);
1da177e4
LT
419
420 /*
421 * We DO NOT make any processing on
422 * RA packets, pushing them to user level AS IS
423 * without ane WARRANTY that application will be able
424 * to interpret them. The reason is that we
425 * cannot make anything clever here.
426 *
427 * We are not end-node, so that if packet contains
428 * AH/ESP, we cannot make anything.
429 * Defragmentation also would be mistake, RA packets
430 * cannot be fragmented, because there is no warranty
431 * that different fragments will go along one path. --ANK
432 */
433 if (opt->ra) {
d56f90a7 434 u8 *ptr = skb_network_header(skb) + opt->ra;
1da177e4
LT
435 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
436 return 0;
437 }
438
439 /*
440 * check and decrement ttl
441 */
442 if (hdr->hop_limit <= 1) {
443 /* Force OUTPUT device used as source address */
444 skb->dev = dst->dev;
445 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
446 0, skb->dev);
a11d206d 447 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
448
449 kfree_skb(skb);
450 return -ETIMEDOUT;
451 }
452
fbea49e1
YH
453 /* XXX: idev->cnf.proxy_ndp? */
454 if (ipv6_devconf.proxy_ndp &&
8a3edd80 455 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
456 int proxied = ip6_forward_proxy_check(skb);
457 if (proxied > 0)
e21e0b5f 458 return ip6_input(skb);
74553b09 459 else if (proxied < 0) {
a11d206d 460 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
74553b09
VN
461 goto drop;
462 }
e21e0b5f
VN
463 }
464
1da177e4 465 if (!xfrm6_route_forward(skb)) {
a11d206d 466 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
467 goto drop;
468 }
469 dst = skb->dst;
470
471 /* IPv6 specs say nothing about it, but it is clear that we cannot
472 send redirects to source routed frames.
1e5dc146 473 We don't send redirects to frames decapsulated from IPsec.
1da177e4 474 */
1e5dc146
MN
475 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
476 !skb->sp) {
1da177e4
LT
477 struct in6_addr *target = NULL;
478 struct rt6_info *rt;
479 struct neighbour *n = dst->neighbour;
480
481 /*
482 * incoming and outgoing devices are the same
483 * send a redirect.
484 */
485
486 rt = (struct rt6_info *) dst;
487 if ((rt->rt6i_flags & RTF_GATEWAY))
488 target = (struct in6_addr*)&n->primary_key;
489 else
490 target = &hdr->daddr;
491
492 /* Limit redirects both by destination (here)
493 and by source (inside ndisc_send_redirect)
494 */
495 if (xrlim_allow(dst, 1*HZ))
496 ndisc_send_redirect(skb, n, target);
5bb1ab09
DS
497 } else {
498 int addrtype = ipv6_addr_type(&hdr->saddr);
499
1da177e4 500 /* This check is security critical. */
5bb1ab09
DS
501 if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
502 goto error;
503 if (addrtype & IPV6_ADDR_LINKLOCAL) {
504 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
505 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
506 goto error;
507 }
1da177e4
LT
508 }
509
510 if (skb->len > dst_mtu(dst)) {
511 /* Again, force OUTPUT device used as source address */
512 skb->dev = dst->dev;
513 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
a11d206d
YH
514 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
515 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
516 kfree_skb(skb);
517 return -EMSGSIZE;
518 }
519
520 if (skb_cow(skb, dst->dev->hard_header_len)) {
a11d206d 521 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
522 goto drop;
523 }
524
0660e03f 525 hdr = ipv6_hdr(skb);
1da177e4
LT
526
527 /* Mangling hops number delayed to point after skb COW */
1ab1457c 528
1da177e4
LT
529 hdr->hop_limit--;
530
a11d206d 531 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
6e23ae2a
PM
532 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
533 ip6_forward_finish);
1da177e4
LT
534
535error:
a11d206d 536 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
537drop:
538 kfree_skb(skb);
539 return -EINVAL;
540}
541
542static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
543{
544 to->pkt_type = from->pkt_type;
545 to->priority = from->priority;
546 to->protocol = from->protocol;
1da177e4
LT
547 dst_release(to->dst);
548 to->dst = dst_clone(from->dst);
549 to->dev = from->dev;
82e91ffe 550 to->mark = from->mark;
1da177e4
LT
551
552#ifdef CONFIG_NET_SCHED
553 to->tc_index = from->tc_index;
554#endif
e7ac05f3 555 nf_copy(to, from);
ba9dda3a
JK
556#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
557 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
558 to->nf_trace = from->nf_trace;
559#endif
984bc16c 560 skb_copy_secmark(to, from);
1da177e4
LT
561}
562
563int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
564{
565 u16 offset = sizeof(struct ipv6hdr);
0660e03f
ACM
566 struct ipv6_opt_hdr *exthdr =
567 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
27a884dc 568 unsigned int packet_len = skb->tail - skb->network_header;
1da177e4 569 int found_rhdr = 0;
0660e03f 570 *nexthdr = &ipv6_hdr(skb)->nexthdr;
1da177e4
LT
571
572 while (offset + 1 <= packet_len) {
573
574 switch (**nexthdr) {
575
576 case NEXTHDR_HOP:
27637df9 577 break;
1da177e4 578 case NEXTHDR_ROUTING:
27637df9
MN
579 found_rhdr = 1;
580 break;
1da177e4 581 case NEXTHDR_DEST:
59fbb3a6 582#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
27637df9
MN
583 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
584 break;
585#endif
586 if (found_rhdr)
587 return offset;
1da177e4
LT
588 break;
589 default :
590 return offset;
591 }
27637df9
MN
592
593 offset += ipv6_optlen(exthdr);
594 *nexthdr = &exthdr->nexthdr;
d56f90a7
ACM
595 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
596 offset);
1da177e4
LT
597 }
598
599 return offset;
600}
601
602static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
603{
604 struct net_device *dev;
605 struct sk_buff *frag;
606 struct rt6_info *rt = (struct rt6_info*)skb->dst;
d91675f9 607 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
1da177e4
LT
608 struct ipv6hdr *tmp_hdr;
609 struct frag_hdr *fh;
610 unsigned int mtu, hlen, left, len;
ae08e1f0 611 __be32 frag_id = 0;
1da177e4
LT
612 int ptr, offset = 0, err=0;
613 u8 *prevhdr, nexthdr = 0;
614
615 dev = rt->u.dst.dev;
616 hlen = ip6_find_1stfragopt(skb, &prevhdr);
617 nexthdr = *prevhdr;
618
628a5c56 619 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
620
621 /* We must not fragment if the socket is set to force MTU discovery
622 * or if the skb it not generated by a local socket. (This last
623 * check should be redundant, but it's free.)
624 */
b5c15fc0 625 if (!skb->local_df) {
b881ef76
JH
626 skb->dev = skb->dst->dev;
627 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
628 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
629 kfree_skb(skb);
630 return -EMSGSIZE;
631 }
632
d91675f9
YH
633 if (np && np->frag_size < mtu) {
634 if (np->frag_size)
635 mtu = np->frag_size;
636 }
637 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4
LT
638
639 if (skb_shinfo(skb)->frag_list) {
640 int first_len = skb_pagelen(skb);
29ffe1a5 641 int truesizes = 0;
1da177e4
LT
642
643 if (first_len - hlen > mtu ||
644 ((first_len - hlen) & 7) ||
645 skb_cloned(skb))
646 goto slow_path;
647
648 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
649 /* Correct geometry. */
650 if (frag->len > mtu ||
651 ((frag->len & 7) && frag->next) ||
652 skb_headroom(frag) < hlen)
653 goto slow_path;
654
1da177e4
LT
655 /* Partially cloned skb? */
656 if (skb_shared(frag))
657 goto slow_path;
2fdba6b0
HX
658
659 BUG_ON(frag->sk);
660 if (skb->sk) {
661 sock_hold(skb->sk);
662 frag->sk = skb->sk;
663 frag->destructor = sock_wfree;
29ffe1a5 664 truesizes += frag->truesize;
2fdba6b0 665 }
1da177e4
LT
666 }
667
668 err = 0;
669 offset = 0;
670 frag = skb_shinfo(skb)->frag_list;
671 skb_shinfo(skb)->frag_list = NULL;
672 /* BUILD HEADER */
673
9a217a1c 674 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 675 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 676 if (!tmp_hdr) {
a11d206d 677 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
678 return -ENOMEM;
679 }
680
1da177e4
LT
681 __skb_pull(skb, hlen);
682 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
683 __skb_push(skb, hlen);
684 skb_reset_network_header(skb);
d56f90a7 685 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4
LT
686
687 ipv6_select_ident(skb, fh);
688 fh->nexthdr = nexthdr;
689 fh->reserved = 0;
690 fh->frag_off = htons(IP6_MF);
691 frag_id = fh->identification;
692
693 first_len = skb_pagelen(skb);
694 skb->data_len = first_len - skb_headlen(skb);
29ffe1a5 695 skb->truesize -= truesizes;
1da177e4 696 skb->len = first_len;
0660e03f
ACM
697 ipv6_hdr(skb)->payload_len = htons(first_len -
698 sizeof(struct ipv6hdr));
a11d206d
YH
699
700 dst_hold(&rt->u.dst);
1da177e4
LT
701
702 for (;;) {
703 /* Prepare header of the next frame,
704 * before previous one went down. */
705 if (frag) {
706 frag->ip_summed = CHECKSUM_NONE;
badff6d0 707 skb_reset_transport_header(frag);
1da177e4 708 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
709 __skb_push(frag, hlen);
710 skb_reset_network_header(frag);
d56f90a7
ACM
711 memcpy(skb_network_header(frag), tmp_hdr,
712 hlen);
1da177e4
LT
713 offset += skb->len - hlen - sizeof(struct frag_hdr);
714 fh->nexthdr = nexthdr;
715 fh->reserved = 0;
716 fh->frag_off = htons(offset);
717 if (frag->next != NULL)
718 fh->frag_off |= htons(IP6_MF);
719 fh->identification = frag_id;
0660e03f
ACM
720 ipv6_hdr(frag)->payload_len =
721 htons(frag->len -
722 sizeof(struct ipv6hdr));
1da177e4
LT
723 ip6_copy_metadata(frag, skb);
724 }
1ab1457c 725
1da177e4 726 err = output(skb);
dafee490 727 if(!err)
a11d206d 728 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
dafee490 729
1da177e4
LT
730 if (err || !frag)
731 break;
732
733 skb = frag;
734 frag = skb->next;
735 skb->next = NULL;
736 }
737
a51482bd 738 kfree(tmp_hdr);
1da177e4
LT
739
740 if (err == 0) {
a11d206d
YH
741 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
742 dst_release(&rt->u.dst);
1da177e4
LT
743 return 0;
744 }
745
746 while (frag) {
747 skb = frag->next;
748 kfree_skb(frag);
749 frag = skb;
750 }
751
a11d206d
YH
752 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
753 dst_release(&rt->u.dst);
1da177e4
LT
754 return err;
755 }
756
757slow_path:
758 left = skb->len - hlen; /* Space per frame */
759 ptr = hlen; /* Where to start from */
760
761 /*
762 * Fragment the datagram.
763 */
764
765 *prevhdr = NEXTHDR_FRAGMENT;
766
767 /*
768 * Keep copying data until we run out.
769 */
770 while(left > 0) {
771 len = left;
772 /* IF: it doesn't fit, use 'mtu' - the data space left */
773 if (len > mtu)
774 len = mtu;
775 /* IF: we are not sending upto and including the packet end
776 then align the next start on an eight byte boundary */
777 if (len < left) {
778 len &= ~7;
779 }
780 /*
781 * Allocate buffer.
782 */
783
f5184d26 784 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
64ce2073 785 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
a11d206d
YH
786 IP6_INC_STATS(ip6_dst_idev(skb->dst),
787 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
788 err = -ENOMEM;
789 goto fail;
790 }
791
792 /*
793 * Set up data on packet
794 */
795
796 ip6_copy_metadata(frag, skb);
797 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
798 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 799 skb_reset_network_header(frag);
badff6d0 800 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
801 frag->transport_header = (frag->network_header + hlen +
802 sizeof(struct frag_hdr));
1da177e4
LT
803
804 /*
805 * Charge the memory for the fragment to any owner
806 * it might possess
807 */
808 if (skb->sk)
809 skb_set_owner_w(frag, skb->sk);
810
811 /*
812 * Copy the packet header into the new buffer.
813 */
d626f62b 814 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
815
816 /*
817 * Build fragment header.
818 */
819 fh->nexthdr = nexthdr;
820 fh->reserved = 0;
f36d6ab1 821 if (!frag_id) {
1da177e4
LT
822 ipv6_select_ident(skb, fh);
823 frag_id = fh->identification;
824 } else
825 fh->identification = frag_id;
826
827 /*
828 * Copy a block of the IP datagram.
829 */
8984e41d 830 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
1da177e4
LT
831 BUG();
832 left -= len;
833
834 fh->frag_off = htons(offset);
835 if (left > 0)
836 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
837 ipv6_hdr(frag)->payload_len = htons(frag->len -
838 sizeof(struct ipv6hdr));
1da177e4
LT
839
840 ptr += len;
841 offset += len;
842
843 /*
844 * Put this fragment into the sending queue.
845 */
1da177e4
LT
846 err = output(frag);
847 if (err)
848 goto fail;
dafee490 849
a11d206d 850 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
1da177e4 851 }
a11d206d
YH
852 IP6_INC_STATS(ip6_dst_idev(skb->dst),
853 IPSTATS_MIB_FRAGOKS);
1da177e4 854 kfree_skb(skb);
1da177e4
LT
855 return err;
856
857fail:
a11d206d
YH
858 IP6_INC_STATS(ip6_dst_idev(skb->dst),
859 IPSTATS_MIB_FRAGFAILS);
1ab1457c 860 kfree_skb(skb);
1da177e4
LT
861 return err;
862}
863
cf6b1982
YH
864static inline int ip6_rt_check(struct rt6key *rt_key,
865 struct in6_addr *fl_addr,
866 struct in6_addr *addr_cache)
867{
868 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
869 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
870}
871
497c615a
HX
872static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
873 struct dst_entry *dst,
874 struct flowi *fl)
1da177e4 875{
497c615a
HX
876 struct ipv6_pinfo *np = inet6_sk(sk);
877 struct rt6_info *rt = (struct rt6_info *)dst;
1da177e4 878
497c615a
HX
879 if (!dst)
880 goto out;
881
882 /* Yes, checking route validity in not connected
883 * case is not very simple. Take into account,
884 * that we do not support routing by source, TOS,
885 * and MSG_DONTROUTE --ANK (980726)
886 *
cf6b1982
YH
887 * 1. ip6_rt_check(): If route was host route,
888 * check that cached destination is current.
497c615a
HX
889 * If it is network route, we still may
890 * check its validity using saved pointer
891 * to the last used address: daddr_cache.
892 * We do not want to save whole address now,
893 * (because main consumer of this service
894 * is tcp, which has not this problem),
895 * so that the last trick works only on connected
896 * sockets.
897 * 2. oif also should be the same.
898 */
cf6b1982 899 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
8e1ef0a9
YH
900#ifdef CONFIG_IPV6_SUBTREES
901 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
902#endif
cf6b1982 903 (fl->oif && fl->oif != dst->dev->ifindex)) {
497c615a
HX
904 dst_release(dst);
905 dst = NULL;
1da177e4
LT
906 }
907
497c615a
HX
908out:
909 return dst;
910}
911
912static int ip6_dst_lookup_tail(struct sock *sk,
913 struct dst_entry **dst, struct flowi *fl)
914{
915 int err;
3b1e0a65 916 struct net *net = sock_net(sk);
497c615a 917
1da177e4 918 if (*dst == NULL)
8a3edd80 919 *dst = ip6_route_output(net, sk, fl);
1da177e4
LT
920
921 if ((err = (*dst)->error))
922 goto out_err_release;
923
924 if (ipv6_addr_any(&fl->fl6_src)) {
5e5f3f0f 925 err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev,
7cbca67c
YH
926 &fl->fl6_dst,
927 sk ? inet6_sk(sk)->srcprefs : 0,
928 &fl->fl6_src);
44456d37 929 if (err)
1da177e4 930 goto out_err_release;
1da177e4
LT
931 }
932
95c385b4
NH
933#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
934 /*
935 * Here if the dst entry we've looked up
936 * has a neighbour entry that is in the INCOMPLETE
937 * state and the src address from the flow is
938 * marked as OPTIMISTIC, we release the found
939 * dst entry and replace it instead with the
940 * dst entry of the nexthop router
941 */
942 if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
943 struct inet6_ifaddr *ifp;
944 struct flowi fl_gw;
945 int redirect;
946
8a3edd80 947 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
1cab3da6 948 (*dst)->dev, 1);
95c385b4
NH
949
950 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
951 if (ifp)
952 in6_ifa_put(ifp);
953
954 if (redirect) {
955 /*
956 * We need to get the dst entry for the
957 * default router instead
958 */
959 dst_release(*dst);
960 memcpy(&fl_gw, fl, sizeof(struct flowi));
961 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
8a3edd80 962 *dst = ip6_route_output(net, sk, &fl_gw);
95c385b4
NH
963 if ((err = (*dst)->error))
964 goto out_err_release;
965 }
966 }
967#endif
968
1da177e4
LT
969 return 0;
970
971out_err_release:
ca46f9c8
MC
972 if (err == -ENETUNREACH)
973 IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
974 dst_release(*dst);
975 *dst = NULL;
976 return err;
977}
34a0b3cd 978
497c615a
HX
979/**
980 * ip6_dst_lookup - perform route lookup on flow
981 * @sk: socket which provides route info
982 * @dst: pointer to dst_entry * for result
983 * @fl: flow to lookup
984 *
985 * This function performs a route lookup on the given flow.
986 *
987 * It returns zero on success, or a standard errno code on error.
988 */
989int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
990{
991 *dst = NULL;
992 return ip6_dst_lookup_tail(sk, dst, fl);
993}
3cf3dc6c
ACM
994EXPORT_SYMBOL_GPL(ip6_dst_lookup);
995
497c615a
HX
996/**
997 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
998 * @sk: socket which provides the dst cache and route info
999 * @dst: pointer to dst_entry * for result
1000 * @fl: flow to lookup
1001 *
1002 * This function performs a route lookup on the given flow with the
1003 * possibility of using the cached route in the socket if it is valid.
1004 * It will take the socket dst lock when operating on the dst cache.
1005 * As a result, this function can only be used in process context.
1006 *
1007 * It returns zero on success, or a standard errno code on error.
1008 */
1009int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1010{
1011 *dst = NULL;
1012 if (sk) {
1013 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1014 *dst = ip6_sk_dst_check(sk, *dst, fl);
1015 }
1016
1017 return ip6_dst_lookup_tail(sk, dst, fl);
1018}
1019EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1020
34a0b3cd 1021static inline int ip6_ufo_append_data(struct sock *sk,
e89e9cf5
AR
1022 int getfrag(void *from, char *to, int offset, int len,
1023 int odd, struct sk_buff *skb),
1024 void *from, int length, int hh_len, int fragheaderlen,
1025 int transhdrlen, int mtu,unsigned int flags)
1026
1027{
1028 struct sk_buff *skb;
1029 int err;
1030
1031 /* There is support for UDP large send offload by network
1032 * device, so create one single skb packet containing complete
1033 * udp datagram
1034 */
1035 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1036 skb = sock_alloc_send_skb(sk,
1037 hh_len + fragheaderlen + transhdrlen + 20,
1038 (flags & MSG_DONTWAIT), &err);
1039 if (skb == NULL)
1040 return -ENOMEM;
1041
1042 /* reserve space for Hardware header */
1043 skb_reserve(skb, hh_len);
1044
1045 /* create space for UDP/IP header */
1046 skb_put(skb,fragheaderlen + transhdrlen);
1047
1048 /* initialize network header pointer */
c1d2bbe1 1049 skb_reset_network_header(skb);
e89e9cf5
AR
1050
1051 /* initialize protocol header pointer */
b0e380b1 1052 skb->transport_header = skb->network_header + fragheaderlen;
e89e9cf5 1053
84fa7933 1054 skb->ip_summed = CHECKSUM_PARTIAL;
e89e9cf5
AR
1055 skb->csum = 0;
1056 sk->sk_sndmsg_off = 0;
1057 }
1058
1059 err = skb_append_datato_frags(sk,skb, getfrag, from,
1060 (length - transhdrlen));
1061 if (!err) {
1062 struct frag_hdr fhdr;
1063
1064 /* specify the length of each IP datagram fragment*/
1ab1457c 1065 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
7967168c 1066 sizeof(struct frag_hdr);
f83ef8c0 1067 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
e89e9cf5
AR
1068 ipv6_select_ident(skb, &fhdr);
1069 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1070 __skb_queue_tail(&sk->sk_write_queue, skb);
1071
1072 return 0;
1073 }
1074 /* There is not enough support do UPD LSO,
1075 * so follow normal path
1076 */
1077 kfree_skb(skb);
1078
1079 return err;
1080}
1da177e4 1081
41a1f8ea
YH
/*
 * ip6_append_data - queue more data on sk->sk_write_queue for a (possibly
 * corked) IPv6 datagram.
 *
 * getfrag:     callback that copies 'len' bytes at 'offset' from 'from'
 *              into 'to'; a negative return is reported as -EFAULT.
 * from:        opaque cookie handed to getfrag.
 * length:      bytes to append (transport header included on the first call).
 * transhdrlen: transport header length; forced to 0 on continuation calls.
 * hlimit, tclass, opt, fl, rt: per-message parameters.  They are stored in
 *              the cork state when the write queue is empty; on later calls
 *              rt, fl and opt are replaced by the corked values.
 * flags:       MSG_PROBE, MSG_MORE and MSG_DONTWAIT are examined here.
 *
 * Returns 0 on success (and immediately for MSG_PROBE) or a negative errno.
 * Failures that go through the 'error' label subtract the not-yet-queued
 * length from cork.length and bump IPSTATS_MIB_OUTDISCARDS; the early
 * returns (-ENOBUFS, -EINVAL, first -EMSGSIZE) do neither.
 */
1082int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1083 int offset, int len, int odd, struct sk_buff *skb),
1084 void *from, int length, int transhdrlen,
1085 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1086 struct rt6_info *rt, unsigned int flags)
1da177e4
LT
1087{
1088 struct inet_sock *inet = inet_sk(sk);
1089 struct ipv6_pinfo *np = inet6_sk(sk);
1090 struct sk_buff *skb;
1091 unsigned int maxfraglen, fragheaderlen;
1092 int exthdrlen;
1093 int hh_len;
1094 int mtu;
1095 int copy;
1096 int err;
1097 int offset = 0;
1098 int csummode = CHECKSUM_NONE;
1099
1100 if (flags&MSG_PROBE)
1101 return 0;
1102 if (skb_queue_empty(&sk->sk_write_queue)) {
1103 /*
1104 * setup for corking
1105 */
1106 if (opt) {
/* Keep a flat copy of the options in the cork; an existing buffer is reused only if it is large enough. */
1107 if (np->cork.opt == NULL) {
1108 np->cork.opt = kmalloc(opt->tot_len,
1109 sk->sk_allocation);
1110 if (unlikely(np->cork.opt == NULL))
1111 return -ENOBUFS;
1112 } else if (np->cork.opt->tot_len < opt->tot_len) {
1113 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
1114 return -EINVAL;
1115 }
1116 memcpy(np->cork.opt, opt, opt->tot_len);
1117 inet->cork.flags |= IPCORK_OPT;
1118 /* need source address above miyazawa*/
1119 }
/* The cork takes its own reference on the route. */
1120 dst_hold(&rt->u.dst);
c8cdaf99 1121 inet->cork.dst = &rt->u.dst;
1da177e4
LT
1122 inet->cork.fl = *fl;
1123 np->cork.hop_limit = hlimit;
41a1f8ea 1124 np->cork.tclass = tclass;
628a5c56
JH
/* Device MTU when probing, otherwise the path MTU; a non-zero, smaller np->frag_size overrides it. */
1125 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1126 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
c7503609 1127 if (np->frag_size < mtu) {
d91675f9
YH
1128 if (np->frag_size)
1129 mtu = np->frag_size;
1130 }
1131 inet->cork.fragsize = mtu;
1da177e4
LT
1132 if (dst_allfrag(rt->u.dst.path))
1133 inet->cork.flags |= IPCORK_ALLFRAG;
1134 inet->cork.length = 0;
1135 sk->sk_sndmsg_page = NULL;
1136 sk->sk_sndmsg_off = 0;
/* Extension header bytes are counted as part of both the data length and the transport header of the first skb. */
01488942 1137 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
a1b05140 1138 rt->rt6i_nfheader_len;
1da177e4
LT
1139 length += exthdrlen;
1140 transhdrlen += exthdrlen;
1141 } else {
/* Continuation of a corked send: the caller's rt/fl/opt are ignored in favour of the corked ones. */
c8cdaf99 1142 rt = (struct rt6_info *)inet->cork.dst;
1da177e4
LT
1143 fl = &inet->cork.fl;
1144 if (inet->cork.flags & IPCORK_OPT)
1145 opt = np->cork.opt;
1146 transhdrlen = 0;
1147 exthdrlen = 0;
1148 mtu = inet->cork.fragsize;
1149 }
1150
1151 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1152
/* fragheaderlen: headers repeated in every fragment; maxfraglen: largest skb length that keeps the fragmentable part a multiple of 8 and leaves room for a fragment header. */
a1b05140 1153 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
b4ce9277 1154 (opt ? opt->opt_nflen : 0);
1da177e4
LT
1155 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1156
/* Unless the MTU itself exceeds header + IPV6_MAXPLEN, refuse messages whose total would not fit in one IPv6 payload. */
1157 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1158 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1159 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1160 return -EMSGSIZE;
1161 }
1162 }
1163
1164 /*
1165 * Let's try using as much space as possible.
1166 * Use MTU if total length of the message fits into the MTU.
1167 * Otherwise, we need to reserve fragment header and
1168 * fragment alignment (= 8-15 octets, in total).
1169 *
1170 * Note that we may need to "move" the data from the tail of
1ab1457c 1171 * the buffer to the new fragment when we split
1da177e4
LT
1172 * the message.
1173 *
1ab1457c 1174 * FIXME: It may be fragmented into multiple chunks
1da177e4
LT
1175 * at once if non-fragmentable extension headers
1176 * are too large.
1ab1457c 1177 * --yoshfuji
1da177e4
LT
1178 */
1179
1180 inet->cork.length += length;
e89e9cf5
AR
/* UDP data larger than the MTU on a UFO-capable device is handed to ip6_ufo_append_data() as one large skb. */
1181 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1182 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1183
baa829d8
PM
1184 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1185 fragheaderlen, transhdrlen, mtu,
1186 flags);
1187 if (err)
e89e9cf5 1188 goto error;
e89e9cf5
AR
1189 return 0;
1190 }
1da177e4
LT
1191
1192 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1193 goto alloc_new_skb;
1194
/* Each pass either tops up the tail skb or, when it is full, allocates the next one. */
1195 while (length > 0) {
1196 /* Check if the remaining data fits into current packet. */
1197 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1198 if (copy < length)
1199 copy = maxfraglen - skb->len;
1200
1201 if (copy <= 0) {
1202 char *data;
1203 unsigned int datalen;
1204 unsigned int fraglen;
1205 unsigned int fraggap;
1206 unsigned int alloclen;
1207 struct sk_buff *skb_prev;
1208alloc_new_skb:
1209 skb_prev = skb;
1210
1211 /* There's no room in the current skb */
/* fraggap: bytes the previous skb holds beyond maxfraglen; they are moved into the new skb below. */
1212 if (skb_prev)
1213 fraggap = skb_prev->len - maxfraglen;
1214 else
1215 fraggap = 0;
1216
1217 /*
1218 * If remaining data exceeds the mtu,
1219 * we know we need more fragment(s).
1220 */
1221 datalen = length + fraggap;
1222 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1223 datalen = maxfraglen - fragheaderlen;
1224
1225 fraglen = datalen + fragheaderlen;
/* With MSG_MORE on a device without scatter-gather, later data must land in the linear area, so allocate a full MTU. */
1226 if ((flags & MSG_MORE) &&
1227 !(rt->u.dst.dev->features&NETIF_F_SG))
1228 alloclen = mtu;
1229 else
1230 alloclen = datalen + fragheaderlen;
1231
1232 /*
1233 * The last fragment gets additional space at tail.
1234 * Note: we overallocate on fragments with MSG_MORE
1235 * because we have no idea if we're the last one.
1236 */
1237 if (datalen == length + fraggap)
1238 alloclen += rt->u.dst.trailer_len;
1239
1240 /*
1241 * We just reserve space for fragment header.
1ab1457c 1242 * Note: this may be overallocation if the message
1da177e4
LT
1243 * (without MSG_MORE) fits into the MTU.
1244 */
1245 alloclen += sizeof(struct frag_hdr);
1246
/* The skb carrying the transport header uses sock_alloc_send_skb() (honouring MSG_DONTWAIT); later ones use sock_wmalloc(), and only while sk_wmem_alloc is at most twice sk_sndbuf. */
1247 if (transhdrlen) {
1248 skb = sock_alloc_send_skb(sk,
1249 alloclen + hh_len,
1250 (flags & MSG_DONTWAIT), &err);
1251 } else {
1252 skb = NULL;
1253 if (atomic_read(&sk->sk_wmem_alloc) <=
1254 2 * sk->sk_sndbuf)
1255 skb = sock_wmalloc(sk,
1256 alloclen + hh_len, 1,
1257 sk->sk_allocation);
1258 if (unlikely(skb == NULL))
1259 err = -ENOBUFS;
1260 }
1261 if (skb == NULL)
1262 goto error;
1263 /*
1264 * Fill in the control structures
1265 */
1266 skb->ip_summed = csummode;
1267 skb->csum = 0;
1268 /* reserve for fragmentation */
1269 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1270
1271 /*
1272 * Find where to start putting bytes
1273 */
1274 data = skb_put(skb, fraglen);
c14d2450 1275 skb_set_network_header(skb, exthdrlen);
1da177e4 1276 data += fragheaderlen;
b0e380b1
ACM
1277 skb->transport_header = (skb->network_header +
1278 fragheaderlen);
1da177e4
LT
/* Move the overhang from the previous skb, transfer its checksum contribution, then trim the previous skb to maxfraglen. */
1279 if (fraggap) {
1280 skb->csum = skb_copy_and_csum_bits(
1281 skb_prev, maxfraglen,
1282 data + transhdrlen, fraggap, 0);
1283 skb_prev->csum = csum_sub(skb_prev->csum,
1284 skb->csum);
1285 data += fraggap;
e9fa4f7b 1286 pskb_trim_unique(skb_prev, maxfraglen);
1da177e4
LT
1287 }
1288 copy = datalen - transhdrlen - fraggap;
/* The new skb is not on the queue yet, so it is freed here on failure. */
1289 if (copy < 0) {
1290 err = -EINVAL;
1291 kfree_skb(skb);
1292 goto error;
1293 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1294 err = -EFAULT;
1295 kfree_skb(skb);
1296 goto error;
1297 }
1298
1299 offset += copy;
1300 length -= datalen - fraggap;
/* Only the first skb of the message carries the transport and extension headers. */
1301 transhdrlen = 0;
1302 exthdrlen = 0;
1303 csummode = CHECKSUM_NONE;
1304
1305 /*
1306 * Put the packet on the pending queue
1307 */
1308 __skb_queue_tail(&sk->sk_write_queue, skb);
1309 continue;
1310 }
1311
1312 if (copy > length)
1313 copy = length;
1314
1315 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
/* No scatter-gather: copy into the linear tail and undo the skb_put() if the copy fails. */
1316 unsigned int off;
1317
1318 off = skb->len;
1319 if (getfrag(from, skb_put(skb, copy),
1320 offset, copy, off, skb) < 0) {
1321 __skb_trim(skb, off);
1322 err = -EFAULT;
1323 goto error;
1324 }
1325 } else {
/* Scatter-gather: append into page fragments, reusing sk->sk_sndmsg_page while it has room. */
1326 int i = skb_shinfo(skb)->nr_frags;
/* NOTE(review): with nr_frags == 0 this points one slot before frags[] and is dereferenced by the 'page != frag->page' test when a send page is cached - confirm this is benign. */
1327 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1328 struct page *page = sk->sk_sndmsg_page;
1329 int off = sk->sk_sndmsg_off;
1330 unsigned int left;
1331
1332 if (page && (left = PAGE_SIZE - off) > 0) {
1333 if (copy >= left)
1334 copy = left;
1335 if (page != frag->page) {
1336 if (i == MAX_SKB_FRAGS) {
1337 err = -EMSGSIZE;
1338 goto error;
1339 }
1340 get_page(page);
1341 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1342 frag = &skb_shinfo(skb)->frags[i];
1343 }
1344 } else if(i < MAX_SKB_FRAGS) {
1345 if (copy > PAGE_SIZE)
1346 copy = PAGE_SIZE;
1347 page = alloc_pages(sk->sk_allocation, 0);
1348 if (page == NULL) {
1349 err = -ENOMEM;
1350 goto error;
1351 }
1352 sk->sk_sndmsg_page = page;
1353 sk->sk_sndmsg_off = 0;
1354
1355 skb_fill_page_desc(skb, i, page, 0, 0);
1356 frag = &skb_shinfo(skb)->frags[i];
1da177e4
LT
1357 } else {
1358 err = -EMSGSIZE;
1359 goto error;
1360 }
1361 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1362 err = -EFAULT;
1363 goto error;
1364 }
/* Account for the bytes just placed in the page: fragment size, skb lengths, truesize and socket write memory. */
1365 sk->sk_sndmsg_off += copy;
1366 frag->size += copy;
1367 skb->len += copy;
1368 skb->data_len += copy;
f945fa7a
HX
1369 skb->truesize += copy;
1370 atomic_add(copy, &sk->sk_wmem_alloc);
1da177e4
LT
1371 }
1372 offset += copy;
1373 length -= copy;
1374 }
1375 return 0;
1376error:
/* 'length' is what was not queued; take it back out of the corked total. */
1377 inet->cork.length -= length;
a11d206d 1378 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1379 return err;
1380}
1381
bf138862
PE
1382static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1383{
1384 inet->cork.flags &= ~IPCORK_OPT;
1385 kfree(np->cork.opt);
1386 np->cork.opt = NULL;
c8cdaf99
YH
1387 if (inet->cork.dst) {
1388 dst_release(inet->cork.dst);
1389 inet->cork.dst = NULL;
bf138862
PE
1390 inet->cork.flags &= ~IPCORK_ALLFRAG;
1391 }
1392 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1393}
1394
1da177e4
LT
1395int ip6_push_pending_frames(struct sock *sk)
1396{
1397 struct sk_buff *skb, *tmp_skb;
1398 struct sk_buff **tail_skb;
1399 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1400 struct inet_sock *inet = inet_sk(sk);
1401 struct ipv6_pinfo *np = inet6_sk(sk);
1402 struct ipv6hdr *hdr;
1403 struct ipv6_txoptions *opt = np->cork.opt;
c8cdaf99 1404 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1da177e4
LT
1405 struct flowi *fl = &inet->cork.fl;
1406 unsigned char proto = fl->proto;
1407 int err = 0;
1408
1409 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1410 goto out;
1411 tail_skb = &(skb_shinfo(skb)->frag_list);
1412
1413 /* move skb->data to ip header from ext header */
d56f90a7 1414 if (skb->data < skb_network_header(skb))
bbe735e4 1415 __skb_pull(skb, skb_network_offset(skb));
1da177e4 1416 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
cfe1fc77 1417 __skb_pull(tmp_skb, skb_network_header_len(skb));
1da177e4
LT
1418 *tail_skb = tmp_skb;
1419 tail_skb = &(tmp_skb->next);
1420 skb->len += tmp_skb->len;
1421 skb->data_len += tmp_skb->len;
1da177e4
LT
1422 skb->truesize += tmp_skb->truesize;
1423 __sock_put(tmp_skb->sk);
1424 tmp_skb->destructor = NULL;
1425 tmp_skb->sk = NULL;
1da177e4
LT
1426 }
1427
28a89453 1428 /* Allow local fragmentation. */
b5c15fc0 1429 if (np->pmtudisc < IPV6_PMTUDISC_DO)
28a89453
HX
1430 skb->local_df = 1;
1431
1da177e4 1432 ipv6_addr_copy(final_dst, &fl->fl6_dst);
cfe1fc77 1433 __skb_pull(skb, skb_network_header_len(skb));
1da177e4
LT
1434 if (opt && opt->opt_flen)
1435 ipv6_push_frag_opts(skb, opt, &proto);
1436 if (opt && opt->opt_nflen)
1437 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1438
e2d1bca7
ACM
1439 skb_push(skb, sizeof(struct ipv6hdr));
1440 skb_reset_network_header(skb);
0660e03f 1441 hdr = ipv6_hdr(skb);
1ab1457c 1442
90bcaf7b 1443 *(__be32*)hdr = fl->fl6_flowlabel |
41a1f8ea 1444 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1da177e4 1445
1da177e4
LT
1446 hdr->hop_limit = np->cork.hop_limit;
1447 hdr->nexthdr = proto;
1448 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1449 ipv6_addr_copy(&hdr->daddr, final_dst);
1450
a2c2064f 1451 skb->priority = sk->sk_priority;
4a19ec58 1452 skb->mark = sk->sk_mark;
a2c2064f 1453
1da177e4 1454 skb->dst = dst_clone(&rt->u.dst);
a11d206d 1455 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
14878f75
DS
1456 if (proto == IPPROTO_ICMPV6) {
1457 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1458
1459 ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1460 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1461 }
1462
ef76bc23 1463 err = ip6_local_out(skb);
1da177e4
LT
1464 if (err) {
1465 if (err > 0)
3320da89 1466 err = np->recverr ? net_xmit_errno(err) : 0;
1da177e4
LT
1467 if (err)
1468 goto error;
1469 }
1470
1471out:
bf138862 1472 ip6_cork_release(inet, np);
1da177e4
LT
1473 return err;
1474error:
1475 goto out;
1476}
1477
1478void ip6_flush_pending_frames(struct sock *sk)
1479{
1da177e4
LT
1480 struct sk_buff *skb;
1481
1482 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
e1f52208
YH
1483 if (skb->dst)
1484 IP6_INC_STATS(ip6_dst_idev(skb->dst),
1485 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1486 kfree_skb(skb);
1487 }
1488
bf138862 1489 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1da177e4 1490}
This page took 0.591707 seconds and 5 git commands to generate.