/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */


#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;


#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
						   __be32 addr);
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, struct tcphdr *th);
#else
static inline
struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	return NULL;
}
#endif

struct inet_hashinfo tcp_hashinfo;

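/* The initial sequence number is a keyed hash of the connection 4-tuple
 * (plus a slowly advancing clock inside secure_tcp_sequence_number()), so
 * ISNs are hard to guess off-path yet still advance monotonically per peer.
 */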
static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's: only the timestamp cache is
	   held not per host, but per port pair, and the TW bucket is used
	   as state holder.

	   If the TW bucket has already been destroyed we fall back to VJ's
	   scheme and use the initial timestamp retrieved from the peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
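		/* Pick the new ISN well past the old connection's snd_nxt so
		 * our segments can never fall inside the window of the
		 * previous incarnation at the peer.
		 */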
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}

EXPORT_SYMBOL_GPL(tcp_twsk_unique);

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct rtable *rt;
	__be32 daddr, nexthop;
	int tmp;
	int err;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt && inet->opt->srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet->opt->faddr;
	}

	tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			       IPPROTO_TCP,
			       inet->inet_sport, usin->sin_port, sk, 1);
	if (tmp < 0) {
		if (tmp == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return tmp;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet->opt || !inet->opt->srr)
		daddr = rt->rt_dst;

	if (!inet->inet_saddr)
		inet->inet_saddr = rt->rt_src;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
		struct inet_peer *peer = rt_get_peer(rt);
		/*
		 * VJ's idea. We save the last timestamp seen from
		 * the destination in the peer table, when entering state
		 * TIME-WAIT, and initialize rx_opt.ts_recent from it
		 * when trying a new connection.
		 */
		if (peer) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
				tp->rx_opt.ts_recent = peer->tcp_ts;
			}
		}
	}

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet->opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables
	 * and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	err = ip_route_newports(&rt, IPPROTO_TCP,
				inet->inet_sport, inet->inet_dport, sk);
	if (err)
		goto failure;

	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

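	/* Seed the IP ID counter from the ISN (xor'd with jiffies) rather
	 * than from a fixed value, to avoid trivially predictable IDs.
	 */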
	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}

/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always < 576 bytes, so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the dst entry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet-too-big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember the soft error
	 * for the case that this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	struct iphdr *iph = (struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* Check if icmp_skb allows reverting one step of RTO backoff
		 * (see draft-zimmermann-tcp-lcd). */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

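		/* The unreachable is assumed transient: undo one doubling of
		 * the exponential backoff by recomputing the base RTO and
		 * re-applying the reduced backoff shift.
		 */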
		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
					 icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else if (sock_owned_by_user(sk)) {
			/* RTO revert clocked out retransmission,
			 * but the socket is locked. Will defer. */
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  HZ/20, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now. */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       Well, it can, e.g. if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows considering only PROTO_UNREACH and
	 * PORT_UNREACH as hard errors (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note that in the modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters, even these two messages finally
	 * lose their original sense (even Linux sends invalid PORT_UNREACHs).
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else {	/* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

static void __tcp_v4_send_check(struct sk_buff *skb,
				__be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

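	/* For CHECKSUM_PARTIAL the device computes the final checksum: we
	 * store only the pseudo-header sum and tell the driver where in the
	 * header to write the result.
	 */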
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}

int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
	return 0;
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why don't I ever use socket parameters (TOS, TTL etc.)
 *		      for reset?
 *	Answer: if a packet caused an RST, it is not for a socket
 *		existing in our system; if it is matched to a socket,
 *		it is just a duplicate segment or a bug in the other side's
 *		TCP. So we build the reply based only on parameters that
 *		arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
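		/* No ACK on the incoming segment: per RFC 793 our RST must
		 * itself ACK everything the segment consumed in sequence
		 * space (its payload plus one each for SYN and FIN).
		 */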
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	net = dev_net(skb_dst(skb)->dev);
	ip_send_reply(net->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
}

/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
   outside socket context, is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags)
{
	struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;

	ip_send_reply(net->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent,
			0,
			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      struct request_values *rvp)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int err = -1;
	struct sk_buff * skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, rvp);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	dst_release(dst);
	return err;
}

static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
			     struct request_values *rvp)
{
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return tcp_v4_send_synack(sk, NULL, req, rvp);
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

static void syn_flood_warning(const struct sk_buff *skb)
{
	const char *msg;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies)
		msg = "Sending cookies";
	else
#endif
		msg = "Dropping request";

	pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
		ntohs(tcp_hdr(skb)->dest), msg);
}

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options *tcp_v4_save_options(struct sock *sk,
					      struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = optlength(opt);
		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(dopt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
static struct tcp_md5sig_key *
			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
		return NULL;
	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr)
			return &tp->md5sig_info->keys4[i].base;
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
}

EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
}

/* This can be called on a newly created socket, from other files */
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
		      u8 *newkey, u8 newkeylen)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp4_md5sig_key *keys;

	key = tcp_v4_md5_do_lookup(sk, addr);
	if (key) {
		/* Pre-existing entry - just update that one. */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		struct tcp_md5sig_info *md5sig;

		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
						  GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		}
		if (tcp_alloc_md5sig_pool(sk) == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}
		md5sig = tp->md5sig_info;

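		/* Keys live in a flat array that grows by one slot per
		 * addition; lookups are linear, which is fine for the
		 * handful of peers a socket typically keys.
		 */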
		if (md5sig->alloced4 == md5sig->entries4) {
			keys = kmalloc((sizeof(*keys) *
					(md5sig->entries4 + 1)), GFP_ATOMIC);
			if (!keys) {
				kfree(newkey);
				tcp_free_md5sig_pool();
				return -ENOMEM;
			}

			if (md5sig->entries4)
				memcpy(keys, md5sig->keys4,
				       sizeof(*keys) * md5sig->entries4);

			/* Free old key list, and reference new one */
			kfree(md5sig->keys4);
			md5sig->keys4 = keys;
			md5sig->alloced4++;
		}
		md5sig->entries4++;
		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
	}
	return 0;
}

EXPORT_SYMBOL(tcp_v4_md5_do_add);

static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, u8 newkeylen)
{
	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
				 newkey, newkeylen);
}

int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr) {
			/* Free the key */
			kfree(tp->md5sig_info->keys4[i].base.key);
			tp->md5sig_info->entries4--;

			if (tp->md5sig_info->entries4 == 0) {
				kfree(tp->md5sig_info->keys4);
				tp->md5sig_info->keys4 = NULL;
				tp->md5sig_info->alloced4 = 0;
			} else if (tp->md5sig_info->entries4 != i) {
				/* Need to do some manipulation */
				memmove(&tp->md5sig_info->keys4[i],
					&tp->md5sig_info->keys4[i+1],
					(tp->md5sig_info->entries4 - i) *
					 sizeof(struct tcp4_md5sig_key));
			}
			tcp_free_md5sig_pool();
			return 0;
		}
	}
	return -ENOENT;
}

EXPORT_SYMBOL(tcp_v4_md5_do_del);

static void tcp_v4_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Free each key, then the key array itself,
	 * the crypto element, and then decrement our
	 * hold on the last resort crypto.
	 */
	if (tp->md5sig_info->entries4) {
		int i;
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].base.key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();
	}
	if (tp->md5sig_info->keys4) {
		kfree(tp->md5sig_info->keys4);
		tp->md5sig_info->keys4 = NULL;
		tp->md5sig_info->alloced4 = 0;
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 *newkey;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
		if (!tcp_sk(sk)->md5sig_info)
			return -ENOENT;
		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (!tcp_sk(sk)->md5sig_info) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct tcp_md5sig_info *p;

		p = kzalloc(sizeof(*p), sk->sk_allocation);
		if (!p)
			return -EINVAL;

		tp->md5sig_info = p;
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
	}

	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
	if (!newkey)
		return -ENOMEM;
	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
				 newkey, cmd.tcpm_keylen);
}

static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			struct sock *sk, struct request_sock *req,
			struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	__u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
			       &iph->saddr, ntohs(th->source),
			       &iph->daddr, ntohs(th->dest),
			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
		}
		return 1;
	}
	return 0;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
};
#endif

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
};

LT
1222int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1223{
4957faad 1224 struct tcp_extend_values tmp_ext;
1da177e4 1225 struct tcp_options_received tmp_opt;
4957faad 1226 u8 *hash_location;
60236fdd 1227 struct request_sock *req;
e6b4d113 1228 struct inet_request_sock *ireq;
4957faad 1229 struct tcp_sock *tp = tcp_sk(sk);
e6b4d113 1230 struct dst_entry *dst = NULL;
eddc9ec5
ACM
1231 __be32 saddr = ip_hdr(skb)->saddr;
1232 __be32 daddr = ip_hdr(skb)->daddr;
1da177e4 1233 __u32 isn = TCP_SKB_CB(skb)->when;
1da177e4
LT
1234#ifdef CONFIG_SYN_COOKIES
1235 int want_cookie = 0;
1236#else
1237#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1238#endif
1239
1240 /* Never answer to SYNs send to broadcast or multicast */
511c3f92 1241 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1da177e4
LT
1242 goto drop;
1243
1244 /* TW buckets are converted to open requests without
1245 * limitations, they conserve resources and peer is
1246 * evidently real one.
1247 */
463c84b9 1248 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
2a1d4bd4
FW
1249 if (net_ratelimit())
1250 syn_flood_warning(skb);
1da177e4
LT
1251#ifdef CONFIG_SYN_COOKIES
1252 if (sysctl_tcp_syncookies) {
1253 want_cookie = 1;
1254 } else
1255#endif
1256 goto drop;
1257 }
1258
1259 /* Accept backlog is full. If we have already queued enough
1260 * of warm entries in syn queue, drop request. It is better than
1261 * clogging syn queue with openreqs with exponentially increasing
1262 * timeout.
1263 */
463c84b9 1264 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1da177e4
LT
1265 goto drop;
1266
ce4a7d0d 1267 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1da177e4
LT
1268 if (!req)
1269 goto drop;
1270
cfb6eeb4
YH
1271#ifdef CONFIG_TCP_MD5SIG
1272 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1273#endif
1274
1da177e4 1275 tcp_clear_options(&tmp_opt);
bee7ca9e 1276 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
4957faad 1277 tmp_opt.user_mss = tp->rx_opt.user_mss;
bb5b7c11 1278 tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
4957faad
WAS
1279
1280 if (tmp_opt.cookie_plus > 0 &&
1281 tmp_opt.saw_tstamp &&
1282 !tp->rx_opt.cookie_out_never &&
1283 (sysctl_tcp_cookie_size > 0 ||
1284 (tp->cookie_values != NULL &&
1285 tp->cookie_values->cookie_desired > 0))) {
1286 u8 *c;
1287 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1288 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1289
1290 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1291 goto drop_and_release;
1292
1293 /* Secret recipe starts with IP addresses */
0eae88f3
ED
1294 *mess++ ^= (__force u32)daddr;
1295 *mess++ ^= (__force u32)saddr;
1da177e4 1296
4957faad
WAS
1297 /* plus variable length Initiator Cookie */
1298 c = (u8 *)mess;
1299 while (l-- > 0)
1300 *c++ ^= *hash_location++;
1301
1302#ifdef CONFIG_SYN_COOKIES
1303 want_cookie = 0; /* not our kind of cookie */
1304#endif
1305 tmp_ext.cookie_out_never = 0; /* false */
1306 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1307 } else if (!tp->rx_opt.cookie_in_always) {
1308 /* redundant indications, but ensure initialization. */
1309 tmp_ext.cookie_out_never = 1; /* true */
1310 tmp_ext.cookie_plus = 0;
1311 } else {
1312 goto drop_and_release;
1313 }
1314 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1da177e4 1315
4dfc2817 1316 if (want_cookie && !tmp_opt.saw_tstamp)
1da177e4 1317 tcp_clear_options(&tmp_opt);
1da177e4 1318
1da177e4 1319 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1da177e4
LT
1320 tcp_openreq_init(req, &tmp_opt, skb);
1321
bb5b7c11
DM
1322 ireq = inet_rsk(req);
1323 ireq->loc_addr = daddr;
1324 ireq->rmt_addr = saddr;
1325 ireq->no_srccheck = inet_sk(sk)->transparent;
1326 ireq->opt = tcp_v4_save_options(sk, skb);
1327
284904aa 1328 if (security_inet_conn_request(sk, skb, req))
bb5b7c11 1329 goto drop_and_free;
284904aa 1330
172d69e6 1331 if (!want_cookie || tmp_opt.tstamp_ok)
aa8223c7 1332 TCP_ECN_create_request(req, tcp_hdr(skb));
1da177e4
LT
1333
1334 if (want_cookie) {
1da177e4 1335 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
172d69e6 1336 req->cookie_ts = tmp_opt.tstamp_ok;
1da177e4
LT
1337 } else if (!isn) {
1338 struct inet_peer *peer = NULL;
1339
1340 /* VJ's idea. We save last timestamp seen
1341 * from the destination in peer table, when entering
1342 * state TIME-WAIT, and check against it before
1343 * accepting new connection request.
1344 *
1345 * If "isn" is not zero, this request hit alive
1346 * timewait bucket, so that all the necessary checks
1347 * are made in the function processing timewait state.
1348 */
1349 if (tmp_opt.saw_tstamp &&
295ff7ed 1350 tcp_death_row.sysctl_tw_recycle &&
bb5b7c11 1351 (dst = inet_csk_route_req(sk, req)) != NULL &&
1da177e4
LT
1352 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1353 peer->v4daddr == saddr) {
317fe0e6 1354 inet_peer_refcheck(peer);
2c1409a0 1355 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1da177e4
LT
1356 (s32)(peer->tcp_ts - req->ts_recent) >
1357 TCP_PAWS_WINDOW) {
de0744af 1358 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
7cd04fa7 1359 goto drop_and_release;
1da177e4
LT
1360 }
1361 }
1362 /* Kill the following clause, if you dislike this way. */
1363 else if (!sysctl_tcp_syncookies &&
463c84b9 1364 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1da177e4
LT
1365 (sysctl_max_syn_backlog >> 2)) &&
1366 (!peer || !peer->tcp_ts_stamp) &&
1367 (!dst || !dst_metric(dst, RTAX_RTT))) {
1368 /* Without syncookies last quarter of
1369 * backlog is filled with destinations,
1370 * proven to be alive.
1371 * It means that we continue to communicate
1372 * to destinations, already remembered
1373 * to the moment of synflood.
1374 */
673d57e7
HH
1375 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1376 &saddr, ntohs(tcp_hdr(skb)->source));
7cd04fa7 1377 goto drop_and_release;
1da177e4
LT
1378 }
1379
a94f723d 1380 isn = tcp_v4_init_sequence(skb);
1da177e4 1381 }
2e6599cb 1382 tcp_rsk(req)->snt_isn = isn;
1da177e4 1383
72659ecc
OP
1384 if (tcp_v4_send_synack(sk, dst, req,
1385 (struct request_values *)&tmp_ext) ||
4957faad 1386 want_cookie)
1da177e4
LT
1387 goto drop_and_free;
1388
7cd04fa7 1389 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1da177e4
LT
1390 return 0;
1391
7cd04fa7
DL
1392drop_and_release:
1393 dst_release(dst);
1da177e4 1394drop_and_free:
60236fdd 1395 reqsk_free(req);
1da177e4 1396drop:
1da177e4
LT
1397 return 0;
1398}


/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(newsk, dst);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->inet_daddr   = ireq->rmt_addr;
	newinet->inet_rcv_saddr = ireq->loc_addr;
	newinet->inet_saddr   = ireq->loc_addr;
	newinet->opt	      = ireq->opt;
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newinet->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
					  newkey, key->keylen);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	__inet_hash_nolisten(newsk, NULL);
	__inet_inherit_port(sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}

static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->len, iph->saddr,
				  iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

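	/* Small packets are cheap to verify right away; larger ones keep
	 * the pseudo-header seed in skb->csum and have their checksum
	 * completed later (e.g. while copying the payload to user space).
	 */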
	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}


/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		sock_rps_save_rxhash(sk, skb->rxhash);
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		TCP_CHECK_TIMER(sk);
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb->rxhash);

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	TCP_CHECK_TIMER(sk);
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}

/*
 *	From tcp_input.c
 */

int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided the case of th->doff == 0 is eliminated.
	 * So, we defer the checks. */
	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
		goto bad_packet;

	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
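	/* Fill in the control block: end_seq counts one unit of sequence
	 * space for SYN and FIN each, plus the payload length.
	 */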
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when	 = 0;
	TCP_SKB_CB(skb)->flags	 = iph->tos;
	TCP_SKB_CB(skb)->sacked	 = 0;

	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb))) {
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
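	/* A SYN arriving for a TIME-WAIT connection may legitimately open a
	 * new incarnation: look up a current listener and, if one exists,
	 * retire the timewait bucket and process the SYN against it.
	 */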
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

/* VJ's idea. Save last timestamp seen from this destination
 * and hold it at least for normal timewait interval to use for duplicate
 * segment detection in subsequent connections, before they enter synchronized
 * state.
 */

int tcp_v4_remember_stamp(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
	struct inet_peer *peer = NULL;
	int release_it = 0;

	if (!rt || rt->rt_dst != inet->inet_daddr) {
		peer = inet_getpeer(inet->inet_daddr, 1);
		release_it = 1;
	} else {
		if (!rt->peer)
			rt_bind_peer(rt, 1);
		peer = rt->peer;
	}

	if (peer) {
		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
		     peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
			peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
			peer->tcp_ts = tp->rx_opt.ts_recent;
		}
		if (release_it)
			inet_putpeer(peer);
		return 1;
	}

	return 0;
}

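/*
 * Aside (editor's illustration, not part of tcp_ipv4.c): the test
 * (s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 above is wraparound-safe
 * serial-number arithmetic: subtracting two u32 timestamps and reading the
 * difference as s32 orders them correctly even across a 2^32 rollover.
 * A userspace sketch:
 */
#if 0	/* illustrative only */
#include <stdint.h>
#include <stdio.h>

/* returns nonzero when timestamp a is not newer than b, modulo 2^32 */
static int ts_not_after(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) <= 0;
}

int main(void)
{
	/* 0xFFFFFFF0 counts as "before" 0x00000010 once the counter wraps */
	printf("%d\n", ts_not_after(0xFFFFFFF0u, 0x00000010u));	/* prints 1 */
	printf("%d\n", ts_not_after(0x00000010u, 0xFFFFFFF0u));	/* prints 0 */
	return 0;
}
#endif
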
int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
{
	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);

	if (peer) {
		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);

		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
		     peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
			peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
			peer->tcp_ts = tcptw->tw_ts_recent;
		}
		inet_putpeer(peer);
		return 1;
	}

	return 0;
}

const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.remember_stamp	   = tcp_v4_remember_stamp,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
	.bind_conflict	   = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};

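/*
 * Aside (editor's illustration, not part of tcp_ipv4.c): ipv4_specific is the
 * classic C "ops table" pattern -- a const struct of function pointers that
 * lets the AF-independent TCP core call IPv4-specific handlers indirectly.
 * A toy userspace version of the same pattern (all names here are invented):
 */
#if 0	/* illustrative only */
#include <stdio.h>

struct toy_af_ops {
	int (*queue_xmit)(const char *pkt);
	int net_header_len;
};

static int toy_ip_queue_xmit(const char *pkt)
{
	printf("v4 xmit: %s\n", pkt);
	return 0;
}

static const struct toy_af_ops toy_ipv4_ops = {
	.queue_xmit	= toy_ip_queue_xmit,
	.net_header_len	= 20,	/* plays the role of sizeof(struct iphdr) */
};

int main(void)
{
	const struct toy_af_ops *ops = &toy_ipv4_ops;	/* chosen once at init */
	return ops->queue_xmit("hello");		/* generic code dispatches */
}
#endif
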
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_add	= tcp_v4_md5_add_func,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 * sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them. -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = TCP_MSS_DEFAULT;

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	/* TCP Cookie Transactions */
	if (sysctl_tcp_cookie_size > 0) {
		/* Default, cookies without s_data_payload. */
		tp->cookie_values =
			kzalloc(sizeof(*tp->cookie_values),
				sk->sk_allocation);
		if (tp->cookie_values != NULL)
			kref_init(&tp->cookie_values->kref);
	}
	/* Presumed zeroed, in order of appearance:
	 * cookie_in_always, cookie_out_never,
	 * s_data_constant, s_data_in, s_data_out
	 */
	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	local_bh_disable();
	percpu_counter_inc(&tcp_sockets_allocated);
	local_bh_enable();

	return 0;
}

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Clean up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_v4_clear_md5_list(sk);
		kfree(tp->md5sig_info);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	/*
	 * If sendmsg cached page exists, toss it.
	 */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	/* TCP Cookie Transactions */
	if (tp->cookie_values != NULL) {
		kref_put(&tp->cookie_values->kref,
			 tcp_cookie_values_release);
		tp->cookie_values = NULL;
	}

	percpu_counter_dec(&tcp_sockets_allocated);
}

EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
{
	return hlist_nulls_empty(head) ? NULL :
		list_entry(head->first, struct inet_timewait_sock, tw_node);
}

static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
{
	return !is_a_nulls(tw->tw_node.next) ?
		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
}

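/*
 * Aside (editor's illustration, not part of tcp_ipv4.c): tw_next() ends its
 * walk on is_a_nulls() rather than on NULL. An hlist_nulls chain terminates
 * in an odd "nulls" pointer that encodes a value (here, effectively the hash
 * bucket), which lets lockless RCU readers detect that a node was moved to a
 * different chain mid-walk. A userspace sketch of the encoding, mirroring
 * include/linux/list_nulls.h:
 */
#if 0	/* illustrative only */
#include <stdio.h>

#define NULLS_MARKER(value)	(1UL | (((unsigned long)(value)) << 1))
#define is_a_nulls(ptr)		(((unsigned long)(ptr)) & 1)
#define get_nulls_value(ptr)	(((unsigned long)(ptr)) >> 1)

int main(void)
{
	void *end = (void *)NULLS_MARKER(42);	/* terminator for bucket 42 */

	if (is_a_nulls(end))	/* odd pointer: end of chain, never dereferenced */
		printf("end of chain, bucket %lu\n", get_nulls_value(end));
	return 0;
}
#endif
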
/*
 * Get the next listener socket, following cur. If cur is NULL, get the first
 * socket starting from the bucket given in st->bucket; when st->bucket is
 * zero the very first socket in the hash table is returned.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_nulls_node *node;
	struct sock *sk = cur;
	struct inet_listen_hashbucket *ilb;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	if (!sk) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			st->offset = 0;
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		sk = sk_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_next(sk);
	}
get_sk:
	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid = sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state = TCP_SEQ_STATE_OPENREQ;
			st->sbucket = 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	spin_unlock_bh(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}

static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	st->offset = 0;
	rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static inline int empty_bucket(struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
}

/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		struct inet_timewait_sock *tw;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket].twchain) {
			if (tw->tw_family != st->family ||
			    !net_eq(twsk_net(tw), net)) {
				continue;
			}
			rc = tw;
			goto out;
		}
		spin_unlock_bh(lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}

static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* Look for the next non-empty bucket */
		st->offset = 0;
		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
		       empty_bucket(st))
			;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			return NULL;

		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
	} else
		sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			goto found;
	}

	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	st->state = TCP_SEQ_STATE_LISTENING;
	rc = listening_get_idx(seq, &pos);

	if (!rc) {
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc = established_get_idx(seq, pos);
	}

	return rc;
}

static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}

static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}

static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}

static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}

static int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			   sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family = afinfo->family;
	s->last_pos = 0;
	return 0;
}

int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	afinfo->seq_fops.open		= tcp_seq_open;
	afinfo->seq_fops.read		= seq_read;
	afinfo->seq_fops.llseek		= seq_lseek;
	afinfo->seq_fops.release	= seq_release_net;

	afinfo->seq_ops.start		= tcp_seq_start;
	afinfo->seq_ops.next		= tcp_seq_next;
	afinfo->seq_ops.stop		= tcp_seq_stop;

	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
			     &afinfo->seq_fops, afinfo);
	if (!p)
		rc = -ENOMEM;
	return rc;
}

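/*
 * Aside (editor's illustration, not part of tcp_ipv4.c): the seq_ops wired up
 * above follow the kernel's seq_file contract. The seq_file core roughly does
 * v = start(pos); while (v) { show(v); v = next(v, &pos); } stop(v); and may
 * restart at any position, which is why tcp_seq_stop() must release whatever
 * lock the iterator currently holds. A userspace caricature of that driving
 * loop (the toy_* callbacks are invented):
 */
#if 0	/* illustrative only */
#include <stdio.h>

static int data[] = { 10, 20, 30 };

static void *toy_start(long *pos) { return (*pos < 3) ? &data[*pos] : NULL; }
static void *toy_next(void *v, long *pos) { ++*pos; return toy_start(pos); }
static void  toy_stop(void *v)  { /* the kernel code drops locks here */ }
static void  toy_show(void *v)  { printf("%d\n", *(int *)v); }

int main(void)
{
	long pos = 0;
	void *v;

	for (v = toy_start(&pos); v; v = toy_next(v, &pos))
		toy_show(v);
	toy_stop(v);
	return 0;
}
#endif
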
void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	proc_net_remove(net, afinfo->name);
}

static void get_openreq4(struct sock *sk, struct request_sock *req,
			 struct seq_file *f, int i, int uid, int *len)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int ttd = req->expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
		i,
		ireq->loc_addr,
		ntohs(inet_sk(sk)->inet_sport),
		ireq->rmt_addr,
		ntohs(ireq->rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_to_clock_t(ttd),
		req->retrans,
		uid,
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req,
		len);
}

static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
{
	int timer_active;
	unsigned long timer_expires;
	struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sk->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	if (sk->sk_state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/*
		 * Because we don't lock the socket, we might find a
		 * transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
		i, src, srcp, dest, destp, sk->sk_state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		sock_i_uid(sk),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		atomic_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
		len);
}

static void get_timewait4_sock(struct inet_timewait_sock *tw,
			       struct seq_file *f, int i, int *len)
{
	__be32 dest, src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw, len);
}

#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	int len;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, seq, st->num, &len);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, seq, st->num, &len);
		break;
	}
	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
out:
	return 0;
}

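/*
 * Aside (editor's illustration, not part of tcp_ipv4.c): tcp4_seq_show() is
 * what produces /proc/net/tcp. Addresses are the raw __be32 values printed
 * with %08X, so on a little-endian host the hex string shows the address
 * byte-reversed relative to dotted form; ports are printed via ntohs() and
 * are plain host-order hex. A small userspace reader (assumes it runs on the
 * same little-endian host that produced the file):
 */
#if 0	/* illustrative only */
#include <arpa/inet.h>
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/net/tcp", "r");
	char line[256];
	unsigned int laddr, lport, raddr, rport, state;

	if (!f)
		return 1;
	fgets(line, sizeof(line), f);	/* skip the header row */
	while (fgets(line, sizeof(line), f)) {
		if (sscanf(line, "%*d: %8X:%4X %8X:%4X %2X",
			   &laddr, &lport, &raddr, &rport, &state) != 5)
			continue;
		/* on a LE host, storing the parsed value reproduces the be32 bytes */
		struct in_addr a = { .s_addr = laddr };
		printf("%s:%u state %02X\n", inet_ntoa(a), lport, state);
	}
	fclose(f);
	return 0;
}
#endif
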
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= {
		.owner		= THIS_MODULE,
	},
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	struct iphdr *iph = skb_gro_network_header(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
				  skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			break;
		}

		/* fall through */
	case CHECKSUM_NONE:
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	return tcp_gro_receive(head, skb);
}
EXPORT_SYMBOL(tcp4_gro_receive);

int tcp4_gro_complete(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
				  iph->saddr, iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	return tcp_gro_complete(skb);
}
EXPORT_SYMBOL(tcp4_gro_complete);

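/*
 * Aside (editor's illustration, not part of tcp_ipv4.c): the tcp_v4_check()
 * used by both GRO handlers above computes the standard RFC 793/1071 TCP
 * checksum over the IPv4 pseudo-header (saddr, daddr, zero, protocol, length)
 * plus the segment itself. A self-contained userspace version of the same
 * one's-complement folding arithmetic:
 */
#if 0	/* illustrative only */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* one's-complement sum over a byte buffer, seeded with an initial sum */
static uint32_t csum_add(uint32_t sum, const uint8_t *p, size_t len)
{
	while (len > 1) {
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len)
		sum += (uint32_t)p[0] << 8;	/* pad an odd trailing byte */
	return sum;
}

static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)			/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	uint8_t saddr[4] = {192, 0, 2, 1}, daddr[4] = {192, 0, 2, 2};
	uint8_t seg[20] = {0};		/* TCP header + payload, checksum field zeroed */
	uint16_t len = sizeof(seg);
	uint8_t pseudo[12];

	memcpy(pseudo, saddr, 4);
	memcpy(pseudo + 4, daddr, 4);
	pseudo[8]  = 0;			/* zero byte */
	pseudo[9]  = 6;			/* IPPROTO_TCP */
	pseudo[10] = len >> 8;		/* TCP length, big-endian */
	pseudo[11] = len & 0xff;

	uint32_t sum = csum_add(0, pseudo, sizeof(pseudo));
	sum = csum_add(sum, seg, len);
	printf("tcp checksum: 0x%04x\n", csum_fold(sum));
	return 0;
}
#endif
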
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};

static int __net_init tcp_sk_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcp_sk_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
}

static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};

void __init tcp_v4_init(void)
{
	inet_hashinfo_init(&tcp_hashinfo);
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}

EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
EXPORT_SYMBOL(sysctl_tcp_low_latency);