tcp memory pressure controls
net/ipv4/tcp_ipv4.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);

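/*
 * Note (illustrative, not from the original source): both knobs above are
 * plain sysctls, visible as /proc/sys/net/ipv4/tcp_tw_reuse and
 * /proc/sys/net/ipv4/tcp_low_latency, so an administrator can enable
 * TIME-WAIT reuse for outgoing connections with, e.g.:
 *
 *	sysctl -w net.ipv4.tcp_tw_reuse=1
 */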

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
						   __be32 addr);
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#else
static inline
struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	return NULL;
}
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's: only the timestamp cache is
	   held not per host, but per port pair, and the TW bucket is used
	   as the state holder.

	   If the TW bucket has already been destroyed we fall back to VJ's
	   scheme and use the initial timestamp retrieved from the peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

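/*
 * Worked example of the sequence-number bump above (illustrative only):
 * if the reused TIME-WAIT connection last sent tw_snd_nxt = 1000, the new
 * socket starts at write_seq = 1000 + 65535 + 2 = 66537, i.e. safely past
 * the largest window (65535) the peer could still consider open for the
 * old incarnation of the connection.
 */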
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk, true);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
		/*
		 * VJ's idea. We save the last timestamp seen from
		 * the destination in the peer table, when entering
		 * TIME-WAIT state, and initialize rx_opt.ts_recent from it,
		 * when trying a new connection.
		 */
		if (peer) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
				tp->rx_opt.ts_recent = peer->tcp_ts;
			}
		}
	}

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the
	 * socket lock, select a source port, enter ourselves into the
	 * hash tables and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

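/*
 * A minimal userspace sketch (illustrative, not part of this file) of the
 * path that ends up in tcp_v4_connect(): an AF_INET stream socket plus a
 * connect() call. The address and port are arbitrary examples.
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	struct sockaddr_in sa = { .sin_family = AF_INET,
 *				  .sin_port   = htons(80) };
 *	inet_pton(AF_INET, "192.0.2.1", &sa.sin_addr);
 *	connect(fd, (struct sockaddr *)&sa, sizeof(sa));
 */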
/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always < 576 bytes, so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the dst entry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet-too-big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember the soft error
	 * for the case this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

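/*
 * Rough numeric example of the clamp above (illustrative only): with no
 * IP or TCP options, MSS is path MTU minus 40 bytes of headers (20 IPv4 +
 * 20 TCP). If an ICMP_FRAG_NEEDED quotes mtu = 1400 while icsk_pmtu_cookie
 * is 1500, tcp_sync_mss() drops the cached MSS from about 1460 to about
 * 1360, and the lost packet is retransmitted immediately at the smaller
 * size instead of waiting for the retransmit timer.
 */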
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition. If err < 0 then the socket should
 * be closed and the error returned to the user. If err > 0
 * it's just the icmp type << 8 | icmp code. After adjustment
 * header points to the first 8 bytes of the tcp header. We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
					  TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:	/* Cannot happen.
				   It can, f.e., if SYNs crossed.
				 */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows considering as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note that in the modern internet, where routing is unreliable
	 * and broken firewalls sit in every dark corner sending random
	 * errors ordered by their masters, even these two messages finally
	 * lose their original sense (even Linux sends invalid PORT_UNREACHs).
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

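/*
 * Worked example of the RTO revert above (illustrative only): suppose the
 * base RTO is 200 ms and three retransmissions have failed, so
 * icsk_backoff == 3 and the pending timer would fire after 200 << 3 =
 * 1600 ms. A host-unreachable ICMP matching snd_una undoes one backoff
 * step: the new RTO is 200 << 2 = 800 ms, and if 900 ms have already
 * elapsed since the head skb was stamped, remaining == 0 and the segment
 * is retransmitted immediately.
 */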
static void __tcp_v4_send_check(struct sk_buff *skb,
				__be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

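/*
 * Note on the two branches above: with CHECKSUM_PARTIAL the hardware
 * finishes the sum, so only the pseudo-header is folded in here and
 * csum_start/csum_offset tell the NIC where to write the result;
 * otherwise the full checksum over the TCP header and payload is
 * computed in software.
 */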
int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
	return 0;
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused an RST, it is not for a socket
 *		existing in our system; if it is matched to a socket,
 *		it is just a duplicate segment or a bug in the other
 *		side's TCP. So we build the reply based only on the
 *		parameters that arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
}

/* The code below, sending ACKs in SYN-RECV and TIME-WAIT states outside
   socket context, is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent,
			0,
			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      struct request_values *rvp)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, rvp);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	dst_release(dst);
	return err;
}

static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
			     struct request_values *rvp)
{
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return tcp_v4_send_synack(sk, NULL, req, rvp);
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/*
 * Return 1 if a syncookie should be sent
 */
int tcp_syn_flood_action(struct sock *sk,
			 const struct sk_buff *skb,
			 const char *proto)
{
	const char *msg = "Dropping request";
	int want_cookie = 0;
	struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = 1;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s. "
			"Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);

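/*
 * Operational note (illustrative): whether cookies are sent under SYN
 * flood is controlled by the net.ipv4.tcp_syncookies sysctl, e.g.
 *
 *	sysctl -w net.ipv4.tcp_syncookies=1
 *
 * together with CONFIG_SYN_COOKIES at build time.
 */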
/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
						  struct sk_buff *skb)
{
	const struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options_rcu *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = sizeof(*dopt) + opt->optlen;

		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(&dopt->opt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
						   __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
		return NULL;
	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr)
			return &tp->md5sig_info->keys4[i].base;
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
}

/* This can be called on a newly created socket, from other files */
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
		      u8 *newkey, u8 newkeylen)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp4_md5sig_key *keys;

	key = tcp_v4_md5_do_lookup(sk, addr);
	if (key) {
		/* Pre-existing entry - just update that one. */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		struct tcp_md5sig_info *md5sig;

		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
						  GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		}

		md5sig = tp->md5sig_info;
		if (md5sig->entries4 == 0 &&
		    tcp_alloc_md5sig_pool(sk) == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}

		if (md5sig->alloced4 == md5sig->entries4) {
			keys = kmalloc((sizeof(*keys) *
					(md5sig->entries4 + 1)), GFP_ATOMIC);
			if (!keys) {
				kfree(newkey);
				if (md5sig->entries4 == 0)
					tcp_free_md5sig_pool();
				return -ENOMEM;
			}

			if (md5sig->entries4)
				memcpy(keys, md5sig->keys4,
				       sizeof(*keys) * md5sig->entries4);

			/* Free old key list, and reference new one */
			kfree(md5sig->keys4);
			md5sig->keys4 = keys;
			md5sig->alloced4++;
		}
		md5sig->entries4++;
		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
	}
	return 0;
}
EXPORT_SYMBOL(tcp_v4_md5_do_add);

static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, u8 newkeylen)
{
	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
				 newkey, newkeylen);
}

int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr) {
			/* Free the key */
			kfree(tp->md5sig_info->keys4[i].base.key);
			tp->md5sig_info->entries4--;

			if (tp->md5sig_info->entries4 == 0) {
				kfree(tp->md5sig_info->keys4);
				tp->md5sig_info->keys4 = NULL;
				tp->md5sig_info->alloced4 = 0;
				tcp_free_md5sig_pool();
			} else if (tp->md5sig_info->entries4 != i) {
				/* Need to do some manipulation */
				memmove(&tp->md5sig_info->keys4[i],
					&tp->md5sig_info->keys4[i + 1],
					(tp->md5sig_info->entries4 - i) *
					 sizeof(struct tcp4_md5sig_key));
			}
			return 0;
		}
	}
	return -ENOENT;
}
EXPORT_SYMBOL(tcp_v4_md5_do_del);

static void tcp_v4_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Free each key, then the set of keys, the crypto element,
	 * and then decrement our hold on the last resort crypto.
	 */
	if (tp->md5sig_info->entries4) {
		int i;
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].base.key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();
	}
	if (tp->md5sig_info->keys4) {
		kfree(tp->md5sig_info->keys4);
		tp->md5sig_info->keys4 = NULL;
		tp->md5sig_info->alloced4 = 0;
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 *newkey;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
		if (!tcp_sk(sk)->md5sig_info)
			return -ENOENT;
		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (!tcp_sk(sk)->md5sig_info) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct tcp_md5sig_info *p;

		p = kzalloc(sizeof(*p), sk->sk_allocation);
		if (!p)
			return -EINVAL;

		tp->md5sig_info = p;
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
	}

	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
	if (!newkey)
		return -ENOMEM;
	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
				 newkey, cmd.tcpm_keylen);
}

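/*
 * Hedged userspace sketch (illustrative, not part of this file): the
 * parser above services the TCP_MD5SIG socket option, which can be
 * exercised roughly like this; the peer address and key are arbitrary
 * examples.
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in *a = (struct sockaddr_in *)&md5.tcpm_addr;
 *	a->sin_family = AF_INET;
 *	inet_pton(AF_INET, "192.0.2.1", &a->sin_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */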
static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
			       &iph->saddr, ntohs(th->source),
			       &iph->daddr, ntohs(th->dest),
			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
		}
		return 1;
	}
	return 0;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
};
#endif

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	const u8 *hash_location;
	struct request_sock *req;
	struct inet_request_sock *ireq;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	int want_cookie = 0;

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
		if (!want_cookie)
			goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss  = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);

	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u8 *c;
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
			goto drop_and_release;

		/* Secret recipe starts with IP addresses */
		*mess++ ^= (__force u32)daddr;
		*mess++ ^= (__force u32)saddr;

		/* plus variable length Initiator Cookie */
		c = (u8 *)mess;
		while (l-- > 0)
			*c++ ^= *hash_location++;

		want_cookie = 0;	/* not our kind of cookie */
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	} else {
		goto drop_and_release;
	}
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (want_cookie) {
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		struct inet_peer *peer = NULL;
		struct flowi4 fl4;

		/* VJ's idea. We save the last timestamp seen
		 * from the destination in the peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting a new connection request.
		 *
		 * If "isn" is not zero, this request hit an alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
		    fl4.daddr == saddr &&
		    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies the last quarter of the
			 * backlog is filled with destinations
			 * proven to be alive.
			 * It means that we continue to communicate
			 * with destinations already remembered
			 * at the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->snt_synack = tcp_time_stamp;

	if (tcp_v4_send_synack(sk, dst, req,
			       (struct request_values *)&tmp_ext) ||
	    want_cookie)
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);
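
/*
 * Numeric example of the "last quarter" clause above (illustrative only):
 * with sysctl_max_syn_backlog = 1024, once fewer than 1024 >> 2 = 256
 * slots remain free and syncookies are off, new SYNs from destinations
 * with no remembered timestamp or RTT metric are dropped, reserving the
 * tail of the queue for peers already proven to be alive.
 */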

/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	ireq = inet_rsk(req);
	newinet->inet_daddr = ireq->rmt_addr;
	newinet->inet_rcv_saddr = ireq->loc_addr;
	newinet->inet_saddr = ireq->loc_addr;
	inet_opt = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt = NULL;
	newinet->mc_index = inet_iif(skb);
	newinet->mc_ttl = ip_hdr(skb)->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
		goto put_and_exit;

	sk_setup_caps(newsk, dst);

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);
	if (tcp_rsk(req)->snt_synack)
		tcp_valid_rtt_meas(newsk,
		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
	newtp->total_retrans = req->retrans;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
					  newkey, key->keylen);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	tcp_clear_xmit_timers(newsk);
	tcp_cleanup_congestion_control(newsk);
	bh_unlock_sock(newsk);
	sock_put(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->len, iph->saddr,
				  iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}

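/*
 * Note on the 76-byte cutoff above (our reading, not from the original
 * comments): for short segments it is cheap to finish verifying the
 * checksum immediately; for anything larger the pseudo-header sum is
 * stashed in skb->csum and full verification is deferred until the data
 * is actually consumed.
 */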

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		sock_rps_save_rxhash(sk, skb);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);
1da177e4
LT
1647
1648/*
1649 * From tcp_input.c
1650 */
1651
1652int tcp_v4_rcv(struct sk_buff *skb)
1653{
eddc9ec5 1654 const struct iphdr *iph;
cf533ea5 1655 const struct tcphdr *th;
1da177e4
LT
1656 struct sock *sk;
1657 int ret;
a86b1e30 1658 struct net *net = dev_net(skb->dev);
1da177e4
LT
1659
1660 if (skb->pkt_type != PACKET_HOST)
1661 goto discard_it;
1662
1663 /* Count it even if it's bad */
63231bdd 1664 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1da177e4
LT
1665
1666 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1667 goto discard_it;
1668
aa8223c7 1669 th = tcp_hdr(skb);
1da177e4
LT
1670
1671 if (th->doff < sizeof(struct tcphdr) / 4)
1672 goto bad_packet;
1673 if (!pskb_may_pull(skb, th->doff * 4))
1674 goto discard_it;
1675
1676 /* An explanation is required here, I think.
1677 * Packet length and doff are validated by header prediction,
caa20d9a 1678 * provided case of th->doff==0 is eliminated.
1da177e4 1679 * So, we defer the checks. */
60476372 1680 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1da177e4
LT
1681 goto bad_packet;
1682
aa8223c7 1683 th = tcp_hdr(skb);
eddc9ec5 1684 iph = ip_hdr(skb);
1da177e4
LT
1685 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1686 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1687 skb->len - th->doff * 4);
1688 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1689 TCP_SKB_CB(skb)->when = 0;
b82d1bb4 1690 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1da177e4
LT
1691 TCP_SKB_CB(skb)->sacked = 0;
1692
9a1f27c4 1693 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1da177e4
LT
1694 if (!sk)
1695 goto no_tcp_socket;
1696
bb134d5d
ED
1697process:
1698 if (sk->sk_state == TCP_TIME_WAIT)
1699 goto do_time_wait;
1700
6cce09f8
ED
1701 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1702 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
d218d111 1703 goto discard_and_relse;
6cce09f8 1704 }
d218d111 1705
1da177e4
LT
1706 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1707 goto discard_and_relse;
b59c2701 1708 nf_reset(skb);
1da177e4 1709
fda9ef5d 1710 if (sk_filter(sk, skb))
1da177e4
LT
1711 goto discard_and_relse;
1712
1713 skb->dev = NULL;
1714
c6366184 1715 bh_lock_sock_nested(sk);
1da177e4
LT
1716 ret = 0;
1717 if (!sock_owned_by_user(sk)) {
1a2449a8
CL
1718#ifdef CONFIG_NET_DMA
1719 struct tcp_sock *tp = tcp_sk(sk);
1720 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
f67b4599 1721 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1a2449a8 1722 if (tp->ucopy.dma_chan)
1da177e4 1723 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8
CL
1724 else
1725#endif
1726 {
1727 if (!tcp_prequeue(sk, skb))
ae8d7f88 1728 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8 1729 }
6cce09f8 1730 } else if (unlikely(sk_add_backlog(sk, skb))) {
6b03a53a 1731 bh_unlock_sock(sk);
6cce09f8 1732 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
6b03a53a
ZY
1733 goto discard_and_relse;
1734 }
1da177e4
LT
1735 bh_unlock_sock(sk);
1736
1737 sock_put(sk);
1738
1739 return ret;
1740
1741no_tcp_socket:
1742 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1743 goto discard_it;
1744
1745 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1746bad_packet:
63231bdd 1747 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1da177e4 1748 } else {
cfb6eeb4 1749 tcp_v4_send_reset(NULL, skb);
1da177e4
LT
1750 }
1751
1752discard_it:
1753 /* Discard frame. */
1754 kfree_skb(skb);
e905a9ed 1755 return 0;
1da177e4
LT
1756
1757discard_and_relse:
1758 sock_put(sk);
1759 goto discard_it;
1760
1761do_time_wait:
1762 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
9469c7b4 1763 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1764 goto discard_it;
1765 }
1766
1767 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
63231bdd 1768 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
9469c7b4 1769 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1770 goto discard_it;
1771 }
9469c7b4 1772 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1da177e4 1773 case TCP_TW_SYN: {
c346dca1 1774 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
c67499c0 1775 &tcp_hashinfo,
eddc9ec5 1776 iph->daddr, th->dest,
463c84b9 1777 inet_iif(skb));
1da177e4 1778 if (sk2) {
9469c7b4
YH
1779 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1780 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1781 sk = sk2;
1782 goto process;
1783 }
1784 /* Fall through to ACK */
1785 }
1786 case TCP_TW_ACK:
1787 tcp_v4_timewait_ack(sk, skb);
1788 break;
1789 case TCP_TW_RST:
1790 goto no_tcp_socket;
1791 case TCP_TW_SUCCESS:;
1792 }
1793 goto discard_it;
1794}
1795
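The TCP_SKB_CB() setup in tcp_v4_rcv() charges one unit of sequence space per SYN and FIN flag plus one per data byte when computing end_seq. The same arithmetic in plain userspace C, assuming host-order inputs:

#include <stdint.h>
#include <stdio.h>

/*
 * Sequence-space accounting as done for TCP_SKB_CB(skb)->end_seq above:
 * SYN and FIN each occupy one sequence number, data occupies one per byte.
 */
static uint32_t tcp_end_seq(uint32_t seq, int syn, int fin,
			    uint32_t skb_len, uint32_t header_len)
{
	return seq + syn + fin + (skb_len - header_len);
}

int main(void)
{
	/* a SYN carrying no data consumes exactly one sequence number */
	printf("%u\n", tcp_end_seq(1000, 1, 0, 20, 20));  /* -> 1001 */
	/* 100 bytes of data, no flags */
	printf("%u\n", tcp_end_seq(1000, 0, 0, 120, 20)); /* -> 1100 */
	return 0;
}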
3f419d2d 1796struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1da177e4 1797{
3f419d2d 1798 struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1da177e4 1799 struct inet_sock *inet = inet_sk(sk);
3f419d2d 1800 struct inet_peer *peer;
1da177e4 1801
c5216cc7
DM
1802 if (!rt ||
1803 inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
b534ecf1 1804 peer = inet_getpeer_v4(inet->inet_daddr, 1);
3f419d2d 1805 *release_it = true;
1da177e4
LT
1806 } else {
1807 if (!rt->peer)
a48eff12 1808 rt_bind_peer(rt, inet->inet_daddr, 1);
1da177e4 1809 peer = rt->peer;
3f419d2d 1810 *release_it = false;
1da177e4
LT
1811 }
1812
3f419d2d 1813 return peer;
1da177e4 1814}
3f419d2d 1815EXPORT_SYMBOL(tcp_v4_get_peer);
1da177e4 1816
ccb7c410 1817void *tcp_v4_tw_get_peer(struct sock *sk)
1da177e4 1818{
cf533ea5 1819 const struct inet_timewait_sock *tw = inet_twsk(sk);
1da177e4 1820
ccb7c410 1821 return inet_getpeer_v4(tw->tw_daddr, 1);
1da177e4 1822}
ccb7c410
DM
1823EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1824
1825static struct timewait_sock_ops tcp_timewait_sock_ops = {
1826 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1827 .twsk_unique = tcp_twsk_unique,
1828 .twsk_destructor= tcp_twsk_destructor,
1829 .twsk_getpeer = tcp_v4_tw_get_peer,
1830};
1da177e4 1831
3b401a81 1832const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
1833 .queue_xmit = ip_queue_xmit,
1834 .send_check = tcp_v4_send_check,
1835 .rebuild_header = inet_sk_rebuild_header,
1836 .conn_request = tcp_v4_conn_request,
1837 .syn_recv_sock = tcp_v4_syn_recv_sock,
3f419d2d 1838 .get_peer = tcp_v4_get_peer,
543d9cfe
ACM
1839 .net_header_len = sizeof(struct iphdr),
1840 .setsockopt = ip_setsockopt,
1841 .getsockopt = ip_getsockopt,
1842 .addr2sockaddr = inet_csk_addr2sockaddr,
1843 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 1844 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 1845#ifdef CONFIG_COMPAT
543d9cfe
ACM
1846 .compat_setsockopt = compat_ip_setsockopt,
1847 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 1848#endif
1da177e4 1849};
4bc2f18b 1850EXPORT_SYMBOL(ipv4_specific);
1da177e4 1851
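ipv4_specific is the usual kernel ops-table idiom: a const struct of function pointers that tcp_v4_init_sock() later installs as icsk->icsk_af_ops, keeping the TCP core address-family agnostic. A minimal sketch of the pattern (struct af_ops here is hypothetical):

#include <stdio.h>

/* A hypothetical ops table -- the shape of ipv4_specific, not its contents. */
struct af_ops {
	int (*queue_xmit)(const char *what);
	int net_header_len;
};

static int v4_queue_xmit(const char *what)
{
	printf("v4 xmit: %s\n", what);
	return 0;
}

static const struct af_ops v4_ops = {
	.queue_xmit	= v4_queue_xmit,
	.net_header_len	= 20,	/* sizeof(struct iphdr) in the real table */
};

int main(void)
{
	const struct af_ops *ops = &v4_ops;	/* icsk->icsk_af_ops analogue */

	ops->queue_xmit("payload");
	printf("header length: %d\n", ops->net_header_len);
	return 0;
}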
cfb6eeb4 1852#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 1853static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 1854 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 1855 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4
YH
1856 .md5_add = tcp_v4_md5_add_func,
1857 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 1858};
b6332e6c 1859#endif
cfb6eeb4 1860
1da177e4
LT
1861 /* NOTE: A lot of things are set to zero explicitly by the call to
1862 * sk_alloc(), so they need not be done here.
1863 */
1864static int tcp_v4_init_sock(struct sock *sk)
1865{
6687e988 1866 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4
LT
1867 struct tcp_sock *tp = tcp_sk(sk);
1868
1869 skb_queue_head_init(&tp->out_of_order_queue);
1870 tcp_init_xmit_timers(sk);
1871 tcp_prequeue_init(tp);
1872
6687e988 1873 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1da177e4
LT
1874 tp->mdev = TCP_TIMEOUT_INIT;
1875
1876 /* So many TCP implementations out there (incorrectly) count the
1877 * initial SYN frame in their delayed-ACK and congestion control
1878 * algorithms that we must have the following bandaid to talk
1879 * efficiently to them. -DaveM
1880 */
9ad7c049 1881 tp->snd_cwnd = TCP_INIT_CWND;
1da177e4
LT
1882
1883 /* See draft-stevens-tcpca-spec-01 for discussion of the
1884 * initialization of these values.
1885 */
0b6a05c1 1886 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1da177e4 1887 tp->snd_cwnd_clamp = ~0;
bee7ca9e 1888 tp->mss_cache = TCP_MSS_DEFAULT;
1da177e4
LT
1889
1890 tp->reordering = sysctl_tcp_reordering;
6687e988 1891 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1da177e4
LT
1892
1893 sk->sk_state = TCP_CLOSE;
1894
1895 sk->sk_write_space = sk_stream_write_space;
1896 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1897
8292a17a 1898 icsk->icsk_af_ops = &ipv4_specific;
d83d8461 1899 icsk->icsk_sync_mss = tcp_sync_mss;
cfb6eeb4
YH
1900#ifdef CONFIG_TCP_MD5SIG
1901 tp->af_specific = &tcp_sock_ipv4_specific;
1902#endif
1da177e4 1903
435cf559
WAS
1904 /* TCP Cookie Transactions */
1905 if (sysctl_tcp_cookie_size > 0) {
1906 /* Default, cookies without s_data_payload. */
1907 tp->cookie_values =
1908 kzalloc(sizeof(*tp->cookie_values),
1909 sk->sk_allocation);
1910 if (tp->cookie_values != NULL)
1911 kref_init(&tp->cookie_values->kref);
1912 }
1913 /* Presumed zeroed, in order of appearance:
1914 * cookie_in_always, cookie_out_never,
1915 * s_data_constant, s_data_in, s_data_out
1916 */
1da177e4
LT
1917 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1918 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1919
eb4dea58 1920 local_bh_disable();
d1a4c0b3 1921 sock_update_memcg(sk);
180d8cd9 1922 sk_sockets_allocated_inc(sk);
eb4dea58 1923 local_bh_enable();
1da177e4
LT
1924
1925 return 0;
1926}
1927
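Note that tcp_v4_init_sock() seeds sk_sndbuf and sk_rcvbuf from index 1 of sysctl_tcp_wmem/sysctl_tcp_rmem, i.e. the middle "default" entry of each (min, default, max) triple. A quick userspace check of the same tunable, assuming the standard procfs path:

#include <stdio.h>

/*
 * tcp_v4_init_sock() starts sockets at the middle entry of the
 * tcp_wmem triple; read the current value back from procfs.
 */
int main(void)
{
	long wmin, wdef, wmax;
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_wmem", "r");

	if (!f || fscanf(f, "%ld %ld %ld", &wmin, &wdef, &wmax) != 3) {
		perror("tcp_wmem");
		return 1;
	}
	fclose(f);
	printf("new TCP sockets start with sk_sndbuf = %ld bytes\n", wdef);
	return 0;
}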
7d06b2e0 1928void tcp_v4_destroy_sock(struct sock *sk)
1da177e4
LT
1929{
1930 struct tcp_sock *tp = tcp_sk(sk);
1931
1932 tcp_clear_xmit_timers(sk);
1933
6687e988 1934 tcp_cleanup_congestion_control(sk);
317a76f9 1935
1da177e4 1936 /* Cleanup up the write buffer. */
fe067e8a 1937 tcp_write_queue_purge(sk);
1da177e4
LT
1938
1939 /* Cleans up our, hopefully empty, out_of_order_queue. */
e905a9ed 1940 __skb_queue_purge(&tp->out_of_order_queue);
1da177e4 1941
cfb6eeb4
YH
1942#ifdef CONFIG_TCP_MD5SIG
1943 /* Clean up the MD5 key list, if any */
1944 if (tp->md5sig_info) {
1945 tcp_v4_clear_md5_list(sk);
1946 kfree(tp->md5sig_info);
1947 tp->md5sig_info = NULL;
1948 }
1949#endif
1950
1a2449a8
CL
1951#ifdef CONFIG_NET_DMA
1952 /* Cleans up our sk_async_wait_queue */
e905a9ed 1953 __skb_queue_purge(&sk->sk_async_wait_queue);
1a2449a8
CL
1954#endif
1955
1da177e4
LT
1956 /* Clean up the prequeue; it really must be empty by now. */
1957 __skb_queue_purge(&tp->ucopy.prequeue);
1958
1959 /* Clean up a referenced TCP bind bucket. */
463c84b9 1960 if (inet_csk(sk)->icsk_bind_hash)
ab1e0a13 1961 inet_put_port(sk);
1da177e4
LT
1962
1963 /*
1964 * If sendmsg cached page exists, toss it.
1965 */
1966 if (sk->sk_sndmsg_page) {
1967 __free_page(sk->sk_sndmsg_page);
1968 sk->sk_sndmsg_page = NULL;
1969 }
1970
435cf559
WAS
1971 /* TCP Cookie Transactions */
1972 if (tp->cookie_values != NULL) {
1973 kref_put(&tp->cookie_values->kref,
1974 tcp_cookie_values_release);
1975 tp->cookie_values = NULL;
1976 }
1977
180d8cd9 1978 sk_sockets_allocated_dec(sk);
d1a4c0b3 1979 sock_release_memcg(sk);
1da177e4 1980}
1da177e4
LT
1981EXPORT_SYMBOL(tcp_v4_destroy_sock);
1982
1983#ifdef CONFIG_PROC_FS
1984/* Proc filesystem TCP sock list dumping. */
1985
3ab5aee7 1986static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1da177e4 1987{
3ab5aee7 1988 return hlist_nulls_empty(head) ? NULL :
8feaf0c0 1989 list_entry(head->first, struct inet_timewait_sock, tw_node);
1da177e4
LT
1990}
1991
8feaf0c0 1992static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1da177e4 1993{
3ab5aee7
ED
1994 return !is_a_nulls(tw->tw_node.next) ?
1995 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1da177e4
LT
1996}
1997
a8b690f9
TH
1998/*
1999 * Get the next listener socket following cur. If cur is NULL, get the first socket
2000 * starting from bucket given in st->bucket; when st->bucket is zero the
2001 * very first socket in the hash table is returned.
2002 */
1da177e4
LT
2003static void *listening_get_next(struct seq_file *seq, void *cur)
2004{
463c84b9 2005 struct inet_connection_sock *icsk;
c25eb3bf 2006 struct hlist_nulls_node *node;
1da177e4 2007 struct sock *sk = cur;
5caea4ea 2008 struct inet_listen_hashbucket *ilb;
5799de0b 2009 struct tcp_iter_state *st = seq->private;
a4146b1b 2010 struct net *net = seq_file_net(seq);
1da177e4
LT
2011
2012 if (!sk) {
a8b690f9 2013 ilb = &tcp_hashinfo.listening_hash[st->bucket];
5caea4ea 2014 spin_lock_bh(&ilb->lock);
c25eb3bf 2015 sk = sk_nulls_head(&ilb->head);
a8b690f9 2016 st->offset = 0;
1da177e4
LT
2017 goto get_sk;
2018 }
5caea4ea 2019 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1da177e4 2020 ++st->num;
a8b690f9 2021 ++st->offset;
1da177e4
LT
2022
2023 if (st->state == TCP_SEQ_STATE_OPENREQ) {
60236fdd 2024 struct request_sock *req = cur;
1da177e4 2025
72a3effa 2026 icsk = inet_csk(st->syn_wait_sk);
1da177e4
LT
2027 req = req->dl_next;
2028 while (1) {
2029 while (req) {
bdccc4ca 2030 if (req->rsk_ops->family == st->family) {
1da177e4
LT
2031 cur = req;
2032 goto out;
2033 }
2034 req = req->dl_next;
2035 }
72a3effa 2036 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1da177e4
LT
2037 break;
2038get_req:
463c84b9 2039 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1da177e4 2040 }
1bde5ac4 2041 sk = sk_nulls_next(st->syn_wait_sk);
1da177e4 2042 st->state = TCP_SEQ_STATE_LISTENING;
463c84b9 2043 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2044 } else {
e905a9ed 2045 icsk = inet_csk(sk);
463c84b9
ACM
2046 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2047 if (reqsk_queue_len(&icsk->icsk_accept_queue))
1da177e4 2048 goto start_req;
463c84b9 2049 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1bde5ac4 2050 sk = sk_nulls_next(sk);
1da177e4
LT
2051 }
2052get_sk:
c25eb3bf 2053 sk_nulls_for_each_from(sk, node) {
8475ef9f
PE
2054 if (!net_eq(sock_net(sk), net))
2055 continue;
2056 if (sk->sk_family == st->family) {
1da177e4
LT
2057 cur = sk;
2058 goto out;
2059 }
e905a9ed 2060 icsk = inet_csk(sk);
463c84b9
ACM
2061 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2062 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1da177e4
LT
2063start_req:
2064 st->uid = sock_i_uid(sk);
2065 st->syn_wait_sk = sk;
2066 st->state = TCP_SEQ_STATE_OPENREQ;
2067 st->sbucket = 0;
2068 goto get_req;
2069 }
463c84b9 2070 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2071 }
5caea4ea 2072 spin_unlock_bh(&ilb->lock);
a8b690f9 2073 st->offset = 0;
0f7ff927 2074 if (++st->bucket < INET_LHTABLE_SIZE) {
5caea4ea
ED
2075 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2076 spin_lock_bh(&ilb->lock);
c25eb3bf 2077 sk = sk_nulls_head(&ilb->head);
1da177e4
LT
2078 goto get_sk;
2079 }
2080 cur = NULL;
2081out:
2082 return cur;
2083}
2084
2085static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2086{
a8b690f9
TH
2087 struct tcp_iter_state *st = seq->private;
2088 void *rc;
2089
2090 st->bucket = 0;
2091 st->offset = 0;
2092 rc = listening_get_next(seq, NULL);
1da177e4
LT
2093
2094 while (rc && *pos) {
2095 rc = listening_get_next(seq, rc);
2096 --*pos;
2097 }
2098 return rc;
2099}
2100
6eac5604
AK
2101static inline int empty_bucket(struct tcp_iter_state *st)
2102{
3ab5aee7
ED
2103 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2104 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
6eac5604
AK
2105}
2106
a8b690f9
TH
2107/*
2108 * Get first established socket starting from bucket given in st->bucket.
2109 * If st->bucket is zero, the very first socket in the hash is returned.
2110 */
1da177e4
LT
2111static void *established_get_first(struct seq_file *seq)
2112{
5799de0b 2113 struct tcp_iter_state *st = seq->private;
a4146b1b 2114 struct net *net = seq_file_net(seq);
1da177e4
LT
2115 void *rc = NULL;
2116
a8b690f9
TH
2117 st->offset = 0;
2118 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1da177e4 2119 struct sock *sk;
3ab5aee7 2120 struct hlist_nulls_node *node;
8feaf0c0 2121 struct inet_timewait_sock *tw;
9db66bdc 2122 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1da177e4 2123
6eac5604
AK
2124 /* Lockless fast path for the common case of empty buckets */
2125 if (empty_bucket(st))
2126 continue;
2127
9db66bdc 2128 spin_lock_bh(lock);
3ab5aee7 2129 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
f40c8174 2130 if (sk->sk_family != st->family ||
878628fb 2131 !net_eq(sock_net(sk), net)) {
1da177e4
LT
2132 continue;
2133 }
2134 rc = sk;
2135 goto out;
2136 }
2137 st->state = TCP_SEQ_STATE_TIME_WAIT;
8feaf0c0 2138 inet_twsk_for_each(tw, node,
dbca9b27 2139 &tcp_hashinfo.ehash[st->bucket].twchain) {
28518fc1 2140 if (tw->tw_family != st->family ||
878628fb 2141 !net_eq(twsk_net(tw), net)) {
1da177e4
LT
2142 continue;
2143 }
2144 rc = tw;
2145 goto out;
2146 }
9db66bdc 2147 spin_unlock_bh(lock);
1da177e4
LT
2148 st->state = TCP_SEQ_STATE_ESTABLISHED;
2149 }
2150out:
2151 return rc;
2152}
2153
2154static void *established_get_next(struct seq_file *seq, void *cur)
2155{
2156 struct sock *sk = cur;
8feaf0c0 2157 struct inet_timewait_sock *tw;
3ab5aee7 2158 struct hlist_nulls_node *node;
5799de0b 2159 struct tcp_iter_state *st = seq->private;
a4146b1b 2160 struct net *net = seq_file_net(seq);
1da177e4
LT
2161
2162 ++st->num;
a8b690f9 2163 ++st->offset;
1da177e4
LT
2164
2165 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2166 tw = cur;
2167 tw = tw_next(tw);
2168get_tw:
878628fb 2169 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
1da177e4
LT
2170 tw = tw_next(tw);
2171 }
2172 if (tw) {
2173 cur = tw;
2174 goto out;
2175 }
9db66bdc 2176 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2177 st->state = TCP_SEQ_STATE_ESTABLISHED;
2178
6eac5604 2179 /* Look for the next non-empty bucket */
a8b690f9 2180 st->offset = 0;
f373b53b 2181 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
6eac5604
AK
2182 empty_bucket(st))
2183 ;
f373b53b 2184 if (st->bucket > tcp_hashinfo.ehash_mask)
6eac5604
AK
2185 return NULL;
2186
9db66bdc 2187 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
3ab5aee7 2188 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
1da177e4 2189 } else
3ab5aee7 2190 sk = sk_nulls_next(sk);
1da177e4 2191
3ab5aee7 2192 sk_nulls_for_each_from(sk, node) {
878628fb 2193 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
1da177e4
LT
2194 goto found;
2195 }
2196
2197 st->state = TCP_SEQ_STATE_TIME_WAIT;
dbca9b27 2198 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
1da177e4
LT
2199 goto get_tw;
2200found:
2201 cur = sk;
2202out:
2203 return cur;
2204}
2205
2206static void *established_get_idx(struct seq_file *seq, loff_t pos)
2207{
a8b690f9
TH
2208 struct tcp_iter_state *st = seq->private;
2209 void *rc;
2210
2211 st->bucket = 0;
2212 rc = established_get_first(seq);
1da177e4
LT
2213
2214 while (rc && pos) {
2215 rc = established_get_next(seq, rc);
2216 --pos;
7174259e 2217 }
1da177e4
LT
2218 return rc;
2219}
2220
2221static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2222{
2223 void *rc;
5799de0b 2224 struct tcp_iter_state *st = seq->private;
1da177e4 2225
1da177e4
LT
2226 st->state = TCP_SEQ_STATE_LISTENING;
2227 rc = listening_get_idx(seq, &pos);
2228
2229 if (!rc) {
1da177e4
LT
2230 st->state = TCP_SEQ_STATE_ESTABLISHED;
2231 rc = established_get_idx(seq, pos);
2232 }
2233
2234 return rc;
2235}
2236
a8b690f9
TH
2237static void *tcp_seek_last_pos(struct seq_file *seq)
2238{
2239 struct tcp_iter_state *st = seq->private;
2240 int offset = st->offset;
2241 int orig_num = st->num;
2242 void *rc = NULL;
2243
2244 switch (st->state) {
2245 case TCP_SEQ_STATE_OPENREQ:
2246 case TCP_SEQ_STATE_LISTENING:
2247 if (st->bucket >= INET_LHTABLE_SIZE)
2248 break;
2249 st->state = TCP_SEQ_STATE_LISTENING;
2250 rc = listening_get_next(seq, NULL);
2251 while (offset-- && rc)
2252 rc = listening_get_next(seq, rc);
2253 if (rc)
2254 break;
2255 st->bucket = 0;
2256 /* Fallthrough */
2257 case TCP_SEQ_STATE_ESTABLISHED:
2258 case TCP_SEQ_STATE_TIME_WAIT:
2259 st->state = TCP_SEQ_STATE_ESTABLISHED;
2260 if (st->bucket > tcp_hashinfo.ehash_mask)
2261 break;
2262 rc = established_get_first(seq);
2263 while (offset-- && rc)
2264 rc = established_get_next(seq, rc);
2265 }
2266
2267 st->num = orig_num;
2268
2269 return rc;
2270}
2271
1da177e4
LT
2272static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2273{
5799de0b 2274 struct tcp_iter_state *st = seq->private;
a8b690f9
TH
2275 void *rc;
2276
2277 if (*pos && *pos == st->last_pos) {
2278 rc = tcp_seek_last_pos(seq);
2279 if (rc)
2280 goto out;
2281 }
2282
1da177e4
LT
2283 st->state = TCP_SEQ_STATE_LISTENING;
2284 st->num = 0;
a8b690f9
TH
2285 st->bucket = 0;
2286 st->offset = 0;
2287 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2288
2289out:
2290 st->last_pos = *pos;
2291 return rc;
1da177e4
LT
2292}
2293
2294static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2295{
a8b690f9 2296 struct tcp_iter_state *st = seq->private;
1da177e4 2297 void *rc = NULL;
1da177e4
LT
2298
2299 if (v == SEQ_START_TOKEN) {
2300 rc = tcp_get_idx(seq, 0);
2301 goto out;
2302 }
1da177e4
LT
2303
2304 switch (st->state) {
2305 case TCP_SEQ_STATE_OPENREQ:
2306 case TCP_SEQ_STATE_LISTENING:
2307 rc = listening_get_next(seq, v);
2308 if (!rc) {
1da177e4 2309 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2310 st->bucket = 0;
2311 st->offset = 0;
1da177e4
LT
2312 rc = established_get_first(seq);
2313 }
2314 break;
2315 case TCP_SEQ_STATE_ESTABLISHED:
2316 case TCP_SEQ_STATE_TIME_WAIT:
2317 rc = established_get_next(seq, v);
2318 break;
2319 }
2320out:
2321 ++*pos;
a8b690f9 2322 st->last_pos = *pos;
1da177e4
LT
2323 return rc;
2324}
2325
2326static void tcp_seq_stop(struct seq_file *seq, void *v)
2327{
5799de0b 2328 struct tcp_iter_state *st = seq->private;
1da177e4
LT
2329
2330 switch (st->state) {
2331 case TCP_SEQ_STATE_OPENREQ:
2332 if (v) {
463c84b9
ACM
2333 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2334 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4
LT
2335 }
2336 case TCP_SEQ_STATE_LISTENING:
2337 if (v != SEQ_START_TOKEN)
5caea4ea 2338 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
1da177e4
LT
2339 break;
2340 case TCP_SEQ_STATE_TIME_WAIT:
2341 case TCP_SEQ_STATE_ESTABLISHED:
2342 if (v)
9db66bdc 2343 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2344 break;
2345 }
2346}
2347
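tcp_seq_start/next/stop above implement the seq_file iterator contract: start() positions the cursor at *pos (via the last_pos shortcut when possible), next() advances the cursor and bumps *pos, and stop() releases whatever lock the current state still holds. A toy userspace analog of that contract over an array (not the kernel seq_file API):

#include <stdio.h>

/* A toy start/next/stop iterator over an array, mirroring the seq_file
 * contract used above: start() positions at *pos, next() advances and
 * bumps *pos, stop() releases whatever start() acquired (nothing here).
 */
static int table[] = { 10, 20, 30 };
static const long table_len = 3;

static int *it_start(long *pos)
{
	return *pos < table_len ? &table[*pos] : NULL;
}

static int *it_next(int *cur, long *pos)
{
	++*pos;
	return *pos < table_len ? cur + 1 : NULL;
}

static void it_stop(void)
{
	/* the real tcp_seq_stop() drops the bucket lock here */
}

int main(void)
{
	long pos = 0;

	for (int *v = it_start(&pos); v; v = it_next(v, &pos))
		printf("%d\n", *v);
	it_stop();
	return 0;
}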
73cb88ec 2348int tcp_seq_open(struct inode *inode, struct file *file)
1da177e4
LT
2349{
2350 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
1da177e4 2351 struct tcp_iter_state *s;
52d6f3f1 2352 int err;
1da177e4 2353
52d6f3f1
DL
2354 err = seq_open_net(inode, file, &afinfo->seq_ops,
2355 sizeof(struct tcp_iter_state));
2356 if (err < 0)
2357 return err;
f40c8174 2358
52d6f3f1 2359 s = ((struct seq_file *)file->private_data)->private;
1da177e4 2360 s->family = afinfo->family;
a8b690f9 2361 s->last_pos = 0;
f40c8174
DL
2362 return 0;
2363}
73cb88ec 2364EXPORT_SYMBOL(tcp_seq_open);
f40c8174 2365
6f8b13bc 2366int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4
LT
2367{
2368 int rc = 0;
2369 struct proc_dir_entry *p;
2370
9427c4b3
DL
2371 afinfo->seq_ops.start = tcp_seq_start;
2372 afinfo->seq_ops.next = tcp_seq_next;
2373 afinfo->seq_ops.stop = tcp_seq_stop;
2374
84841c3c 2375 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
73cb88ec 2376 afinfo->seq_fops, afinfo);
84841c3c 2377 if (!p)
1da177e4
LT
2378 rc = -ENOMEM;
2379 return rc;
2380}
4bc2f18b 2381EXPORT_SYMBOL(tcp_proc_register);
1da177e4 2382
6f8b13bc 2383void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4 2384{
6f8b13bc 2385 proc_net_remove(net, afinfo->name);
1da177e4 2386}
4bc2f18b 2387EXPORT_SYMBOL(tcp_proc_unregister);
1da177e4 2388
cf533ea5 2389static void get_openreq4(const struct sock *sk, const struct request_sock *req,
5e659e4c 2390 struct seq_file *f, int i, int uid, int *len)
1da177e4 2391{
2e6599cb 2392 const struct inet_request_sock *ireq = inet_rsk(req);
1da177e4
LT
2393 int ttd = req->expires - jiffies;
2394
5e659e4c 2395 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2396 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
1da177e4 2397 i,
2e6599cb 2398 ireq->loc_addr,
c720c7e8 2399 ntohs(inet_sk(sk)->inet_sport),
2e6599cb
ACM
2400 ireq->rmt_addr,
2401 ntohs(ireq->rmt_port),
1da177e4
LT
2402 TCP_SYN_RECV,
2403 0, 0, /* could print option size, but that is af dependent. */
2404 1, /* timers active (only the expire timer) */
2405 jiffies_to_clock_t(ttd),
2406 req->retrans,
2407 uid,
2408 0, /* non standard timer */
2409 0, /* open_requests have no inode */
2410 atomic_read(&sk->sk_refcnt),
5e659e4c
PE
2411 req,
2412 len);
1da177e4
LT
2413}
2414
5e659e4c 2415static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
1da177e4
LT
2416{
2417 int timer_active;
2418 unsigned long timer_expires;
cf533ea5 2419 const struct tcp_sock *tp = tcp_sk(sk);
cf4c6bf8 2420 const struct inet_connection_sock *icsk = inet_csk(sk);
cf533ea5 2421 const struct inet_sock *inet = inet_sk(sk);
c720c7e8
ED
2422 __be32 dest = inet->inet_daddr;
2423 __be32 src = inet->inet_rcv_saddr;
2424 __u16 destp = ntohs(inet->inet_dport);
2425 __u16 srcp = ntohs(inet->inet_sport);
49d09007 2426 int rx_queue;
1da177e4 2427
463c84b9 2428 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1da177e4 2429 timer_active = 1;
463c84b9
ACM
2430 timer_expires = icsk->icsk_timeout;
2431 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2432 timer_active = 4;
463c84b9 2433 timer_expires = icsk->icsk_timeout;
cf4c6bf8 2434 } else if (timer_pending(&sk->sk_timer)) {
1da177e4 2435 timer_active = 2;
cf4c6bf8 2436 timer_expires = sk->sk_timer.expires;
1da177e4
LT
2437 } else {
2438 timer_active = 0;
2439 timer_expires = jiffies;
2440 }
2441
49d09007
ED
2442 if (sk->sk_state == TCP_LISTEN)
2443 rx_queue = sk->sk_ack_backlog;
2444 else
2445 /*
2446 * Because we don't lock the socket, we might find a transient negative value.
2447 */
2448 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2449
5e659e4c 2450 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
71338aa7 2451 "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
cf4c6bf8 2452 i, src, srcp, dest, destp, sk->sk_state,
47da8ee6 2453 tp->write_seq - tp->snd_una,
49d09007 2454 rx_queue,
1da177e4
LT
2455 timer_active,
2456 jiffies_to_clock_t(timer_expires - jiffies),
463c84b9 2457 icsk->icsk_retransmits,
cf4c6bf8 2458 sock_i_uid(sk),
6687e988 2459 icsk->icsk_probes_out,
cf4c6bf8
IJ
2460 sock_i_ino(sk),
2461 atomic_read(&sk->sk_refcnt), sk,
7be87351
SH
2462 jiffies_to_clock_t(icsk->icsk_rto),
2463 jiffies_to_clock_t(icsk->icsk_ack.ato),
463c84b9 2464 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1da177e4 2465 tp->snd_cwnd,
0b6a05c1 2466 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
5e659e4c 2467 len);
1da177e4
LT
2468}
2469
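The rx_queue computation above reads rcv_nxt and copied_seq without the socket lock, so the u32 subtraction can transiently come out negative and is clamped at zero by max_t(int, ...). The same clamp in standalone C:

#include <stdint.h>
#include <stdio.h>

/*
 * rcv_nxt and copied_seq are u32 sequence numbers sampled racily; a
 * reader can observe copied_seq ahead of rcv_nxt, so clamp at zero.
 */
static int rx_queue_depth(uint32_t rcv_nxt, uint32_t copied_seq)
{
	int diff = (int)(rcv_nxt - copied_seq);	/* wraps like kernel u32 math */

	return diff > 0 ? diff : 0;
}

int main(void)
{
	printf("%d\n", rx_queue_depth(1000, 900));	/* 100 bytes unread */
	printf("%d\n", rx_queue_depth(900, 1000));	/* transient race -> 0 */
	return 0;
}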
cf533ea5 2470static void get_timewait4_sock(const struct inet_timewait_sock *tw,
5e659e4c 2471 struct seq_file *f, int i, int *len)
1da177e4 2472{
23f33c2d 2473 __be32 dest, src;
1da177e4
LT
2474 __u16 destp, srcp;
2475 int ttd = tw->tw_ttd - jiffies;
2476
2477 if (ttd < 0)
2478 ttd = 0;
2479
2480 dest = tw->tw_daddr;
2481 src = tw->tw_rcv_saddr;
2482 destp = ntohs(tw->tw_dport);
2483 srcp = ntohs(tw->tw_sport);
2484
5e659e4c 2485 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2486 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
1da177e4
LT
2487 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2488 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
5e659e4c 2489 atomic_read(&tw->tw_refcnt), tw, len);
1da177e4
LT
2490}
2491
2492#define TMPSZ 150
2493
2494static int tcp4_seq_show(struct seq_file *seq, void *v)
2495{
5799de0b 2496 struct tcp_iter_state *st;
5e659e4c 2497 int len;
1da177e4
LT
2498
2499 if (v == SEQ_START_TOKEN) {
2500 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2501 " sl local_address rem_address st tx_queue "
2502 "rx_queue tr tm->when retrnsmt uid timeout "
2503 "inode");
2504 goto out;
2505 }
2506 st = seq->private;
2507
2508 switch (st->state) {
2509 case TCP_SEQ_STATE_LISTENING:
2510 case TCP_SEQ_STATE_ESTABLISHED:
5e659e4c 2511 get_tcp4_sock(v, seq, st->num, &len);
1da177e4
LT
2512 break;
2513 case TCP_SEQ_STATE_OPENREQ:
5e659e4c 2514 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
1da177e4
LT
2515 break;
2516 case TCP_SEQ_STATE_TIME_WAIT:
5e659e4c 2517 get_timewait4_sock(v, seq, st->num, &len);
1da177e4
LT
2518 break;
2519 }
5e659e4c 2520 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
1da177e4
LT
2521out:
2522 return 0;
2523}
2524
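tcp4_seq_show() is what backs each row of /proc/net/tcp, with endpoints printed as native-endian hex "AABBCCDD:PPPP" and the state as hex. A small reader for the first columns of that format, sketched against the layout get_tcp4_sock() prints above:

#include <stdio.h>
#include <arpa/inet.h>

/* Parse slot, local address:port and state from each /proc/net/tcp row. */
int main(void)
{
	char line[512];
	FILE *f = fopen("/proc/net/tcp", "r");

	if (!f)
		return 1;
	if (!fgets(line, sizeof(line), f)) {	/* skip the header row */
		fclose(f);
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		unsigned int addr, port, st;

		if (sscanf(line, " %*d: %8X:%4X %*8X:%*4X %2X",
			   &addr, &port, &st) == 3) {
			struct in_addr in = { .s_addr = addr };

			printf("%s:%u state %02X\n", inet_ntoa(in), port, st);
		}
	}
	fclose(f);
	return 0;
}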
73cb88ec
AV
2525static const struct file_operations tcp_afinfo_seq_fops = {
2526 .owner = THIS_MODULE,
2527 .open = tcp_seq_open,
2528 .read = seq_read,
2529 .llseek = seq_lseek,
2530 .release = seq_release_net
2531};
2532
1da177e4 2533static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1da177e4
LT
2534 .name = "tcp",
2535 .family = AF_INET,
73cb88ec 2536 .seq_fops = &tcp_afinfo_seq_fops,
9427c4b3
DL
2537 .seq_ops = {
2538 .show = tcp4_seq_show,
2539 },
1da177e4
LT
2540};
2541
2c8c1e72 2542static int __net_init tcp4_proc_init_net(struct net *net)
757764f6
PE
2543{
2544 return tcp_proc_register(net, &tcp4_seq_afinfo);
2545}
2546
2c8c1e72 2547static void __net_exit tcp4_proc_exit_net(struct net *net)
757764f6
PE
2548{
2549 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2550}
2551
2552static struct pernet_operations tcp4_net_ops = {
2553 .init = tcp4_proc_init_net,
2554 .exit = tcp4_proc_exit_net,
2555};
2556
1da177e4
LT
2557int __init tcp4_proc_init(void)
2558{
757764f6 2559 return register_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2560}
2561
2562void tcp4_proc_exit(void)
2563{
757764f6 2564 unregister_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2565}
2566#endif /* CONFIG_PROC_FS */
2567
bf296b12
HX
2568struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2569{
b71d1d42 2570 const struct iphdr *iph = skb_gro_network_header(skb);
bf296b12
HX
2571
2572 switch (skb->ip_summed) {
2573 case CHECKSUM_COMPLETE:
86911732 2574 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
bf296b12
HX
2575 skb->csum)) {
2576 skb->ip_summed = CHECKSUM_UNNECESSARY;
2577 break;
2578 }
2579
2580 /* fall through */
2581 case CHECKSUM_NONE:
2582 NAPI_GRO_CB(skb)->flush = 1;
2583 return NULL;
2584 }
2585
2586 return tcp_gro_receive(head, skb);
2587}
bf296b12
HX
2588
2589int tcp4_gro_complete(struct sk_buff *skb)
2590{
b71d1d42 2591 const struct iphdr *iph = ip_hdr(skb);
bf296b12
HX
2592 struct tcphdr *th = tcp_hdr(skb);
2593
2594 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2595 iph->saddr, iph->daddr, 0);
2596 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2597
2598 return tcp_gro_complete(skb);
2599}
bf296b12 2600
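tcp4_gro_complete() re-seeds th->check from the IPv4 pseudo-header so checksum offload can finish the sum over the merged payload. The underlying RFC 1071 pseudo-header arithmetic, written out in standalone C rather than with the kernel's csum helpers:

#include <stdint.h>
#include <stdio.h>
#include <netinet/in.h>

/*
 * Pseudo-header folding like tcp_v4_check(): sum saddr, daddr, protocol
 * and TCP length as 16-bit words, fold the carries, complement.
 */
static uint16_t tcp_pseudo_csum(uint32_t saddr, uint32_t daddr, uint16_t len)
{
	uint32_t sum = 0;

	sum += saddr >> 16;
	sum += saddr & 0xffff;
	sum += daddr >> 16;
	sum += daddr & 0xffff;
	sum += IPPROTO_TCP;	/* protocol, zero-padded high byte */
	sum += len;		/* TCP header + payload length */

	while (sum >> 16)	/* end-around carry */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* 10.0.0.1 -> 10.0.0.2, 40-byte segment (host-order values) */
	printf("0x%04x\n", tcp_pseudo_csum(0x0a000001, 0x0a000002, 40));
	return 0;
}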
1da177e4
LT
2601struct proto tcp_prot = {
2602 .name = "TCP",
2603 .owner = THIS_MODULE,
2604 .close = tcp_close,
2605 .connect = tcp_v4_connect,
2606 .disconnect = tcp_disconnect,
463c84b9 2607 .accept = inet_csk_accept,
1da177e4
LT
2608 .ioctl = tcp_ioctl,
2609 .init = tcp_v4_init_sock,
2610 .destroy = tcp_v4_destroy_sock,
2611 .shutdown = tcp_shutdown,
2612 .setsockopt = tcp_setsockopt,
2613 .getsockopt = tcp_getsockopt,
1da177e4 2614 .recvmsg = tcp_recvmsg,
7ba42910
CG
2615 .sendmsg = tcp_sendmsg,
2616 .sendpage = tcp_sendpage,
1da177e4 2617 .backlog_rcv = tcp_v4_do_rcv,
ab1e0a13
ACM
2618 .hash = inet_hash,
2619 .unhash = inet_unhash,
2620 .get_port = inet_csk_get_port,
1da177e4
LT
2621 .enter_memory_pressure = tcp_enter_memory_pressure,
2622 .sockets_allocated = &tcp_sockets_allocated,
0a5578cf 2623 .orphan_count = &tcp_orphan_count,
1da177e4
LT
2624 .memory_allocated = &tcp_memory_allocated,
2625 .memory_pressure = &tcp_memory_pressure,
2626 .sysctl_mem = sysctl_tcp_mem,
2627 .sysctl_wmem = sysctl_tcp_wmem,
2628 .sysctl_rmem = sysctl_tcp_rmem,
2629 .max_header = MAX_TCP_HEADER,
2630 .obj_size = sizeof(struct tcp_sock),
3ab5aee7 2631 .slab_flags = SLAB_DESTROY_BY_RCU,
6d6ee43e 2632 .twsk_prot = &tcp_timewait_sock_ops,
60236fdd 2633 .rsk_prot = &tcp_request_sock_ops,
39d8cda7 2634 .h.hashinfo = &tcp_hashinfo,
7ba42910 2635 .no_autobind = true,
543d9cfe
ACM
2636#ifdef CONFIG_COMPAT
2637 .compat_setsockopt = compat_tcp_setsockopt,
2638 .compat_getsockopt = compat_tcp_getsockopt,
2639#endif
d1a4c0b3
GC
2640#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
2641 .init_cgroup = tcp_init_cgroup,
2642 .destroy_cgroup = tcp_destroy_cgroup,
2643 .proto_cgroup = tcp_proto_cgroup,
2644#endif
1da177e4 2645};
4bc2f18b 2646EXPORT_SYMBOL(tcp_prot);
1da177e4 2647
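tcp_prot wires .sysctl_mem to sysctl_tcp_mem, the three page-denominated thresholds (low, pressure, high) that drive tcp_enter_memory_pressure() and the memory-pressure accounting this file participates in. Converting them to bytes from userspace, assuming the standard procfs path:

#include <stdio.h>
#include <unistd.h>

/*
 * sysctl_tcp_mem is expressed in PAGES; scale by the runtime page size
 * to see the actual byte thresholds.
 */
int main(void)
{
	long low, pressure, high;
	long page = sysconf(_SC_PAGESIZE);
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_mem", "r");

	if (!f || fscanf(f, "%ld %ld %ld", &low, &pressure, &high) != 3) {
		perror("tcp_mem");
		return 1;
	}
	fclose(f);
	printf("low %ld MiB, pressure %ld MiB, high %ld MiB\n",
	       low * page >> 20, pressure * page >> 20, high * page >> 20);
	return 0;
}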
046ee902
DL
2648static int __net_init tcp_sk_init(struct net *net)
2649{
2650 return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2651 PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2652}
2653
2654static void __net_exit tcp_sk_exit(struct net *net)
2655{
2656 inet_ctl_sock_destroy(net->ipv4.tcp_sock);
b099ce26
EB
2657}
2658
2659static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2660{
2661 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
046ee902
DL
2662}
2663
2664static struct pernet_operations __net_initdata tcp_sk_ops = {
b099ce26
EB
2665 .init = tcp_sk_init,
2666 .exit = tcp_sk_exit,
2667 .exit_batch = tcp_sk_exit_batch,
046ee902
DL
2668};
2669
9b0f976f 2670void __init tcp_v4_init(void)
1da177e4 2671{
5caea4ea 2672 inet_hashinfo_init(&tcp_hashinfo);
6a1b3054 2673 if (register_pernet_subsys(&tcp_sk_ops))
1da177e4 2674 panic("Failed to create the TCP control socket.\n");
1da177e4 2675}