tcp: move fastopen functions to tcp_fastopen.c
net/ipv4/tcp_ipv4.c
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
8 * IPv4 specific functions
9 *
10 *
11 * code split from:
12 * linux/ipv4/tcp.c
13 * linux/ipv4/tcp_input.c
14 * linux/ipv4/tcp_output.c
15 *
16 * See tcp.c for author information
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 */
23
24/*
25 * Changes:
26 * David S. Miller : New socket lookup architecture.
27 * This code is dedicated to John Dyson.
28 * David S. Miller : Change semantics of established hash,
29 * half is devoted to TIME_WAIT sockets
30 * and the rest go in the other half.
31 * Andi Kleen : Add support for syncookies and fixed
32 * some bugs: ip options weren't passed to
33 * the TCP layer, missed a check for an
34 * ACK bit.
35 * Andi Kleen : Implemented fast path mtu discovery.
36 * Fixed many serious bugs in the
37 * request_sock handling and moved
38 * most of it into the af independent code.
39 * Added tail drop and some other bugfixes.
40 * Added new listen semantics.
41 * Mike McLagan : Routing by source
42 * Juan Jose Ciarlante: ip_dynaddr bits
43 * Andi Kleen: various fixes.
44 * Vitaly E. Lavrov : Transparent proxy revived after year
45 * coma.
46 * Andi Kleen : Fix new listen.
47 * Andi Kleen : Fix accept error reporting.
48 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
49 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
50 * a single port at the same time.
51 */
52
53#define pr_fmt(fmt) "TCP: " fmt
54
55#include <linux/bottom_half.h>
56#include <linux/types.h>
57#include <linux/fcntl.h>
58#include <linux/module.h>
59#include <linux/random.h>
60#include <linux/cache.h>
61#include <linux/jhash.h>
62#include <linux/init.h>
63#include <linux/times.h>
64#include <linux/slab.h>
65
66#include <net/net_namespace.h>
67#include <net/icmp.h>
68#include <net/inet_hashtables.h>
69#include <net/tcp.h>
70#include <net/transp_v6.h>
71#include <net/ipv6.h>
72#include <net/inet_common.h>
73#include <net/timewait_sock.h>
74#include <net/xfrm.h>
75#include <net/netdma.h>
76#include <net/secure_seq.h>
77#include <net/tcp_memcontrol.h>
78#include <net/busy_poll.h>
79
80#include <linux/inet.h>
81#include <linux/ipv6.h>
82#include <linux/stddef.h>
83#include <linux/proc_fs.h>
84#include <linux/seq_file.h>
85
86#include <linux/crypto.h>
87#include <linux/scatterlist.h>
88
89int sysctl_tcp_tw_reuse __read_mostly;
90int sysctl_tcp_low_latency __read_mostly;
91EXPORT_SYMBOL(sysctl_tcp_low_latency);
92
93
94#ifdef CONFIG_TCP_MD5SIG
95static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
96 __be32 daddr, __be32 saddr, const struct tcphdr *th);
97#endif
98
99struct inet_hashinfo tcp_hashinfo;
100EXPORT_SYMBOL(tcp_hashinfo);
101
102static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
103{
104 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
105 ip_hdr(skb)->saddr,
106 tcp_hdr(skb)->dest,
107 tcp_hdr(skb)->source);
108}
109
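/* Note: the ISN above is a keyed hash of the connection 4-tuple plus a
 * clock component (see net/core/secure_seq.c), in the spirit of RFC 6528,
 * so initial sequence numbers stay hard to predict across connections.
 */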
6d6ee43e
ACM
110int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
111{
112 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
113 struct tcp_sock *tp = tcp_sk(sk);
114
115 /* With PAWS, it is safe from the viewpoint
116 of data integrity. Even without PAWS it is safe provided sequence
117 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
118
119 Actually, the idea is close to VJ's one, only timestamp cache is
120 held not per host, but per port pair and TW bucket is used as state
121 holder.
122
123 If TW bucket has been already destroyed we fall back to VJ's scheme
124 and use initial timestamp retrieved from peer table.
125 */
126 if (tcptw->tw_ts_recent_stamp &&
127 (twp == NULL || (sysctl_tcp_tw_reuse &&
9d729f72 128 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
6d6ee43e
ACM
129 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
130 if (tp->write_seq == 0)
131 tp->write_seq = 1;
132 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
133 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
134 sock_hold(sktw);
135 return 1;
136 }
137
138 return 0;
139}
6d6ee43e
ACM
140EXPORT_SYMBOL_GPL(tcp_twsk_unique);
141
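/* Note: when tcp_twsk_unique() allows a TIME-WAIT port pair to be reused,
 * the new connection's write_seq starts 65535 + 2 beyond the old tw_snd_nxt,
 * so its sequence space cannot collide with stray segments from the
 * previous incarnation.
 */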
1da177e4
LT
142/* This will initiate an outgoing connection. */
143int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
144{
2d7192d6 145 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
1da177e4
LT
146 struct inet_sock *inet = inet_sk(sk);
147 struct tcp_sock *tp = tcp_sk(sk);
dca8b089 148 __be16 orig_sport, orig_dport;
bada8adc 149 __be32 daddr, nexthop;
da905bd1 150 struct flowi4 *fl4;
2d7192d6 151 struct rtable *rt;
1da177e4 152 int err;
f6d8bd05 153 struct ip_options_rcu *inet_opt;
1da177e4
LT
154
155 if (addr_len < sizeof(struct sockaddr_in))
156 return -EINVAL;
157
158 if (usin->sin_family != AF_INET)
159 return -EAFNOSUPPORT;
160
161 nexthop = daddr = usin->sin_addr.s_addr;
f6d8bd05
ED
162 inet_opt = rcu_dereference_protected(inet->inet_opt,
163 sock_owned_by_user(sk));
164 if (inet_opt && inet_opt->opt.srr) {
1da177e4
LT
165 if (!daddr)
166 return -EINVAL;
f6d8bd05 167 nexthop = inet_opt->opt.faddr;
1da177e4
LT
168 }
169
dca8b089
DM
170 orig_sport = inet->inet_sport;
171 orig_dport = usin->sin_port;
da905bd1
DM
172 fl4 = &inet->cork.fl.u.ip4;
173 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
b23dd4fe
DM
174 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
175 IPPROTO_TCP,
0e0d44ab 176 orig_sport, orig_dport, sk);
b23dd4fe
DM
177 if (IS_ERR(rt)) {
178 err = PTR_ERR(rt);
179 if (err == -ENETUNREACH)
f1d8cba6 180 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
b23dd4fe 181 return err;
584bdf8c 182 }
1da177e4
LT
183
184 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
185 ip_rt_put(rt);
186 return -ENETUNREACH;
187 }
188
f6d8bd05 189 if (!inet_opt || !inet_opt->opt.srr)
da905bd1 190 daddr = fl4->daddr;
1da177e4 191
c720c7e8 192 if (!inet->inet_saddr)
da905bd1 193 inet->inet_saddr = fl4->saddr;
c720c7e8 194 inet->inet_rcv_saddr = inet->inet_saddr;
1da177e4 195
c720c7e8 196 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
1da177e4
LT
197 /* Reset inherited state */
198 tp->rx_opt.ts_recent = 0;
199 tp->rx_opt.ts_recent_stamp = 0;
ee995283
PE
200 if (likely(!tp->repair))
201 tp->write_seq = 0;
1da177e4
LT
202 }
203
295ff7ed 204 if (tcp_death_row.sysctl_tw_recycle &&
81166dd6
DM
205 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
206 tcp_fetch_timewait_stamp(sk, &rt->dst);
1da177e4 207
c720c7e8
ED
208 inet->inet_dport = usin->sin_port;
209 inet->inet_daddr = daddr;
1da177e4 210
d83d8461 211 inet_csk(sk)->icsk_ext_hdr_len = 0;
f6d8bd05
ED
212 if (inet_opt)
213 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1da177e4 214
bee7ca9e 215 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
1da177e4
LT
216
217 /* Socket identity is still unknown (sport may be zero).
218 * However we set state to SYN-SENT and, without releasing the socket
219 * lock, select a source port, enter ourselves into the hash tables and
220 * complete initialization after this.
221 */
222 tcp_set_state(sk, TCP_SYN_SENT);
a7f5e7f1 223 err = inet_hash_connect(&tcp_death_row, sk);
1da177e4
LT
224 if (err)
225 goto failure;
226
da905bd1 227 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
b23dd4fe
DM
228 inet->inet_sport, inet->inet_dport, sk);
229 if (IS_ERR(rt)) {
230 err = PTR_ERR(rt);
231 rt = NULL;
1da177e4 232 goto failure;
b23dd4fe 233 }
1da177e4 234 /* OK, now commit destination to socket. */
bcd76111 235 sk->sk_gso_type = SKB_GSO_TCPV4;
d8d1f30b 236 sk_setup_caps(sk, &rt->dst);
1da177e4 237
ee995283 238 if (!tp->write_seq && likely(!tp->repair))
c720c7e8
ED
239 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
240 inet->inet_daddr,
241 inet->inet_sport,
1da177e4
LT
242 usin->sin_port);
243
c720c7e8 244 inet->inet_id = tp->write_seq ^ jiffies;
1da177e4 245
2b916477 246 err = tcp_connect(sk);
ee995283 247
1da177e4
LT
248 rt = NULL;
249 if (err)
250 goto failure;
251
252 return 0;
253
254failure:
7174259e
ACM
255 /*
256 * This unhashes the socket and releases the local port,
257 * if necessary.
258 */
1da177e4
LT
259 tcp_set_state(sk, TCP_CLOSE);
260 ip_rt_put(rt);
261 sk->sk_route_caps = 0;
c720c7e8 262 inet->inet_dport = 0;
1da177e4
LT
263 return err;
264}
4bc2f18b 265EXPORT_SYMBOL(tcp_v4_connect);
1da177e4 266
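/* Note: the connect path above is, in short: resolve a route for the
 * destination (honouring any source-routing option), pick the source
 * address, move to SYN-SENT, grab a local port via inet_hash_connect(),
 * commit the route to the socket, choose the ISN and finally call
 * tcp_connect() to emit the SYN; any failure unwinds to TCP_CLOSE.
 */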
267/*
268 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
269 * It can be called through tcp_release_cb() if the socket was owned by the user
270 * at the time tcp_v4_err() was called to handle the ICMP message.
271 */
563d34d0 272static void tcp_v4_mtu_reduced(struct sock *sk)
1da177e4
LT
273{
274 struct dst_entry *dst;
275 struct inet_sock *inet = inet_sk(sk);
563d34d0 276 u32 mtu = tcp_sk(sk)->mtu_info;
1da177e4 277
80d0a69f
DM
278 dst = inet_csk_update_pmtu(sk, mtu);
279 if (!dst)
1da177e4
LT
280 return;
281
1da177e4
LT
282 /* Something is about to be wrong... Remember soft error
283 * for the case, if this connection is not able to recover.
284 */
285 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
286 sk->sk_err_soft = EMSGSIZE;
287
288 mtu = dst_mtu(dst);
289
290 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
482fc609 291 ip_sk_accept_pmtu(sk) &&
d83d8461 292 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
1da177e4
LT
293 tcp_sync_mss(sk, mtu);
294
295 /* Resend the TCP packet because it's
296 * clear that the old packet has been
297 * dropped. This is the new "fast" path mtu
298 * discovery.
299 */
300 tcp_simple_retransmit(sk);
301 } /* else let the usual retransmit timer handle it */
302}
303
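/* Note: inet_csk_update_pmtu() refreshes the cached route MTU; if the
 * socket's cached value (icsk_pmtu_cookie) is now too large and PMTU
 * discovery is enabled, tcp_sync_mss() shrinks the MSS and
 * tcp_simple_retransmit() resends immediately instead of waiting for the
 * retransmit timer.
 */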
55be7a9c
DM
304static void do_redirect(struct sk_buff *skb, struct sock *sk)
305{
306 struct dst_entry *dst = __sk_dst_check(sk, 0);
307
1ed5c48f 308 if (dst)
6700c270 309 dst->ops->redirect(dst, sk, skb);
55be7a9c
DM
310}
311
312/*
313 * This routine is called by the ICMP module when it gets some
314 * sort of error condition. If err < 0 then the socket should
315 * be closed and the error returned to the user. If err > 0
316 * it's just the icmp type << 8 | icmp code. After adjustment
317 * header points to the first 8 bytes of the tcp header. We need
318 * to find the appropriate port.
319 *
320 * The locking strategy used here is very "optimistic". When
321 * someone else accesses the socket the ICMP is just dropped
322 * and for some paths there is no check at all.
323 * A more general error queue to queue errors for later handling
324 * is probably better.
325 *
326 */
327
4d1a2d9e 328void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
1da177e4 329{
b71d1d42 330 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
4d1a2d9e 331 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
f1ecd5d9 332 struct inet_connection_sock *icsk;
1da177e4
LT
333 struct tcp_sock *tp;
334 struct inet_sock *inet;
4d1a2d9e
DL
335 const int type = icmp_hdr(icmp_skb)->type;
336 const int code = icmp_hdr(icmp_skb)->code;
1da177e4 337 struct sock *sk;
f1ecd5d9 338 struct sk_buff *skb;
168a8f58 339 struct request_sock *req;
1da177e4 340 __u32 seq;
f1ecd5d9 341 __u32 remaining;
1da177e4 342 int err;
4d1a2d9e 343 struct net *net = dev_net(icmp_skb->dev);
1da177e4 344
4d1a2d9e 345 if (icmp_skb->len < (iph->ihl << 2) + 8) {
dcfc23ca 346 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
1da177e4
LT
347 return;
348 }
349
fd54d716 350 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
4d1a2d9e 351 iph->saddr, th->source, inet_iif(icmp_skb));
1da177e4 352 if (!sk) {
dcfc23ca 353 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
1da177e4
LT
354 return;
355 }
356 if (sk->sk_state == TCP_TIME_WAIT) {
9469c7b4 357 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
358 return;
359 }
360
361 bh_lock_sock(sk);
362 /* If too many ICMPs get dropped on busy
363 * servers this needs to be solved differently.
364 * We do take care of PMTU discovery (RFC1191) special case :
365 * we can receive locally generated ICMP messages while socket is held.
1da177e4 366 */
b74aa930
ED
367 if (sock_owned_by_user(sk)) {
368 if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
369 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
370 }
1da177e4
LT
371 if (sk->sk_state == TCP_CLOSE)
372 goto out;
373
97e3ecd1 374 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
375 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
376 goto out;
377 }
378
f1ecd5d9 379 icsk = inet_csk(sk);
1da177e4 380 tp = tcp_sk(sk);
168a8f58 381 req = tp->fastopen_rsk;
1da177e4
LT
382 seq = ntohl(th->seq);
383 if (sk->sk_state != TCP_LISTEN &&
168a8f58
JC
384 !between(seq, tp->snd_una, tp->snd_nxt) &&
385 (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
386 /* For a Fast Open socket, allow seq to be snt_isn. */
de0744af 387 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
1da177e4
LT
388 goto out;
389 }
390
391 switch (type) {
55be7a9c
DM
392 case ICMP_REDIRECT:
393 do_redirect(icmp_skb, sk);
394 goto out;
1da177e4
LT
395 case ICMP_SOURCE_QUENCH:
396 /* Just silently ignore these. */
397 goto out;
398 case ICMP_PARAMETERPROB:
399 err = EPROTO;
400 break;
401 case ICMP_DEST_UNREACH:
402 if (code > NR_ICMP_UNREACH)
403 goto out;
404
405 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
0d4f0608
ED
406 /* We are not interested in TCP_LISTEN and open_requests
407 * (SYN-ACKs sent out by Linux are always < 576 bytes so
408 * they should go through unfragmented).
409 */
410 if (sk->sk_state == TCP_LISTEN)
411 goto out;
412
563d34d0 413 tp->mtu_info = info;
144d56e9 414 if (!sock_owned_by_user(sk)) {
563d34d0 415 tcp_v4_mtu_reduced(sk);
144d56e9
ED
416 } else {
417 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
418 sock_hold(sk);
419 }
1da177e4
LT
420 goto out;
421 }
422
423 err = icmp_err_convert[code].errno;
f1ecd5d9
DL
424 /* check if icmp_skb allows revert of backoff
425 * (see draft-zimmermann-tcp-lcd) */
426 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
427 break;
428 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
429 !icsk->icsk_backoff)
430 break;
431
168a8f58
JC
432 /* XXX (TFO) - revisit the following logic for TFO */
433
8f49c270
DM
434 if (sock_owned_by_user(sk))
435 break;
436
f1ecd5d9 437 icsk->icsk_backoff--;
740b0f18 438 inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
9ad7c049 439 TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
f1ecd5d9
DL
440 tcp_bound_rto(sk);
441
442 skb = tcp_write_queue_head(sk);
443 BUG_ON(!skb);
444
445 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
446 tcp_time_stamp - TCP_SKB_CB(skb)->when);
447
448 if (remaining) {
449 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
450 remaining, TCP_RTO_MAX);
f1ecd5d9
DL
451 } else {
452 /* RTO revert clocked out retransmission.
453 * Will retransmit now */
454 tcp_retransmit_timer(sk);
455 }
456
1da177e4
LT
457 break;
458 case ICMP_TIME_EXCEEDED:
459 err = EHOSTUNREACH;
460 break;
461 default:
462 goto out;
463 }
464
168a8f58
JC
465 /* XXX (TFO) - if it's a TFO socket and has been accepted, rather
466 * than following the TCP_SYN_RECV case and closing the socket,
467 * we ignore the ICMP error and keep trying like a fully established
468 * socket. Is this the right thing to do?
469 */
470 if (req && req->sk == NULL)
471 goto out;
472
1da177e4 473 switch (sk->sk_state) {
60236fdd 474 struct request_sock *req, **prev;
1da177e4
LT
475 case TCP_LISTEN:
476 if (sock_owned_by_user(sk))
477 goto out;
478
463c84b9
ACM
479 req = inet_csk_search_req(sk, &prev, th->dest,
480 iph->daddr, iph->saddr);
1da177e4
LT
481 if (!req)
482 goto out;
483
484 /* ICMPs are not backlogged, hence we cannot get
485 an established socket here.
486 */
547b792c 487 WARN_ON(req->sk);
1da177e4 488
2e6599cb 489 if (seq != tcp_rsk(req)->snt_isn) {
de0744af 490 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
1da177e4
LT
491 goto out;
492 }
493
494 /*
495 * Still in SYN_RECV, just remove it silently.
496 * There is no good way to pass the error to the newly
497 * created socket, and POSIX does not want network
498 * errors returned from accept().
499 */
463c84b9 500 inet_csk_reqsk_queue_drop(sk, req, prev);
848bf15f 501 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4
LT
502 goto out;
503
504 case TCP_SYN_SENT:
505 case TCP_SYN_RECV: /* Cannot happen.
506 It can f.e. if SYNs crossed,
507 or Fast Open.
508 */
509 if (!sock_owned_by_user(sk)) {
1da177e4
LT
510 sk->sk_err = err;
511
512 sk->sk_error_report(sk);
513
514 tcp_done(sk);
515 } else {
516 sk->sk_err_soft = err;
517 }
518 goto out;
519 }
520
521 /* If we've already connected we will keep trying
522 * until we time out, or the user gives up.
523 *
524 * rfc1122 4.2.3.9 allows us to consider as hard errors
525 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
526 * but it is obsoleted by pmtu discovery).
527 *
528 * Note that in the modern internet, where routing is unreliable
529 * and broken firewalls sit in every dark corner, sending random
530 * errors ordered by their masters, even these two messages finally lose
531 * their original sense (even Linux sends invalid PORT_UNREACHs)
532 *
533 * Now we are in compliance with RFCs.
534 * --ANK (980905)
535 */
536
537 inet = inet_sk(sk);
538 if (!sock_owned_by_user(sk) && inet->recverr) {
539 sk->sk_err = err;
540 sk->sk_error_report(sk);
541 } else { /* Only an error on timeout */
542 sk->sk_err_soft = err;
543 }
544
545out:
546 bh_unlock_sock(sk);
547 sock_put(sk);
548}
549
28850dc7 550void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1da177e4 551{
aa8223c7 552 struct tcphdr *th = tcp_hdr(skb);
1da177e4 553
84fa7933 554 if (skb->ip_summed == CHECKSUM_PARTIAL) {
419f9f89 555 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
663ead3b 556 skb->csum_start = skb_transport_header(skb) - skb->head;
ff1dcadb 557 skb->csum_offset = offsetof(struct tcphdr, check);
1da177e4 558 } else {
419f9f89 559 th->check = tcp_v4_check(skb->len, saddr, daddr,
07f0757a 560 csum_partial(th,
1da177e4
LT
561 th->doff << 2,
562 skb->csum));
563 }
564}
565
419f9f89 566/* This routine computes an IPv4 TCP checksum. */
bb296246 567void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
419f9f89 568{
cf533ea5 569 const struct inet_sock *inet = inet_sk(sk);
419f9f89
HX
570
571 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
572}
4bc2f18b 573EXPORT_SYMBOL(tcp_v4_send_check);
419f9f89 574
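/* Note: with CHECKSUM_PARTIAL the helpers above only seed th->check with
 * the pseudo-header sum and let the NIC (or the GSO path) finish the
 * checksum; otherwise the full checksum is computed in software via
 * csum_partial().
 */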
575/*
576 * This routine will send an RST to the other tcp.
577 *
578 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
579 * for reset.
580 * Answer: if a packet caused RST, it is not for a socket
581 * existing in our system, if it is matched to a socket,
582 * it is just duplicate segment or bug in other side's TCP.
583 * So that we build reply only basing on parameters
584 * arrived with segment.
585 * Exception: precedence violation. We do not implement it in any case.
586 */
587
cfb6eeb4 588static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
1da177e4 589{
cf533ea5 590 const struct tcphdr *th = tcp_hdr(skb);
cfb6eeb4
YH
591 struct {
592 struct tcphdr th;
593#ifdef CONFIG_TCP_MD5SIG
714e85be 594 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
cfb6eeb4
YH
595#endif
596 } rep;
1da177e4 597 struct ip_reply_arg arg;
cfb6eeb4
YH
598#ifdef CONFIG_TCP_MD5SIG
599 struct tcp_md5sig_key *key;
658ddaaf
SL
600 const __u8 *hash_location = NULL;
601 unsigned char newhash[16];
602 int genhash;
603 struct sock *sk1 = NULL;
cfb6eeb4 604#endif
a86b1e30 605 struct net *net;
1da177e4
LT
606
607 /* Never send a reset in response to a reset. */
608 if (th->rst)
609 return;
610
511c3f92 611 if (skb_rtable(skb)->rt_type != RTN_LOCAL)
1da177e4
LT
612 return;
613
614 /* Swap the send and the receive. */
cfb6eeb4
YH
615 memset(&rep, 0, sizeof(rep));
616 rep.th.dest = th->source;
617 rep.th.source = th->dest;
618 rep.th.doff = sizeof(struct tcphdr) / 4;
619 rep.th.rst = 1;
1da177e4
LT
620
621 if (th->ack) {
cfb6eeb4 622 rep.th.seq = th->ack_seq;
1da177e4 623 } else {
cfb6eeb4
YH
624 rep.th.ack = 1;
625 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
626 skb->len - (th->doff << 2));
1da177e4
LT
627 }
628
7174259e 629 memset(&arg, 0, sizeof(arg));
cfb6eeb4
YH
630 arg.iov[0].iov_base = (unsigned char *)&rep;
631 arg.iov[0].iov_len = sizeof(rep.th);
632
633#ifdef CONFIG_TCP_MD5SIG
658ddaaf
SL
634 hash_location = tcp_parse_md5sig_option(th);
635 if (!sk && hash_location) {
636 /*
637 * active side is lost. Try to find listening socket through
638 * source port, and then find md5 key through listening socket.
639 * we do not lose security here:
640 * Incoming packet is checked with md5 hash with finding key,
641 * no RST generated if md5 hash doesn't match.
642 */
643 sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
da5e3630
TH
644 &tcp_hashinfo, ip_hdr(skb)->saddr,
645 th->source, ip_hdr(skb)->daddr,
658ddaaf
SL
646 ntohs(th->source), inet_iif(skb));
647 /* don't send rst if it can't find key */
648 if (!sk1)
649 return;
650 rcu_read_lock();
651 key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
652 &ip_hdr(skb)->saddr, AF_INET);
653 if (!key)
654 goto release_sk1;
655
656 genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
657 if (genhash || memcmp(hash_location, newhash, 16) != 0)
658 goto release_sk1;
659 } else {
660 key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
661 &ip_hdr(skb)->saddr,
662 AF_INET) : NULL;
663 }
664
cfb6eeb4
YH
665 if (key) {
666 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
667 (TCPOPT_NOP << 16) |
668 (TCPOPT_MD5SIG << 8) |
669 TCPOLEN_MD5SIG);
670 /* Update length and the length the header thinks exists */
671 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
672 rep.th.doff = arg.iov[0].iov_len / 4;
673
49a72dfb 674 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
78e645cb
IJ
675 key, ip_hdr(skb)->saddr,
676 ip_hdr(skb)->daddr, &rep.th);
cfb6eeb4
YH
677 }
678#endif
eddc9ec5
ACM
679 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
680 ip_hdr(skb)->saddr, /* XXX */
52cd5750 681 arg.iov[0].iov_len, IPPROTO_TCP, 0);
1da177e4 682 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
88ef4a5a 683 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
e2446eaa 684 /* When socket is gone, all binding information is lost.
4c675258
AK
685 * routing might fail in this case. No choice here, if we choose to force
686 * input interface, we will misroute in case of asymmetric route.
e2446eaa 687 */
4c675258
AK
688 if (sk)
689 arg.bound_dev_if = sk->sk_bound_dev_if;
1da177e4 690
adf30907 691 net = dev_net(skb_dst(skb)->dev);
66b13d99 692 arg.tos = ip_hdr(skb)->tos;
be9f4a44 693 ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
70e73416 694 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
1da177e4 695
63231bdd
PE
696 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
697 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
658ddaaf
SL
698
699#ifdef CONFIG_TCP_MD5SIG
700release_sk1:
701 if (sk1) {
702 rcu_read_unlock();
703 sock_put(sk1);
704 }
705#endif
1da177e4
LT
706}
707
708/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
709 outside socket context, is ugly, certainly. What can I do?
710 */
711
9501f972 712static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
ee684b6f 713 u32 win, u32 tsval, u32 tsecr, int oif,
88ef4a5a 714 struct tcp_md5sig_key *key,
66b13d99 715 int reply_flags, u8 tos)
1da177e4 716{
cf533ea5 717 const struct tcphdr *th = tcp_hdr(skb);
1da177e4
LT
718 struct {
719 struct tcphdr th;
714e85be 720 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
cfb6eeb4 721#ifdef CONFIG_TCP_MD5SIG
714e85be 722 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
cfb6eeb4
YH
723#endif
724 ];
1da177e4
LT
725 } rep;
726 struct ip_reply_arg arg;
adf30907 727 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4
LT
728
729 memset(&rep.th, 0, sizeof(struct tcphdr));
7174259e 730 memset(&arg, 0, sizeof(arg));
1da177e4
LT
731
732 arg.iov[0].iov_base = (unsigned char *)&rep;
733 arg.iov[0].iov_len = sizeof(rep.th);
ee684b6f 734 if (tsecr) {
cfb6eeb4
YH
735 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
736 (TCPOPT_TIMESTAMP << 8) |
737 TCPOLEN_TIMESTAMP);
ee684b6f
AV
738 rep.opt[1] = htonl(tsval);
739 rep.opt[2] = htonl(tsecr);
cb48cfe8 740 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
1da177e4
LT
741 }
742
743 /* Swap the send and the receive. */
744 rep.th.dest = th->source;
745 rep.th.source = th->dest;
746 rep.th.doff = arg.iov[0].iov_len / 4;
747 rep.th.seq = htonl(seq);
748 rep.th.ack_seq = htonl(ack);
749 rep.th.ack = 1;
750 rep.th.window = htons(win);
751
cfb6eeb4 752#ifdef CONFIG_TCP_MD5SIG
cfb6eeb4 753 if (key) {
ee684b6f 754 int offset = (tsecr) ? 3 : 0;
cfb6eeb4
YH
755
756 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
757 (TCPOPT_NOP << 16) |
758 (TCPOPT_MD5SIG << 8) |
759 TCPOLEN_MD5SIG);
760 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
761 rep.th.doff = arg.iov[0].iov_len/4;
762
49a72dfb 763 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
90b7e112
AL
764 key, ip_hdr(skb)->saddr,
765 ip_hdr(skb)->daddr, &rep.th);
cfb6eeb4
YH
766 }
767#endif
88ef4a5a 768 arg.flags = reply_flags;
eddc9ec5
ACM
769 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
770 ip_hdr(skb)->saddr, /* XXX */
1da177e4
LT
771 arg.iov[0].iov_len, IPPROTO_TCP, 0);
772 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
9501f972
YH
773 if (oif)
774 arg.bound_dev_if = oif;
66b13d99 775 arg.tos = tos;
be9f4a44 776 ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
70e73416 777 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
1da177e4 778
63231bdd 779 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
1da177e4
LT
780}
781
782static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
783{
8feaf0c0 784 struct inet_timewait_sock *tw = inet_twsk(sk);
cfb6eeb4 785 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1da177e4 786
9501f972 787 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
7174259e 788 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
ee684b6f 789 tcp_time_stamp + tcptw->tw_ts_offset,
9501f972
YH
790 tcptw->tw_ts_recent,
791 tw->tw_bound_dev_if,
88ef4a5a 792 tcp_twsk_md5_key(tcptw),
66b13d99
ED
793 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
794 tw->tw_tos
9501f972 795 );
1da177e4 796
8feaf0c0 797 inet_twsk_put(tw);
1da177e4
LT
798}
799
6edafaaf 800static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
7174259e 801 struct request_sock *req)
1da177e4 802{
168a8f58
JC
803 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
804 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
805 */
806 tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
807 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
808 tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
ee684b6f 809 tcp_time_stamp,
9501f972
YH
810 req->ts_recent,
811 0,
a915da9b
ED
812 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
813 AF_INET),
66b13d99
ED
814 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
815 ip_hdr(skb)->tos);
1da177e4
LT
816}
817
1da177e4 818/*
9bf1d83e 819 * Send a SYN-ACK after having received a SYN.
60236fdd 820 * This still operates on a request_sock only, not on a big
1da177e4
LT
821 * socket.
822 */
72659ecc
OP
823static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
824 struct request_sock *req,
397b4174 825 u16 queue_mapping)
1da177e4 826{
2e6599cb 827 const struct inet_request_sock *ireq = inet_rsk(req);
6bd023f3 828 struct flowi4 fl4;
1da177e4 829 int err = -1;
d41db5af 830 struct sk_buff *skb;
1da177e4
LT
831
832 /* First, grab a route. */
ba3f7f04 833 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
fd80eb94 834 return -1;
1da177e4 835
1a2c6181 836 skb = tcp_make_synack(sk, dst, req, NULL);
1da177e4
LT
837
838 if (skb) {
634fb979 839 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
1da177e4 840
fff32699 841 skb_set_queue_mapping(skb, queue_mapping);
634fb979
ED
842 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
843 ireq->ir_rmt_addr,
2e6599cb 844 ireq->opt);
b9df3cb8 845 err = net_xmit_eval(err);
016818d0
NC
846 if (!tcp_rsk(req)->snt_synack && !err)
847 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1da177e4
LT
848 }
849
1da177e4
LT
850 return err;
851}
852
1a2c6181 853static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
fd80eb94 854{
397b4174 855 int res = tcp_v4_send_synack(sk, NULL, req, 0);
e6c022a4 856
f19c29e3 857 if (!res) {
e6c022a4 858 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
f19c29e3
YC
859 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
860 }
e6c022a4 861 return res;
fd80eb94
DL
862}
863
1da177e4 864/*
60236fdd 865 * IPv4 request_sock destructor.
1da177e4 866 */
60236fdd 867static void tcp_v4_reqsk_destructor(struct request_sock *req)
1da177e4 868{
a51482bd 869 kfree(inet_rsk(req)->opt);
1da177e4
LT
870}
871
946cedcc 872/*
a2a385d6 873 * Return true if a syncookie should be sent
946cedcc 874 */
a2a385d6 875bool tcp_syn_flood_action(struct sock *sk,
946cedcc
ED
876 const struct sk_buff *skb,
877 const char *proto)
1da177e4 878{
946cedcc 879 const char *msg = "Dropping request";
a2a385d6 880 bool want_cookie = false;
946cedcc
ED
881 struct listen_sock *lopt;
882
2a1d4bd4 883#ifdef CONFIG_SYN_COOKIES
946cedcc 884 if (sysctl_tcp_syncookies) {
2a1d4bd4 885 msg = "Sending cookies";
a2a385d6 886 want_cookie = true;
946cedcc
ED
887 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
888 } else
80e40daa 889#endif
946cedcc
ED
890 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
891
892 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
5ad37d5d 893 if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
946cedcc 894 lopt->synflood_warned = 1;
afd46503 895 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
946cedcc
ED
896 proto, ntohs(tcp_hdr(skb)->dest), msg);
897 }
898 return want_cookie;
2a1d4bd4 899}
946cedcc 900EXPORT_SYMBOL(tcp_syn_flood_action);
1da177e4
LT
901
902/*
60236fdd 903 * Save and compile IPv4 options into the request_sock if needed.
1da177e4 904 */
5dff747b 905static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
1da177e4 906{
f6d8bd05
ED
907 const struct ip_options *opt = &(IPCB(skb)->opt);
908 struct ip_options_rcu *dopt = NULL;
1da177e4
LT
909
910 if (opt && opt->optlen) {
f6d8bd05
ED
911 int opt_size = sizeof(*dopt) + opt->optlen;
912
1da177e4
LT
913 dopt = kmalloc(opt_size, GFP_ATOMIC);
914 if (dopt) {
f6d8bd05 915 if (ip_options_echo(&dopt->opt, skb)) {
1da177e4
LT
916 kfree(dopt);
917 dopt = NULL;
918 }
919 }
920 }
921 return dopt;
922}
923
924#ifdef CONFIG_TCP_MD5SIG
925/*
926 * RFC2385 MD5 checksumming requires a mapping of
927 * IP address->MD5 Key.
928 * We need to maintain these in the sk structure.
929 */
930
931/* Find the Key structure for an address. */
a915da9b
ED
932struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
933 const union tcp_md5_addr *addr,
934 int family)
cfb6eeb4
YH
935{
936 struct tcp_sock *tp = tcp_sk(sk);
a915da9b 937 struct tcp_md5sig_key *key;
a915da9b 938 unsigned int size = sizeof(struct in_addr);
a8afca03 939 struct tcp_md5sig_info *md5sig;
cfb6eeb4 940
a8afca03
ED
941 /* caller either holds rcu_read_lock() or socket lock */
942 md5sig = rcu_dereference_check(tp->md5sig_info,
b4fb05ea
ED
943 sock_owned_by_user(sk) ||
944 lockdep_is_held(&sk->sk_lock.slock));
a8afca03 945 if (!md5sig)
cfb6eeb4 946 return NULL;
a915da9b
ED
947#if IS_ENABLED(CONFIG_IPV6)
948 if (family == AF_INET6)
949 size = sizeof(struct in6_addr);
950#endif
b67bfe0d 951 hlist_for_each_entry_rcu(key, &md5sig->head, node) {
a915da9b
ED
952 if (key->family != family)
953 continue;
954 if (!memcmp(&key->addr, addr, size))
955 return key;
cfb6eeb4
YH
956 }
957 return NULL;
958}
a915da9b 959EXPORT_SYMBOL(tcp_md5_do_lookup);
cfb6eeb4
YH
960
961struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
962 struct sock *addr_sk)
963{
a915da9b
ED
964 union tcp_md5_addr *addr;
965
966 addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
967 return tcp_md5_do_lookup(sk, addr, AF_INET);
cfb6eeb4 968}
cfb6eeb4
YH
969EXPORT_SYMBOL(tcp_v4_md5_lookup);
970
f5b99bcd
AB
971static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
972 struct request_sock *req)
cfb6eeb4 973{
a915da9b
ED
974 union tcp_md5_addr *addr;
975
634fb979 976 addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
a915da9b 977 return tcp_md5_do_lookup(sk, addr, AF_INET);
cfb6eeb4
YH
978}
979
980/* This can be called on a newly created socket, from other files */
a915da9b
ED
981int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
982 int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
cfb6eeb4
YH
983{
984 /* Add Key to the list */
b0a713e9 985 struct tcp_md5sig_key *key;
cfb6eeb4 986 struct tcp_sock *tp = tcp_sk(sk);
a915da9b 987 struct tcp_md5sig_info *md5sig;
cfb6eeb4 988
c0353c7b 989 key = tcp_md5_do_lookup(sk, addr, family);
cfb6eeb4
YH
990 if (key) {
991 /* Pre-existing entry - just update that one. */
a915da9b 992 memcpy(key->key, newkey, newkeylen);
b0a713e9 993 key->keylen = newkeylen;
a915da9b
ED
994 return 0;
995 }
260fcbeb 996
a8afca03
ED
997 md5sig = rcu_dereference_protected(tp->md5sig_info,
998 sock_owned_by_user(sk));
a915da9b
ED
999 if (!md5sig) {
1000 md5sig = kmalloc(sizeof(*md5sig), gfp);
1001 if (!md5sig)
cfb6eeb4 1002 return -ENOMEM;
cfb6eeb4 1003
a915da9b
ED
1004 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1005 INIT_HLIST_HEAD(&md5sig->head);
a8afca03 1006 rcu_assign_pointer(tp->md5sig_info, md5sig);
a915da9b 1007 }
cfb6eeb4 1008
5f3d9cb2 1009 key = sock_kmalloc(sk, sizeof(*key), gfp);
a915da9b
ED
1010 if (!key)
1011 return -ENOMEM;
71cea17e 1012 if (!tcp_alloc_md5sig_pool()) {
5f3d9cb2 1013 sock_kfree_s(sk, key, sizeof(*key));
a915da9b 1014 return -ENOMEM;
cfb6eeb4 1015 }
a915da9b
ED
1016
1017 memcpy(key->key, newkey, newkeylen);
1018 key->keylen = newkeylen;
1019 key->family = family;
1020 memcpy(&key->addr, addr,
1021 (family == AF_INET6) ? sizeof(struct in6_addr) :
1022 sizeof(struct in_addr));
1023 hlist_add_head_rcu(&key->node, &md5sig->head);
cfb6eeb4
YH
1024 return 0;
1025}
a915da9b 1026EXPORT_SYMBOL(tcp_md5_do_add);
cfb6eeb4 1027
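/* Note: MD5 keys live on an RCU hlist hanging off tp->md5sig_info; lookups
 * run under rcu_read_lock() or the socket lock, while add/del expect the
 * socket to be owned by the caller (see the lockdep annotations above).
 */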
a915da9b 1028int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
cfb6eeb4 1029{
a915da9b
ED
1030 struct tcp_md5sig_key *key;
1031
c0353c7b 1032 key = tcp_md5_do_lookup(sk, addr, family);
a915da9b
ED
1033 if (!key)
1034 return -ENOENT;
1035 hlist_del_rcu(&key->node);
5f3d9cb2 1036 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
a915da9b 1037 kfree_rcu(key, rcu);
a915da9b 1038 return 0;
cfb6eeb4 1039}
a915da9b 1040EXPORT_SYMBOL(tcp_md5_do_del);
cfb6eeb4 1041
e0683e70 1042static void tcp_clear_md5_list(struct sock *sk)
cfb6eeb4
YH
1043{
1044 struct tcp_sock *tp = tcp_sk(sk);
a915da9b 1045 struct tcp_md5sig_key *key;
b67bfe0d 1046 struct hlist_node *n;
a8afca03 1047 struct tcp_md5sig_info *md5sig;
cfb6eeb4 1048
a8afca03
ED
1049 md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1050
b67bfe0d 1051 hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
a915da9b 1052 hlist_del_rcu(&key->node);
5f3d9cb2 1053 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
a915da9b 1054 kfree_rcu(key, rcu);
cfb6eeb4
YH
1055 }
1056}
1057
7174259e
ACM
1058static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1059 int optlen)
cfb6eeb4
YH
1060{
1061 struct tcp_md5sig cmd;
1062 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
cfb6eeb4
YH
1063
1064 if (optlen < sizeof(cmd))
1065 return -EINVAL;
1066
7174259e 1067 if (copy_from_user(&cmd, optval, sizeof(cmd)))
cfb6eeb4
YH
1068 return -EFAULT;
1069
1070 if (sin->sin_family != AF_INET)
1071 return -EINVAL;
1072
a8afca03 1073 if (!cmd.tcpm_key || !cmd.tcpm_keylen)
a915da9b
ED
1074 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1075 AF_INET);
cfb6eeb4
YH
1076
1077 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1078 return -EINVAL;
1079
a915da9b
ED
1080 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1081 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1082 GFP_KERNEL);
cfb6eeb4
YH
1083}
1084
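/* Userspace usage sketch for the TCP_MD5SIG handler above (illustrative
 * values only, error handling omitted):
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in *peer = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	peer->sin_family = AF_INET;
 *	inet_pton(AF_INET, "192.0.2.1", &peer->sin_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * Passing a zero tcpm_keylen deletes the key for that peer, as handled by
 * the tcp_md5_do_del() branch above.
 */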
49a72dfb
AL
1085static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1086 __be32 daddr, __be32 saddr, int nbytes)
cfb6eeb4 1087{
cfb6eeb4 1088 struct tcp4_pseudohdr *bp;
49a72dfb 1089 struct scatterlist sg;
cfb6eeb4
YH
1090
1091 bp = &hp->md5_blk.ip4;
cfb6eeb4
YH
1092
1093 /*
49a72dfb 1094 * 1. the TCP pseudo-header (in the order: source IP address,
cfb6eeb4
YH
1095 * destination IP address, zero-padded protocol number, and
1096 * segment length)
1097 */
1098 bp->saddr = saddr;
1099 bp->daddr = daddr;
1100 bp->pad = 0;
076fb722 1101 bp->protocol = IPPROTO_TCP;
49a72dfb 1102 bp->len = cpu_to_be16(nbytes);
c7da57a1 1103
49a72dfb
AL
1104 sg_init_one(&sg, bp, sizeof(*bp));
1105 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1106}
1107
a915da9b 1108static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
318cf7aa 1109 __be32 daddr, __be32 saddr, const struct tcphdr *th)
49a72dfb
AL
1110{
1111 struct tcp_md5sig_pool *hp;
1112 struct hash_desc *desc;
1113
1114 hp = tcp_get_md5sig_pool();
1115 if (!hp)
1116 goto clear_hash_noput;
1117 desc = &hp->md5_desc;
1118
1119 if (crypto_hash_init(desc))
1120 goto clear_hash;
1121 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1122 goto clear_hash;
1123 if (tcp_md5_hash_header(hp, th))
1124 goto clear_hash;
1125 if (tcp_md5_hash_key(hp, key))
1126 goto clear_hash;
1127 if (crypto_hash_final(desc, md5_hash))
cfb6eeb4
YH
1128 goto clear_hash;
1129
cfb6eeb4 1130 tcp_put_md5sig_pool();
cfb6eeb4 1131 return 0;
49a72dfb 1132
cfb6eeb4
YH
1133clear_hash:
1134 tcp_put_md5sig_pool();
1135clear_hash_noput:
1136 memset(md5_hash, 0, 16);
49a72dfb 1137 return 1;
cfb6eeb4
YH
1138}
1139
49a72dfb 1140int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
318cf7aa
ED
1141 const struct sock *sk, const struct request_sock *req,
1142 const struct sk_buff *skb)
cfb6eeb4 1143{
49a72dfb
AL
1144 struct tcp_md5sig_pool *hp;
1145 struct hash_desc *desc;
318cf7aa 1146 const struct tcphdr *th = tcp_hdr(skb);
cfb6eeb4
YH
1147 __be32 saddr, daddr;
1148
1149 if (sk) {
c720c7e8
ED
1150 saddr = inet_sk(sk)->inet_saddr;
1151 daddr = inet_sk(sk)->inet_daddr;
49a72dfb 1152 } else if (req) {
634fb979
ED
1153 saddr = inet_rsk(req)->ir_loc_addr;
1154 daddr = inet_rsk(req)->ir_rmt_addr;
cfb6eeb4 1155 } else {
49a72dfb
AL
1156 const struct iphdr *iph = ip_hdr(skb);
1157 saddr = iph->saddr;
1158 daddr = iph->daddr;
cfb6eeb4 1159 }
49a72dfb
AL
1160
1161 hp = tcp_get_md5sig_pool();
1162 if (!hp)
1163 goto clear_hash_noput;
1164 desc = &hp->md5_desc;
1165
1166 if (crypto_hash_init(desc))
1167 goto clear_hash;
1168
1169 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1170 goto clear_hash;
1171 if (tcp_md5_hash_header(hp, th))
1172 goto clear_hash;
1173 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1174 goto clear_hash;
1175 if (tcp_md5_hash_key(hp, key))
1176 goto clear_hash;
1177 if (crypto_hash_final(desc, md5_hash))
1178 goto clear_hash;
1179
1180 tcp_put_md5sig_pool();
1181 return 0;
1182
1183clear_hash:
1184 tcp_put_md5sig_pool();
1185clear_hash_noput:
1186 memset(md5_hash, 0, 16);
1187 return 1;
cfb6eeb4 1188}
49a72dfb 1189EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
cfb6eeb4 1190
a2a385d6 1191static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1192{
1193 /*
1194 * This gets called for each TCP segment that arrives
1195 * so we want to be efficient.
1196 * We have 3 drop cases:
1197 * o No MD5 hash and one expected.
1198 * o MD5 hash and we're not expecting one.
1199 * o MD5 hash and it's wrong.
1200 */
cf533ea5 1201 const __u8 *hash_location = NULL;
cfb6eeb4 1202 struct tcp_md5sig_key *hash_expected;
eddc9ec5 1203 const struct iphdr *iph = ip_hdr(skb);
cf533ea5 1204 const struct tcphdr *th = tcp_hdr(skb);
cfb6eeb4 1205 int genhash;
cfb6eeb4
YH
1206 unsigned char newhash[16];
1207
a915da9b
ED
1208 hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1209 AF_INET);
7d5d5525 1210 hash_location = tcp_parse_md5sig_option(th);
cfb6eeb4 1211
cfb6eeb4
YH
1212 /* We've parsed the options - do we have a hash? */
1213 if (!hash_expected && !hash_location)
a2a385d6 1214 return false;
cfb6eeb4
YH
1215
1216 if (hash_expected && !hash_location) {
785957d3 1217 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
a2a385d6 1218 return true;
cfb6eeb4
YH
1219 }
1220
1221 if (!hash_expected && hash_location) {
785957d3 1222 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
a2a385d6 1223 return true;
cfb6eeb4
YH
1224 }
1225
1226 /* Okay, so this is hash_expected and hash_location -
1227 * so we need to calculate the checksum.
1228 */
49a72dfb
AL
1229 genhash = tcp_v4_md5_hash_skb(newhash,
1230 hash_expected,
1231 NULL, NULL, skb);
cfb6eeb4
YH
1232
1233 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
e87cc472
JP
1234 net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1235 &iph->saddr, ntohs(th->source),
1236 &iph->daddr, ntohs(th->dest),
1237 genhash ? " tcp_v4_calc_md5_hash failed"
1238 : "");
a2a385d6 1239 return true;
cfb6eeb4 1240 }
a2a385d6 1241 return false;
cfb6eeb4
YH
1242}
1243
1244#endif
1245
72a3effa 1246struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1da177e4 1247 .family = PF_INET,
2e6599cb 1248 .obj_size = sizeof(struct tcp_request_sock),
72659ecc 1249 .rtx_syn_ack = tcp_v4_rtx_synack,
60236fdd
ACM
1250 .send_ack = tcp_v4_reqsk_send_ack,
1251 .destructor = tcp_v4_reqsk_destructor,
1da177e4 1252 .send_reset = tcp_v4_send_reset,
72659ecc 1253 .syn_ack_timeout = tcp_syn_ack_timeout,
1da177e4
LT
1254};
1255
cfb6eeb4 1256#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 1257static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
cfb6eeb4 1258 .md5_lookup = tcp_v4_reqsk_md5_lookup,
e3afe7b7 1259 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 1260};
b6332e6c 1261#endif
cfb6eeb4 1262
1da177e4
LT
1263int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1264{
1265 struct tcp_options_received tmp_opt;
60236fdd 1266 struct request_sock *req;
e6b4d113 1267 struct inet_request_sock *ireq;
4957faad 1268 struct tcp_sock *tp = tcp_sk(sk);
e6b4d113 1269 struct dst_entry *dst = NULL;
eddc9ec5
ACM
1270 __be32 saddr = ip_hdr(skb)->saddr;
1271 __be32 daddr = ip_hdr(skb)->daddr;
1da177e4 1272 __u32 isn = TCP_SKB_CB(skb)->when;
a2a385d6 1273 bool want_cookie = false;
168a8f58
JC
1274 struct flowi4 fl4;
1275 struct tcp_fastopen_cookie foc = { .len = -1 };
1276 struct tcp_fastopen_cookie valid_foc = { .len = -1 };
1277 struct sk_buff *skb_synack;
1278 int do_fastopen;
1da177e4
LT
1279
1280 /* Never answer to SYNs send to broadcast or multicast */
511c3f92 1281 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1da177e4
LT
1282 goto drop;
1283
1284 /* TW buckets are converted to open requests without
1285 * limitations; they conserve resources and the peer is
1286 * evidently a real one.
1287 */
5ad37d5d
HFS
1288 if ((sysctl_tcp_syncookies == 2 ||
1289 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
946cedcc
ED
1290 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1291 if (!want_cookie)
1292 goto drop;
1da177e4
LT
1293 }
1294
1295 /* Accept backlog is full. If we have already queued enough
1296 * of warm entries in syn queue, drop request. It is better than
1297 * clogging syn queue with openreqs with exponentially increasing
1298 * timeout.
1299 */
2aeef18d
NS
1300 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
1301 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1da177e4 1302 goto drop;
2aeef18d 1303 }
1da177e4 1304
ce4a7d0d 1305 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1da177e4
LT
1306 if (!req)
1307 goto drop;
1308
cfb6eeb4
YH
1309#ifdef CONFIG_TCP_MD5SIG
1310 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1311#endif
1312
1da177e4 1313 tcp_clear_options(&tmp_opt);
bee7ca9e 1314 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
4957faad 1315 tmp_opt.user_mss = tp->rx_opt.user_mss;
1a2c6181 1316 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
1da177e4 1317
4dfc2817 1318 if (want_cookie && !tmp_opt.saw_tstamp)
1da177e4 1319 tcp_clear_options(&tmp_opt);
1da177e4 1320
1da177e4 1321 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1da177e4
LT
1322 tcp_openreq_init(req, &tmp_opt, skb);
1323
bb5b7c11 1324 ireq = inet_rsk(req);
634fb979
ED
1325 ireq->ir_loc_addr = daddr;
1326 ireq->ir_rmt_addr = saddr;
bb5b7c11 1327 ireq->no_srccheck = inet_sk(sk)->transparent;
5dff747b 1328 ireq->opt = tcp_v4_save_options(skb);
bb5b7c11 1329
284904aa 1330 if (security_inet_conn_request(sk, skb, req))
bb5b7c11 1331 goto drop_and_free;
284904aa 1332
172d69e6 1333 if (!want_cookie || tmp_opt.tstamp_ok)
5d134f1c 1334 TCP_ECN_create_request(req, skb, sock_net(sk));
1da177e4
LT
1335
1336 if (want_cookie) {
1da177e4 1337 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
172d69e6 1338 req->cookie_ts = tmp_opt.tstamp_ok;
1da177e4 1339 } else if (!isn) {
1da177e4
LT
1340 /* VJ's idea. We save last timestamp seen
1341 * from the destination in peer table, when entering
1342 * state TIME-WAIT, and check against it before
1343 * accepting new connection request.
1344 *
1345 * If "isn" is not zero, this request hit alive
1346 * timewait bucket, so that all the necessary checks
1347 * are made in the function processing timewait state.
1348 */
1349 if (tmp_opt.saw_tstamp &&
295ff7ed 1350 tcp_death_row.sysctl_tw_recycle &&
ba3f7f04 1351 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
81166dd6
DM
1352 fl4.daddr == saddr) {
1353 if (!tcp_peer_is_proven(req, dst, true)) {
de0744af 1354 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
7cd04fa7 1355 goto drop_and_release;
1da177e4
LT
1356 }
1357 }
1358 /* Kill the following clause, if you dislike this way. */
1359 else if (!sysctl_tcp_syncookies &&
463c84b9 1360 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1da177e4 1361 (sysctl_max_syn_backlog >> 2)) &&
81166dd6 1362 !tcp_peer_is_proven(req, dst, false)) {
1363 /* Without syncookies the last quarter of the
1364 * backlog is filled with destinations
1365 * proven to be alive.
1366 * It means that we continue to communicate
1367 * with destinations already remembered
1368 * at the moment of synflood.
1369 */
afd46503 1370 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
673d57e7 1371 &saddr, ntohs(tcp_hdr(skb)->source));
7cd04fa7 1372 goto drop_and_release;
1da177e4
LT
1373 }
1374
a94f723d 1375 isn = tcp_v4_init_sequence(skb);
1da177e4 1376 }
2e6599cb 1377 tcp_rsk(req)->snt_isn = isn;
1da177e4 1378
168a8f58
JC
1379 if (dst == NULL) {
1380 dst = inet_csk_route_req(sk, &fl4, req);
1381 if (dst == NULL)
1382 goto drop_and_free;
1383 }
1384 do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
1385
1386 /* We don't call tcp_v4_send_synack() directly because we need
1387 * to make sure a child socket can be created successfully before
1388 * sending back synack!
1389 *
1390 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
1391 * (or better yet, call tcp_send_synack() in the child context
1392 * directly, but will have to fix bunch of other code first)
1393 * after syn_recv_sock() except one will need to first fix the
1394 * latter to remove its dependency on the current implementation
1395 * of tcp_v4_send_synack()->tcp_select_initial_window().
1396 */
1397 skb_synack = tcp_make_synack(sk, dst, req,
168a8f58
JC
1398 fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
1399
1400 if (skb_synack) {
634fb979 1401 __tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
168a8f58
JC
1402 skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
1403 } else
1404 goto drop_and_free;
1405
1406 if (likely(!do_fastopen)) {
1407 int err;
634fb979
ED
1408 err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
1409 ireq->ir_rmt_addr, ireq->opt);
168a8f58
JC
1410 err = net_xmit_eval(err);
1411 if (err || want_cookie)
1412 goto drop_and_free;
1413
016818d0 1414 tcp_rsk(req)->snt_synack = tcp_time_stamp;
168a8f58
JC
1415 tcp_rsk(req)->listener = NULL;
1416 /* Add the request_sock to the SYN table */
1417 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1418 if (fastopen_cookie_present(&foc) && foc.len != 0)
1419 NET_INC_STATS_BH(sock_net(sk),
1420 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
5b7ed089
YC
1421 } else if (tcp_fastopen_create_child(sk, skb, skb_synack, req))
1422 goto drop_and_release;
1da177e4 1423
1da177e4
LT
1424 return 0;
1425
7cd04fa7
DL
1426drop_and_release:
1427 dst_release(dst);
1da177e4 1428drop_and_free:
60236fdd 1429 reqsk_free(req);
1da177e4 1430drop:
848bf15f 1431 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4
LT
1432 return 0;
1433}
4bc2f18b 1434EXPORT_SYMBOL(tcp_v4_conn_request);
1435
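/* Note: tcp_v4_conn_request() is the passive-open entry point: it applies
 * the SYN-flood policy (possibly switching to syncookies), parses options,
 * allocates a request_sock, and either queues it in the SYN table after
 * sending the SYN-ACK or, for a valid Fast Open request, creates the child
 * socket right away via tcp_fastopen_create_child().
 */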
1436
1437/*
1438 * The three way handshake has completed - we got a valid synack -
1439 * now create the new socket.
1440 */
1441struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
60236fdd 1442 struct request_sock *req,
1da177e4
LT
1443 struct dst_entry *dst)
1444{
2e6599cb 1445 struct inet_request_sock *ireq;
1da177e4
LT
1446 struct inet_sock *newinet;
1447 struct tcp_sock *newtp;
1448 struct sock *newsk;
cfb6eeb4
YH
1449#ifdef CONFIG_TCP_MD5SIG
1450 struct tcp_md5sig_key *key;
1451#endif
f6d8bd05 1452 struct ip_options_rcu *inet_opt;
1da177e4
LT
1453
1454 if (sk_acceptq_is_full(sk))
1455 goto exit_overflow;
1456
1da177e4
LT
1457 newsk = tcp_create_openreq_child(sk, req, skb);
1458 if (!newsk)
093d2823 1459 goto exit_nonewsk;
1da177e4 1460
bcd76111 1461 newsk->sk_gso_type = SKB_GSO_TCPV4;
fae6ef87 1462 inet_sk_rx_dst_set(newsk, skb);
1da177e4
LT
1463
1464 newtp = tcp_sk(newsk);
1465 newinet = inet_sk(newsk);
2e6599cb 1466 ireq = inet_rsk(req);
634fb979
ED
1467 newinet->inet_daddr = ireq->ir_rmt_addr;
1468 newinet->inet_rcv_saddr = ireq->ir_loc_addr;
1469 newinet->inet_saddr = ireq->ir_loc_addr;
f6d8bd05
ED
1470 inet_opt = ireq->opt;
1471 rcu_assign_pointer(newinet->inet_opt, inet_opt);
2e6599cb 1472 ireq->opt = NULL;
463c84b9 1473 newinet->mc_index = inet_iif(skb);
eddc9ec5 1474 newinet->mc_ttl = ip_hdr(skb)->ttl;
4c507d28 1475 newinet->rcv_tos = ip_hdr(skb)->tos;
d83d8461 1476 inet_csk(newsk)->icsk_ext_hdr_len = 0;
f6d8bd05
ED
1477 if (inet_opt)
1478 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
c720c7e8 1479 newinet->inet_id = newtp->write_seq ^ jiffies;
1da177e4 1480
dfd25fff
ED
1481 if (!dst) {
1482 dst = inet_csk_route_child_sock(sk, newsk, req);
1483 if (!dst)
1484 goto put_and_exit;
1485 } else {
1486 /* syncookie case : see end of cookie_v4_check() */
1487 }
0e734419
DM
1488 sk_setup_caps(newsk, dst);
1489
1da177e4 1490 tcp_sync_mss(newsk, dst_mtu(dst));
0dbaee3b 1491 newtp->advmss = dst_metric_advmss(dst);
f5fff5dc
TQ
1492 if (tcp_sk(sk)->rx_opt.user_mss &&
1493 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1494 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1495
1da177e4
LT
1496 tcp_initialize_rcv_mss(newsk);
1497
cfb6eeb4
YH
1498#ifdef CONFIG_TCP_MD5SIG
1499 /* Copy over the MD5 key from the original socket */
a915da9b
ED
1500 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1501 AF_INET);
c720c7e8 1502 if (key != NULL) {
cfb6eeb4
YH
1503 /*
1504 * We're using one, so create a matching key
1505 * on the newsk structure. If we fail to get
1506 * memory, then we end up not copying the key
1507 * across. Shucks.
1508 */
a915da9b
ED
1509 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1510 AF_INET, key->key, key->keylen, GFP_ATOMIC);
a465419b 1511 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
cfb6eeb4
YH
1512 }
1513#endif
1514
0e734419
DM
1515 if (__inet_inherit_port(sk, newsk) < 0)
1516 goto put_and_exit;
9327f705 1517 __inet_hash_nolisten(newsk, NULL);
1da177e4
LT
1518
1519 return newsk;
1520
1521exit_overflow:
de0744af 1522 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
093d2823
BS
1523exit_nonewsk:
1524 dst_release(dst);
1da177e4 1525exit:
de0744af 1526 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4 1527 return NULL;
0e734419 1528put_and_exit:
e337e24d
CP
1529 inet_csk_prepare_forced_close(newsk);
1530 tcp_done(newsk);
0e734419 1531 goto exit;
1da177e4 1532}
4bc2f18b 1533EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1da177e4
LT
1534
1535static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1536{
aa8223c7 1537 struct tcphdr *th = tcp_hdr(skb);
eddc9ec5 1538 const struct iphdr *iph = ip_hdr(skb);
1da177e4 1539 struct sock *nsk;
60236fdd 1540 struct request_sock **prev;
1da177e4 1541 /* Find possible connection requests. */
463c84b9
ACM
1542 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1543 iph->saddr, iph->daddr);
1da177e4 1544 if (req)
8336886f 1545 return tcp_check_req(sk, skb, req, prev, false);
1da177e4 1546
3b1e0a65 1547 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
c67499c0 1548 th->source, iph->daddr, th->dest, inet_iif(skb));
1da177e4
LT
1549
1550 if (nsk) {
1551 if (nsk->sk_state != TCP_TIME_WAIT) {
1552 bh_lock_sock(nsk);
1553 return nsk;
1554 }
9469c7b4 1555 inet_twsk_put(inet_twsk(nsk));
1da177e4
LT
1556 return NULL;
1557 }
1558
1559#ifdef CONFIG_SYN_COOKIES
af9b4738 1560 if (!th->syn)
1da177e4
LT
1561 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1562#endif
1563 return sk;
1564}
1565
1566/* The socket must have its spinlock held when we get
1567 * here.
1568 *
1569 * We have a potential double-lock case here, so even when
1570 * doing backlog processing we use the BH locking scheme.
1571 * This is because we cannot sleep with the original spinlock
1572 * held.
1573 */
1574int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1575{
1576 struct sock *rsk;
1577#ifdef CONFIG_TCP_MD5SIG
1578 /*
1579 * We really want to reject the packet as early as possible
1580 * if:
1581 * o We're expecting an MD5'd packet and this is no MD5 tcp option
1582 * o There is an MD5 option and we're not expecting one
1583 */
7174259e 1584 if (tcp_v4_inbound_md5_hash(sk, skb))
cfb6eeb4
YH
1585 goto discard;
1586#endif
1587
1da177e4 1588 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
404e0a8b
ED
1589 struct dst_entry *dst = sk->sk_rx_dst;
1590
bdeab991 1591 sock_rps_save_rxhash(sk, skb);
404e0a8b 1592 if (dst) {
505fbcf0
ED
1593 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1594 dst->ops->check(dst, 0) == NULL) {
92101b3b
DM
1595 dst_release(dst);
1596 sk->sk_rx_dst = NULL;
1597 }
1598 }
c995ae22 1599 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1da177e4
LT
1600 return 0;
1601 }
1602
ab6a5bb6 1603 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1da177e4
LT
1604 goto csum_err;
1605
1606 if (sk->sk_state == TCP_LISTEN) {
1607 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1608 if (!nsk)
1609 goto discard;
1610
1611 if (nsk != sk) {
bdeab991 1612 sock_rps_save_rxhash(nsk, skb);
cfb6eeb4
YH
1613 if (tcp_child_process(sk, nsk, skb)) {
1614 rsk = nsk;
1da177e4 1615 goto reset;
cfb6eeb4 1616 }
1da177e4
LT
1617 return 0;
1618 }
ca55158c 1619 } else
bdeab991 1620 sock_rps_save_rxhash(sk, skb);
ca55158c 1621
aa8223c7 1622 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1623 rsk = sk;
1da177e4 1624 goto reset;
cfb6eeb4 1625 }
1da177e4
LT
1626 return 0;
1627
1628reset:
cfb6eeb4 1629 tcp_v4_send_reset(rsk, skb);
1da177e4
LT
1630discard:
1631 kfree_skb(skb);
1632 /* Be careful here. If this function gets more complicated and
1633 * gcc suffers from register pressure on the x86, sk (in %ebx)
1634 * might be destroyed here. This current version compiles correctly,
1635 * but you have been warned.
1636 */
1637 return 0;
1638
1639csum_err:
6a5dc9e5 1640 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
63231bdd 1641 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1da177e4
LT
1642 goto discard;
1643}
4bc2f18b 1644EXPORT_SYMBOL(tcp_v4_do_rcv);
1da177e4 1645
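/* Note: tcp_v4_do_rcv() dispatches on socket state: ESTABLISHED segments
 * take the fast path through tcp_rcv_established(), LISTEN sockets go via
 * tcp_v4_hnd_req() (possibly creating a child), and everything else falls
 * through to the generic tcp_rcv_state_process() state machine.
 */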
160eb5a6 1646void tcp_v4_early_demux(struct sk_buff *skb)
41063e9d 1647{
41063e9d
DM
1648 const struct iphdr *iph;
1649 const struct tcphdr *th;
1650 struct sock *sk;
41063e9d 1651
41063e9d 1652 if (skb->pkt_type != PACKET_HOST)
160eb5a6 1653 return;
41063e9d 1654
45f00f99 1655 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
160eb5a6 1656 return;
41063e9d
DM
1657
1658 iph = ip_hdr(skb);
45f00f99 1659 th = tcp_hdr(skb);
41063e9d
DM
1660
1661 if (th->doff < sizeof(struct tcphdr) / 4)
160eb5a6 1662 return;
41063e9d 1663
45f00f99 1664 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
41063e9d 1665 iph->saddr, th->source,
7011d085 1666 iph->daddr, ntohs(th->dest),
9cb429d6 1667 skb->skb_iif);
41063e9d
DM
1668 if (sk) {
1669 skb->sk = sk;
1670 skb->destructor = sock_edemux;
1671 if (sk->sk_state != TCP_TIME_WAIT) {
1672 struct dst_entry *dst = sk->sk_rx_dst;
505fbcf0 1673
41063e9d
DM
1674 if (dst)
1675 dst = dst_check(dst, 0);
92101b3b 1676 if (dst &&
505fbcf0 1677 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
92101b3b 1678 skb_dst_set_noref(skb, dst);
41063e9d
DM
1679 }
1680 }
41063e9d
DM
1681}
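/*
 * tcp_v4_early_demux() is called from the IP receive path before the route
 * lookup: if an ESTABLISHED socket matching the 4-tuple is found, the skb is
 * bound to it (skb->sk plus the sock_edemux destructor) and the socket's
 * cached rx dst is reused when it is still valid for the incoming interface,
 * sparing a second socket lookup and a routing decision later in
 * tcp_v4_rcv().
 */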
1682
b2fb4f54
ED
1683/* Packet is added to VJ-style prequeue for processing in process
1684 * context, if a reader task is waiting. Apparently, this exciting
1685 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1686 * failed somewhere. Latency? Burstiness? Well, at least now we will
1687 * see why it failed. 8)8) --ANK
1688 *
1689 */
1690bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1691{
1692 struct tcp_sock *tp = tcp_sk(sk);
1693
1694 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1695 return false;
1696
1697 if (skb->len <= tcp_hdrlen(skb) &&
1698 skb_queue_len(&tp->ucopy.prequeue) == 0)
1699 return false;
1700
58717686 1701 skb_dst_force(skb);
b2fb4f54
ED
1702 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1703 tp->ucopy.memory += skb->truesize;
1704 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1705 struct sk_buff *skb1;
1706
1707 BUG_ON(sock_owned_by_user(sk));
1708
1709 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1710 sk_backlog_rcv(sk, skb1);
1711 NET_INC_STATS_BH(sock_net(sk),
1712 LINUX_MIB_TCPPREQUEUEDROPPED);
1713 }
1714
1715 tp->ucopy.memory = 0;
1716 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1717 wake_up_interruptible_sync_poll(sk_sleep(sk),
1718 POLLIN | POLLRDNORM | POLLRDBAND);
1719 if (!inet_csk_ack_scheduled(sk))
1720 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1721 (3 * tcp_rto_min(sk)) / 4,
1722 TCP_RTO_MAX);
1723 }
1724 return true;
1725}
1726EXPORT_SYMBOL(tcp_prequeue);
1727
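/*
 * Prequeue behaviour in practice: when a reader is blocked in tcp_recvmsg()
 * and tcp_low_latency is not set, segments are parked on tp->ucopy.prequeue
 * and charged to tp->ucopy.memory. The first queued segment wakes the reader
 * and, if no ACK is already scheduled, arms a delayed-ACK timer at 3/4 of the
 * minimum RTO so an ACK still goes out if the reader stalls. If the queue
 * outgrows sk_rcvbuf, everything is flushed through sk_backlog_rcv() and
 * accounted as TCPPrequeueDropped.
 */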
1da177e4
LT
1728/*
1729 * From tcp_input.c
1730 */
1731
1732int tcp_v4_rcv(struct sk_buff *skb)
1733{
eddc9ec5 1734 const struct iphdr *iph;
cf533ea5 1735 const struct tcphdr *th;
1da177e4
LT
1736 struct sock *sk;
1737 int ret;
a86b1e30 1738 struct net *net = dev_net(skb->dev);
1da177e4
LT
1739
1740 if (skb->pkt_type != PACKET_HOST)
1741 goto discard_it;
1742
1743 /* Count it even if it's bad */
63231bdd 1744 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1da177e4
LT
1745
1746 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1747 goto discard_it;
1748
aa8223c7 1749 th = tcp_hdr(skb);
1da177e4
LT
1750
1751 if (th->doff < sizeof(struct tcphdr) / 4)
1752 goto bad_packet;
1753 if (!pskb_may_pull(skb, th->doff * 4))
1754 goto discard_it;
1755
1756 /* An explanation is required here, I think.
1757 * Packet length and doff are validated by header prediction,
caa20d9a 1758 * provided the case of th->doff==0 is eliminated.
1da177e4 1759 * So, we defer the checks. */
ed70fcfc
TH
1760
1761 if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
6a5dc9e5 1762 goto csum_error;
1da177e4 1763
aa8223c7 1764 th = tcp_hdr(skb);
eddc9ec5 1765 iph = ip_hdr(skb);
1da177e4
LT
1766 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1767 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1768 skb->len - th->doff * 4);
1769 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1770 TCP_SKB_CB(skb)->when = 0;
b82d1bb4 1771 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1da177e4
LT
1772 TCP_SKB_CB(skb)->sacked = 0;
1773
9a1f27c4 1774 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1da177e4
LT
1775 if (!sk)
1776 goto no_tcp_socket;
1777
bb134d5d
ED
1778process:
1779 if (sk->sk_state == TCP_TIME_WAIT)
1780 goto do_time_wait;
1781
6cce09f8
ED
1782 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1783 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
d218d111 1784 goto discard_and_relse;
6cce09f8 1785 }
d218d111 1786
1da177e4
LT
1787 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1788 goto discard_and_relse;
b59c2701 1789 nf_reset(skb);
1da177e4 1790
fda9ef5d 1791 if (sk_filter(sk, skb))
1da177e4
LT
1792 goto discard_and_relse;
1793
8b80cda5 1794 sk_mark_napi_id(sk, skb);
1da177e4
LT
1795 skb->dev = NULL;
1796
c6366184 1797 bh_lock_sock_nested(sk);
1da177e4
LT
1798 ret = 0;
1799 if (!sock_owned_by_user(sk)) {
1a2449a8
CL
1800#ifdef CONFIG_NET_DMA
1801 struct tcp_sock *tp = tcp_sk(sk);
1802 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
a2bd1140 1803 tp->ucopy.dma_chan = net_dma_find_channel();
1a2449a8 1804 if (tp->ucopy.dma_chan)
1da177e4 1805 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8
CL
1806 else
1807#endif
1808 {
1809 if (!tcp_prequeue(sk, skb))
ae8d7f88 1810 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8 1811 }
da882c1f
ED
1812 } else if (unlikely(sk_add_backlog(sk, skb,
1813 sk->sk_rcvbuf + sk->sk_sndbuf))) {
6b03a53a 1814 bh_unlock_sock(sk);
6cce09f8 1815 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
6b03a53a
ZY
1816 goto discard_and_relse;
1817 }
1da177e4
LT
1818 bh_unlock_sock(sk);
1819
1820 sock_put(sk);
1821
1822 return ret;
1823
1824no_tcp_socket:
1825 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1826 goto discard_it;
1827
1828 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
6a5dc9e5
ED
1829csum_error:
1830 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1da177e4 1831bad_packet:
63231bdd 1832 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1da177e4 1833 } else {
cfb6eeb4 1834 tcp_v4_send_reset(NULL, skb);
1da177e4
LT
1835 }
1836
1837discard_it:
1838 /* Discard frame. */
1839 kfree_skb(skb);
e905a9ed 1840 return 0;
1da177e4
LT
1841
1842discard_and_relse:
1843 sock_put(sk);
1844 goto discard_it;
1845
1846do_time_wait:
1847 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
9469c7b4 1848 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1849 goto discard_it;
1850 }
1851
6a5dc9e5 1852 if (skb->len < (th->doff << 2)) {
9469c7b4 1853 inet_twsk_put(inet_twsk(sk));
6a5dc9e5
ED
1854 goto bad_packet;
1855 }
1856 if (tcp_checksum_complete(skb)) {
1857 inet_twsk_put(inet_twsk(sk));
1858 goto csum_error;
1da177e4 1859 }
9469c7b4 1860 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1da177e4 1861 case TCP_TW_SYN: {
c346dca1 1862 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
c67499c0 1863 &tcp_hashinfo,
da5e3630 1864 iph->saddr, th->source,
eddc9ec5 1865 iph->daddr, th->dest,
463c84b9 1866 inet_iif(skb));
1da177e4 1867 if (sk2) {
9469c7b4
YH
1868 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1869 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1870 sk = sk2;
1871 goto process;
1872 }
1873 /* Fall through to ACK */
1874 }
1875 case TCP_TW_ACK:
1876 tcp_v4_timewait_ack(sk, skb);
1877 break;
1878 case TCP_TW_RST:
1879 goto no_tcp_socket;
1880 case TCP_TW_SUCCESS:;
1881 }
1882 goto discard_it;
1883}
1884
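/*
 * tcp_v4_rcv() is the protocol handler for incoming IPv4 TCP segments: it
 * validates the header and checksum, fills in the TCP control block, looks
 * the socket up in the established/listening hashes, applies min_ttl, XFRM
 * policy and socket-filter checks, and then either processes the segment
 * directly (tcp_v4_do_rcv), parks it on the prequeue, or appends it to the
 * backlog when the socket is owned by a user task. TIME_WAIT sockets are
 * handled separately and may be replaced by a listener's new child on a
 * valid SYN (TCP_TW_SYN).
 */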
ccb7c410
DM
1885static struct timewait_sock_ops tcp_timewait_sock_ops = {
1886 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1887 .twsk_unique = tcp_twsk_unique,
1888 .twsk_destructor= tcp_twsk_destructor,
ccb7c410 1889};
1da177e4 1890
63d02d15 1891void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
5d299f3d
ED
1892{
1893 struct dst_entry *dst = skb_dst(skb);
1894
1895 dst_hold(dst);
1896 sk->sk_rx_dst = dst;
1897 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1898}
63d02d15 1899EXPORT_SYMBOL(inet_sk_rx_dst_set);
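/*
 * inet_sk_rx_dst_set() is the ipv4_specific ->sk_rx_dst_set hook: it caches
 * the input route and the incoming interface on the socket so the
 * ESTABLISHED fast path in tcp_v4_do_rcv() can keep reusing them until the
 * interface changes or dst->ops->check() fails.
 */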
5d299f3d 1900
3b401a81 1901const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
1902 .queue_xmit = ip_queue_xmit,
1903 .send_check = tcp_v4_send_check,
1904 .rebuild_header = inet_sk_rebuild_header,
5d299f3d 1905 .sk_rx_dst_set = inet_sk_rx_dst_set,
543d9cfe
ACM
1906 .conn_request = tcp_v4_conn_request,
1907 .syn_recv_sock = tcp_v4_syn_recv_sock,
543d9cfe
ACM
1908 .net_header_len = sizeof(struct iphdr),
1909 .setsockopt = ip_setsockopt,
1910 .getsockopt = ip_getsockopt,
1911 .addr2sockaddr = inet_csk_addr2sockaddr,
1912 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 1913 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 1914#ifdef CONFIG_COMPAT
543d9cfe
ACM
1915 .compat_setsockopt = compat_ip_setsockopt,
1916 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 1917#endif
1da177e4 1918};
4bc2f18b 1919EXPORT_SYMBOL(ipv4_specific);
1da177e4 1920
cfb6eeb4 1921#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 1922static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 1923 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 1924 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 1925 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 1926};
b6332e6c 1927#endif
cfb6eeb4 1928
1da177e4
LT
1929/* NOTE: A lot of things are set to zero explicitly by the call to
 1930 * sk_alloc(), so they need not be done here.
1931 */
1932static int tcp_v4_init_sock(struct sock *sk)
1933{
6687e988 1934 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4 1935
900f65d3 1936 tcp_init_sock(sk);
1da177e4 1937
8292a17a 1938 icsk->icsk_af_ops = &ipv4_specific;
900f65d3 1939
cfb6eeb4 1940#ifdef CONFIG_TCP_MD5SIG
ac807fa8 1941 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
cfb6eeb4 1942#endif
1da177e4 1943
1da177e4
LT
1944 return 0;
1945}
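/*
 * tcp_v4_init_sock() only layers the IPv4 specifics on top of the generic
 * tcp_init_sock(): it points icsk_af_ops at ipv4_specific and, when
 * CONFIG_TCP_MD5SIG is enabled, installs the IPv4 MD5 operations.
 */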
1946
7d06b2e0 1947void tcp_v4_destroy_sock(struct sock *sk)
1da177e4
LT
1948{
1949 struct tcp_sock *tp = tcp_sk(sk);
1950
1951 tcp_clear_xmit_timers(sk);
1952
6687e988 1953 tcp_cleanup_congestion_control(sk);
317a76f9 1954
1da177e4 1955 /* Clean up the write buffer. */
fe067e8a 1956 tcp_write_queue_purge(sk);
1da177e4
LT
1957
1958 /* Cleans up our, hopefully empty, out_of_order_queue. */
e905a9ed 1959 __skb_queue_purge(&tp->out_of_order_queue);
1da177e4 1960
cfb6eeb4
YH
1961#ifdef CONFIG_TCP_MD5SIG
1962 /* Clean up the MD5 key list, if any */
1963 if (tp->md5sig_info) {
a915da9b 1964 tcp_clear_md5_list(sk);
a8afca03 1965 kfree_rcu(tp->md5sig_info, rcu);
cfb6eeb4
YH
1966 tp->md5sig_info = NULL;
1967 }
1968#endif
1969
1a2449a8
CL
1970#ifdef CONFIG_NET_DMA
1971 /* Cleans up our sk_async_wait_queue */
e905a9ed 1972 __skb_queue_purge(&sk->sk_async_wait_queue);
1a2449a8
CL
1973#endif
1974
1da177e4
LT
1975 /* Clean up the prequeue; it really must be empty. */
1976 __skb_queue_purge(&tp->ucopy.prequeue);
1977
1978 /* Clean up a referenced TCP bind bucket. */
463c84b9 1979 if (inet_csk(sk)->icsk_bind_hash)
ab1e0a13 1980 inet_put_port(sk);
1da177e4 1981
168a8f58 1982 BUG_ON(tp->fastopen_rsk != NULL);
435cf559 1983
cf60af03
YC
1984 /* If socket is aborted during connect operation */
1985 tcp_free_fastopen_req(tp);
1986
180d8cd9 1987 sk_sockets_allocated_dec(sk);
d1a4c0b3 1988 sock_release_memcg(sk);
1da177e4 1989}
1da177e4
LT
1990EXPORT_SYMBOL(tcp_v4_destroy_sock);
1991
1992#ifdef CONFIG_PROC_FS
1993/* Proc filesystem TCP sock list dumping. */
1994
a8b690f9
TH
1995/*
1996 * Get the next listener socket following cur. If cur is NULL, get the first socket
 1997 * starting from the bucket given in st->bucket; when st->bucket is zero the
1998 * very first socket in the hash table is returned.
1999 */
1da177e4
LT
2000static void *listening_get_next(struct seq_file *seq, void *cur)
2001{
463c84b9 2002 struct inet_connection_sock *icsk;
c25eb3bf 2003 struct hlist_nulls_node *node;
1da177e4 2004 struct sock *sk = cur;
5caea4ea 2005 struct inet_listen_hashbucket *ilb;
5799de0b 2006 struct tcp_iter_state *st = seq->private;
a4146b1b 2007 struct net *net = seq_file_net(seq);
1da177e4
LT
2008
2009 if (!sk) {
a8b690f9 2010 ilb = &tcp_hashinfo.listening_hash[st->bucket];
5caea4ea 2011 spin_lock_bh(&ilb->lock);
c25eb3bf 2012 sk = sk_nulls_head(&ilb->head);
a8b690f9 2013 st->offset = 0;
1da177e4
LT
2014 goto get_sk;
2015 }
5caea4ea 2016 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1da177e4 2017 ++st->num;
a8b690f9 2018 ++st->offset;
1da177e4
LT
2019
2020 if (st->state == TCP_SEQ_STATE_OPENREQ) {
60236fdd 2021 struct request_sock *req = cur;
1da177e4 2022
72a3effa 2023 icsk = inet_csk(st->syn_wait_sk);
1da177e4
LT
2024 req = req->dl_next;
2025 while (1) {
2026 while (req) {
bdccc4ca 2027 if (req->rsk_ops->family == st->family) {
1da177e4
LT
2028 cur = req;
2029 goto out;
2030 }
2031 req = req->dl_next;
2032 }
72a3effa 2033 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1da177e4
LT
2034 break;
2035get_req:
463c84b9 2036 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1da177e4 2037 }
1bde5ac4 2038 sk = sk_nulls_next(st->syn_wait_sk);
1da177e4 2039 st->state = TCP_SEQ_STATE_LISTENING;
463c84b9 2040 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2041 } else {
e905a9ed 2042 icsk = inet_csk(sk);
463c84b9
ACM
2043 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2044 if (reqsk_queue_len(&icsk->icsk_accept_queue))
1da177e4 2045 goto start_req;
463c84b9 2046 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1bde5ac4 2047 sk = sk_nulls_next(sk);
1da177e4
LT
2048 }
2049get_sk:
c25eb3bf 2050 sk_nulls_for_each_from(sk, node) {
8475ef9f
PE
2051 if (!net_eq(sock_net(sk), net))
2052 continue;
2053 if (sk->sk_family == st->family) {
1da177e4
LT
2054 cur = sk;
2055 goto out;
2056 }
e905a9ed 2057 icsk = inet_csk(sk);
463c84b9
ACM
2058 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2059 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1da177e4
LT
2060start_req:
2061 st->uid = sock_i_uid(sk);
2062 st->syn_wait_sk = sk;
2063 st->state = TCP_SEQ_STATE_OPENREQ;
2064 st->sbucket = 0;
2065 goto get_req;
2066 }
463c84b9 2067 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2068 }
5caea4ea 2069 spin_unlock_bh(&ilb->lock);
a8b690f9 2070 st->offset = 0;
0f7ff927 2071 if (++st->bucket < INET_LHTABLE_SIZE) {
5caea4ea
ED
2072 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2073 spin_lock_bh(&ilb->lock);
c25eb3bf 2074 sk = sk_nulls_head(&ilb->head);
1da177e4
LT
2075 goto get_sk;
2076 }
2077 cur = NULL;
2078out:
2079 return cur;
2080}
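/*
 * listening_get_next() walks the listening hash bucket by bucket and, for
 * every listener, also walks its SYN queue (st->state switches to
 * TCP_SEQ_STATE_OPENREQ while syn_wait_lock is held), so /proc/net/tcp shows
 * embryonic SYN_RECV entries interleaved with their parent listeners.
 */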
2081
2082static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2083{
a8b690f9
TH
2084 struct tcp_iter_state *st = seq->private;
2085 void *rc;
2086
2087 st->bucket = 0;
2088 st->offset = 0;
2089 rc = listening_get_next(seq, NULL);
1da177e4
LT
2090
2091 while (rc && *pos) {
2092 rc = listening_get_next(seq, rc);
2093 --*pos;
2094 }
2095 return rc;
2096}
2097
05dbc7b5 2098static inline bool empty_bucket(const struct tcp_iter_state *st)
6eac5604 2099{
05dbc7b5 2100 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
6eac5604
AK
2101}
2102
a8b690f9
TH
2103/*
2104 * Get the first established socket, starting from the bucket given in st->bucket.
2105 * If st->bucket is zero, the very first socket in the hash is returned.
2106 */
1da177e4
LT
2107static void *established_get_first(struct seq_file *seq)
2108{
5799de0b 2109 struct tcp_iter_state *st = seq->private;
a4146b1b 2110 struct net *net = seq_file_net(seq);
1da177e4
LT
2111 void *rc = NULL;
2112
a8b690f9
TH
2113 st->offset = 0;
2114 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1da177e4 2115 struct sock *sk;
3ab5aee7 2116 struct hlist_nulls_node *node;
9db66bdc 2117 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1da177e4 2118
6eac5604
AK
2119 /* Lockless fast path for the common case of empty buckets */
2120 if (empty_bucket(st))
2121 continue;
2122
9db66bdc 2123 spin_lock_bh(lock);
3ab5aee7 2124 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
f40c8174 2125 if (sk->sk_family != st->family ||
878628fb 2126 !net_eq(sock_net(sk), net)) {
1da177e4
LT
2127 continue;
2128 }
2129 rc = sk;
2130 goto out;
2131 }
9db66bdc 2132 spin_unlock_bh(lock);
1da177e4
LT
2133 }
2134out:
2135 return rc;
2136}
2137
2138static void *established_get_next(struct seq_file *seq, void *cur)
2139{
2140 struct sock *sk = cur;
3ab5aee7 2141 struct hlist_nulls_node *node;
5799de0b 2142 struct tcp_iter_state *st = seq->private;
a4146b1b 2143 struct net *net = seq_file_net(seq);
1da177e4
LT
2144
2145 ++st->num;
a8b690f9 2146 ++st->offset;
1da177e4 2147
05dbc7b5 2148 sk = sk_nulls_next(sk);
1da177e4 2149
3ab5aee7 2150 sk_nulls_for_each_from(sk, node) {
878628fb 2151 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
05dbc7b5 2152 return sk;
1da177e4
LT
2153 }
2154
05dbc7b5
ED
2155 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2156 ++st->bucket;
2157 return established_get_first(seq);
1da177e4
LT
2158}
2159
2160static void *established_get_idx(struct seq_file *seq, loff_t pos)
2161{
a8b690f9
TH
2162 struct tcp_iter_state *st = seq->private;
2163 void *rc;
2164
2165 st->bucket = 0;
2166 rc = established_get_first(seq);
1da177e4
LT
2167
2168 while (rc && pos) {
2169 rc = established_get_next(seq, rc);
2170 --pos;
7174259e 2171 }
1da177e4
LT
2172 return rc;
2173}
2174
2175static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2176{
2177 void *rc;
5799de0b 2178 struct tcp_iter_state *st = seq->private;
1da177e4 2179
1da177e4
LT
2180 st->state = TCP_SEQ_STATE_LISTENING;
2181 rc = listening_get_idx(seq, &pos);
2182
2183 if (!rc) {
1da177e4
LT
2184 st->state = TCP_SEQ_STATE_ESTABLISHED;
2185 rc = established_get_idx(seq, pos);
2186 }
2187
2188 return rc;
2189}
2190
a8b690f9
TH
2191static void *tcp_seek_last_pos(struct seq_file *seq)
2192{
2193 struct tcp_iter_state *st = seq->private;
2194 int offset = st->offset;
2195 int orig_num = st->num;
2196 void *rc = NULL;
2197
2198 switch (st->state) {
2199 case TCP_SEQ_STATE_OPENREQ:
2200 case TCP_SEQ_STATE_LISTENING:
2201 if (st->bucket >= INET_LHTABLE_SIZE)
2202 break;
2203 st->state = TCP_SEQ_STATE_LISTENING;
2204 rc = listening_get_next(seq, NULL);
2205 while (offset-- && rc)
2206 rc = listening_get_next(seq, rc);
2207 if (rc)
2208 break;
2209 st->bucket = 0;
05dbc7b5 2210 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2211 /* Fallthrough */
2212 case TCP_SEQ_STATE_ESTABLISHED:
a8b690f9
TH
2213 if (st->bucket > tcp_hashinfo.ehash_mask)
2214 break;
2215 rc = established_get_first(seq);
2216 while (offset-- && rc)
2217 rc = established_get_next(seq, rc);
2218 }
2219
2220 st->num = orig_num;
2221
2222 return rc;
2223}
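/*
 * tcp_seek_last_pos() is the resume path for interrupted /proc/net/tcp reads:
 * instead of replaying the iteration from the first bucket, it jumps back to
 * the bucket and in-bucket offset recorded in tcp_iter_state, which keeps
 * repeated reads of a large table from degenerating into quadratic rescans.
 */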
2224
1da177e4
LT
2225static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2226{
5799de0b 2227 struct tcp_iter_state *st = seq->private;
a8b690f9
TH
2228 void *rc;
2229
2230 if (*pos && *pos == st->last_pos) {
2231 rc = tcp_seek_last_pos(seq);
2232 if (rc)
2233 goto out;
2234 }
2235
1da177e4
LT
2236 st->state = TCP_SEQ_STATE_LISTENING;
2237 st->num = 0;
a8b690f9
TH
2238 st->bucket = 0;
2239 st->offset = 0;
2240 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2241
2242out:
2243 st->last_pos = *pos;
2244 return rc;
1da177e4
LT
2245}
2246
2247static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2248{
a8b690f9 2249 struct tcp_iter_state *st = seq->private;
1da177e4 2250 void *rc = NULL;
1da177e4
LT
2251
2252 if (v == SEQ_START_TOKEN) {
2253 rc = tcp_get_idx(seq, 0);
2254 goto out;
2255 }
1da177e4
LT
2256
2257 switch (st->state) {
2258 case TCP_SEQ_STATE_OPENREQ:
2259 case TCP_SEQ_STATE_LISTENING:
2260 rc = listening_get_next(seq, v);
2261 if (!rc) {
1da177e4 2262 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2263 st->bucket = 0;
2264 st->offset = 0;
1da177e4
LT
2265 rc = established_get_first(seq);
2266 }
2267 break;
2268 case TCP_SEQ_STATE_ESTABLISHED:
1da177e4
LT
2269 rc = established_get_next(seq, v);
2270 break;
2271 }
2272out:
2273 ++*pos;
a8b690f9 2274 st->last_pos = *pos;
1da177e4
LT
2275 return rc;
2276}
2277
2278static void tcp_seq_stop(struct seq_file *seq, void *v)
2279{
5799de0b 2280 struct tcp_iter_state *st = seq->private;
1da177e4
LT
2281
2282 switch (st->state) {
2283 case TCP_SEQ_STATE_OPENREQ:
2284 if (v) {
463c84b9
ACM
2285 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2286 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4
LT
2287 }
2288 case TCP_SEQ_STATE_LISTENING:
2289 if (v != SEQ_START_TOKEN)
5caea4ea 2290 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
1da177e4 2291 break;
1da177e4
LT
2292 case TCP_SEQ_STATE_ESTABLISHED:
2293 if (v)
9db66bdc 2294 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2295 break;
2296 }
2297}
2298
73cb88ec 2299int tcp_seq_open(struct inode *inode, struct file *file)
1da177e4 2300{
d9dda78b 2301 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
1da177e4 2302 struct tcp_iter_state *s;
52d6f3f1 2303 int err;
1da177e4 2304
52d6f3f1
DL
2305 err = seq_open_net(inode, file, &afinfo->seq_ops,
2306 sizeof(struct tcp_iter_state));
2307 if (err < 0)
2308 return err;
f40c8174 2309
52d6f3f1 2310 s = ((struct seq_file *)file->private_data)->private;
1da177e4 2311 s->family = afinfo->family;
a8b690f9 2312 s->last_pos = 0;
f40c8174
DL
2313 return 0;
2314}
73cb88ec 2315EXPORT_SYMBOL(tcp_seq_open);
f40c8174 2316
6f8b13bc 2317int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4
LT
2318{
2319 int rc = 0;
2320 struct proc_dir_entry *p;
2321
9427c4b3
DL
2322 afinfo->seq_ops.start = tcp_seq_start;
2323 afinfo->seq_ops.next = tcp_seq_next;
2324 afinfo->seq_ops.stop = tcp_seq_stop;
2325
84841c3c 2326 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
73cb88ec 2327 afinfo->seq_fops, afinfo);
84841c3c 2328 if (!p)
1da177e4
LT
2329 rc = -ENOMEM;
2330 return rc;
2331}
4bc2f18b 2332EXPORT_SYMBOL(tcp_proc_register);
1da177e4 2333
6f8b13bc 2334void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4 2335{
ece31ffd 2336 remove_proc_entry(afinfo->name, net->proc_net);
1da177e4 2337}
4bc2f18b 2338EXPORT_SYMBOL(tcp_proc_unregister);
1da177e4 2339
cf533ea5 2340static void get_openreq4(const struct sock *sk, const struct request_sock *req,
652586df 2341 struct seq_file *f, int i, kuid_t uid)
1da177e4 2342{
2e6599cb 2343 const struct inet_request_sock *ireq = inet_rsk(req);
a399a805 2344 long delta = req->expires - jiffies;
1da177e4 2345
5e659e4c 2346 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
652586df 2347 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
1da177e4 2348 i,
634fb979 2349 ireq->ir_loc_addr,
c720c7e8 2350 ntohs(inet_sk(sk)->inet_sport),
634fb979
ED
2351 ireq->ir_rmt_addr,
2352 ntohs(ireq->ir_rmt_port),
1da177e4
LT
2353 TCP_SYN_RECV,
2354 0, 0, /* could print option size, but that is af dependent. */
2355 1, /* timers active (only the expire timer) */
a399a805 2356 jiffies_delta_to_clock_t(delta),
e6c022a4 2357 req->num_timeout,
a7cb5a49 2358 from_kuid_munged(seq_user_ns(f), uid),
1da177e4
LT
2359 0, /* non standard timer */
2360 0, /* open_requests have no inode */
2361 atomic_read(&sk->sk_refcnt),
652586df 2362 req);
1da177e4
LT
2363}
2364
652586df 2365static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
1da177e4
LT
2366{
2367 int timer_active;
2368 unsigned long timer_expires;
cf533ea5 2369 const struct tcp_sock *tp = tcp_sk(sk);
cf4c6bf8 2370 const struct inet_connection_sock *icsk = inet_csk(sk);
cf533ea5 2371 const struct inet_sock *inet = inet_sk(sk);
168a8f58 2372 struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
c720c7e8
ED
2373 __be32 dest = inet->inet_daddr;
2374 __be32 src = inet->inet_rcv_saddr;
2375 __u16 destp = ntohs(inet->inet_dport);
2376 __u16 srcp = ntohs(inet->inet_sport);
49d09007 2377 int rx_queue;
1da177e4 2378
6ba8a3b1
ND
2379 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2380 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2381 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1da177e4 2382 timer_active = 1;
463c84b9
ACM
2383 timer_expires = icsk->icsk_timeout;
2384 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2385 timer_active = 4;
463c84b9 2386 timer_expires = icsk->icsk_timeout;
cf4c6bf8 2387 } else if (timer_pending(&sk->sk_timer)) {
1da177e4 2388 timer_active = 2;
cf4c6bf8 2389 timer_expires = sk->sk_timer.expires;
1da177e4
LT
2390 } else {
2391 timer_active = 0;
2392 timer_expires = jiffies;
2393 }
2394
49d09007
ED
2395 if (sk->sk_state == TCP_LISTEN)
2396 rx_queue = sk->sk_ack_backlog;
2397 else
2398 /*
2399 * because we don't lock the socket, we might find a transient negative value
2400 */
2401 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2402
5e659e4c 2403 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
652586df 2404 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
cf4c6bf8 2405 i, src, srcp, dest, destp, sk->sk_state,
47da8ee6 2406 tp->write_seq - tp->snd_una,
49d09007 2407 rx_queue,
1da177e4 2408 timer_active,
a399a805 2409 jiffies_delta_to_clock_t(timer_expires - jiffies),
463c84b9 2410 icsk->icsk_retransmits,
a7cb5a49 2411 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
6687e988 2412 icsk->icsk_probes_out,
cf4c6bf8
IJ
2413 sock_i_ino(sk),
2414 atomic_read(&sk->sk_refcnt), sk,
7be87351
SH
2415 jiffies_to_clock_t(icsk->icsk_rto),
2416 jiffies_to_clock_t(icsk->icsk_ack.ato),
463c84b9 2417 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1da177e4 2418 tp->snd_cwnd,
168a8f58
JC
2419 sk->sk_state == TCP_LISTEN ?
2420 (fastopenq ? fastopenq->max_qlen : 0) :
652586df 2421 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
1da177e4
LT
2422}
2423
cf533ea5 2424static void get_timewait4_sock(const struct inet_timewait_sock *tw,
652586df 2425 struct seq_file *f, int i)
1da177e4 2426{
23f33c2d 2427 __be32 dest, src;
1da177e4 2428 __u16 destp, srcp;
e2a1d3e4 2429 s32 delta = tw->tw_ttd - inet_tw_time_stamp();
1da177e4
LT
2430
2431 dest = tw->tw_daddr;
2432 src = tw->tw_rcv_saddr;
2433 destp = ntohs(tw->tw_dport);
2434 srcp = ntohs(tw->tw_sport);
2435
5e659e4c 2436 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
652586df 2437 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
1da177e4 2438 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
a399a805 2439 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
652586df 2440 atomic_read(&tw->tw_refcnt), tw);
1da177e4
LT
2441}
2442
2443#define TMPSZ 150
2444
2445static int tcp4_seq_show(struct seq_file *seq, void *v)
2446{
5799de0b 2447 struct tcp_iter_state *st;
05dbc7b5 2448 struct sock *sk = v;
1da177e4 2449
652586df 2450 seq_setwidth(seq, TMPSZ - 1);
1da177e4 2451 if (v == SEQ_START_TOKEN) {
652586df 2452 seq_puts(seq, " sl local_address rem_address st tx_queue "
1da177e4
LT
2453 "rx_queue tr tm->when retrnsmt uid timeout "
2454 "inode");
2455 goto out;
2456 }
2457 st = seq->private;
2458
2459 switch (st->state) {
2460 case TCP_SEQ_STATE_LISTENING:
2461 case TCP_SEQ_STATE_ESTABLISHED:
05dbc7b5 2462 if (sk->sk_state == TCP_TIME_WAIT)
652586df 2463 get_timewait4_sock(v, seq, st->num);
05dbc7b5 2464 else
652586df 2465 get_tcp4_sock(v, seq, st->num);
1da177e4
LT
2466 break;
2467 case TCP_SEQ_STATE_OPENREQ:
652586df 2468 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid);
1da177e4
LT
2469 break;
2470 }
1da177e4 2471out:
652586df 2472 seq_pad(seq, '\n');
1da177e4
LT
2473 return 0;
2474}
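/*
 * The resulting /proc/net/tcp lines use hexadecimal addresses and ports, so
 * (values below are illustrative only) a listener on port 22 shows up roughly
 * as
 *   0: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000  0  0 12345 ...
 * where the "st" column 0A is TCP_LISTEN and 0016 is port 22.
 */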
2475
73cb88ec
AV
2476static const struct file_operations tcp_afinfo_seq_fops = {
2477 .owner = THIS_MODULE,
2478 .open = tcp_seq_open,
2479 .read = seq_read,
2480 .llseek = seq_lseek,
2481 .release = seq_release_net
2482};
2483
1da177e4 2484static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1da177e4
LT
2485 .name = "tcp",
2486 .family = AF_INET,
73cb88ec 2487 .seq_fops = &tcp_afinfo_seq_fops,
9427c4b3
DL
2488 .seq_ops = {
2489 .show = tcp4_seq_show,
2490 },
1da177e4
LT
2491};
2492
2c8c1e72 2493static int __net_init tcp4_proc_init_net(struct net *net)
757764f6
PE
2494{
2495 return tcp_proc_register(net, &tcp4_seq_afinfo);
2496}
2497
2c8c1e72 2498static void __net_exit tcp4_proc_exit_net(struct net *net)
757764f6
PE
2499{
2500 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2501}
2502
2503static struct pernet_operations tcp4_net_ops = {
2504 .init = tcp4_proc_init_net,
2505 .exit = tcp4_proc_exit_net,
2506};
2507
1da177e4
LT
2508int __init tcp4_proc_init(void)
2509{
757764f6 2510 return register_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2511}
2512
2513void tcp4_proc_exit(void)
2514{
757764f6 2515 unregister_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2516}
2517#endif /* CONFIG_PROC_FS */
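/*
 * Minimal user-space sketch (illustrative only, not part of the kernel
 * sources) of consuming the seq_file output registered above:
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char line[256];
 *		FILE *f = fopen("/proc/net/tcp", "r");
 *
 *		if (!f)
 *			return 1;
 *		while (fgets(line, sizeof(line), f))
 *			fputs(line, stdout);	// header line first, then one socket per line
 *		fclose(f);
 *		return 0;
 *	}
 */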
2518
2519struct proto tcp_prot = {
2520 .name = "TCP",
2521 .owner = THIS_MODULE,
2522 .close = tcp_close,
2523 .connect = tcp_v4_connect,
2524 .disconnect = tcp_disconnect,
463c84b9 2525 .accept = inet_csk_accept,
1da177e4
LT
2526 .ioctl = tcp_ioctl,
2527 .init = tcp_v4_init_sock,
2528 .destroy = tcp_v4_destroy_sock,
2529 .shutdown = tcp_shutdown,
2530 .setsockopt = tcp_setsockopt,
2531 .getsockopt = tcp_getsockopt,
1da177e4 2532 .recvmsg = tcp_recvmsg,
7ba42910
CG
2533 .sendmsg = tcp_sendmsg,
2534 .sendpage = tcp_sendpage,
1da177e4 2535 .backlog_rcv = tcp_v4_do_rcv,
46d3ceab 2536 .release_cb = tcp_release_cb,
563d34d0 2537 .mtu_reduced = tcp_v4_mtu_reduced,
ab1e0a13
ACM
2538 .hash = inet_hash,
2539 .unhash = inet_unhash,
2540 .get_port = inet_csk_get_port,
1da177e4 2541 .enter_memory_pressure = tcp_enter_memory_pressure,
c9bee3b7 2542 .stream_memory_free = tcp_stream_memory_free,
1da177e4 2543 .sockets_allocated = &tcp_sockets_allocated,
0a5578cf 2544 .orphan_count = &tcp_orphan_count,
1da177e4
LT
2545 .memory_allocated = &tcp_memory_allocated,
2546 .memory_pressure = &tcp_memory_pressure,
a4fe34bf 2547 .sysctl_mem = sysctl_tcp_mem,
1da177e4
LT
2548 .sysctl_wmem = sysctl_tcp_wmem,
2549 .sysctl_rmem = sysctl_tcp_rmem,
2550 .max_header = MAX_TCP_HEADER,
2551 .obj_size = sizeof(struct tcp_sock),
3ab5aee7 2552 .slab_flags = SLAB_DESTROY_BY_RCU,
6d6ee43e 2553 .twsk_prot = &tcp_timewait_sock_ops,
60236fdd 2554 .rsk_prot = &tcp_request_sock_ops,
39d8cda7 2555 .h.hashinfo = &tcp_hashinfo,
7ba42910 2556 .no_autobind = true,
543d9cfe
ACM
2557#ifdef CONFIG_COMPAT
2558 .compat_setsockopt = compat_tcp_setsockopt,
2559 .compat_getsockopt = compat_tcp_getsockopt,
2560#endif
c255a458 2561#ifdef CONFIG_MEMCG_KMEM
d1a4c0b3
GC
2562 .init_cgroup = tcp_init_cgroup,
2563 .destroy_cgroup = tcp_destroy_cgroup,
2564 .proto_cgroup = tcp_proto_cgroup,
2565#endif
1da177e4 2566};
4bc2f18b 2567EXPORT_SYMBOL(tcp_prot);
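/*
 * tcp_prot collects the TCP entry points consumed by the generic AF_INET
 * socket layer; the actual binding to SOCK_STREAM/IPPROTO_TCP sockets happens
 * elsewhere (af_inet.c's inet_init() and inetsw table), so tcp_v4_init()
 * below only has to set up the hash tables and the per-netns state.
 */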
1da177e4 2568
046ee902
DL
2569static int __net_init tcp_sk_init(struct net *net)
2570{
5d134f1c 2571 net->ipv4.sysctl_tcp_ecn = 2;
be9f4a44 2572 return 0;
046ee902
DL
2573}
2574
2575static void __net_exit tcp_sk_exit(struct net *net)
2576{
b099ce26
EB
2577}
2578
2579static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2580{
2581 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
046ee902
DL
2582}
2583
2584static struct pernet_operations __net_initdata tcp_sk_ops = {
b099ce26
EB
2585 .init = tcp_sk_init,
2586 .exit = tcp_sk_exit,
2587 .exit_batch = tcp_sk_exit_batch,
046ee902
DL
2588};
2589
9b0f976f 2590void __init tcp_v4_init(void)
1da177e4 2591{
5caea4ea 2592 inet_hashinfo_init(&tcp_hashinfo);
6a1b3054 2593 if (register_pernet_subsys(&tcp_sk_ops))
1da177e4 2594 panic("Failed to create the TCP control socket.\n");
1da177e4 2595}