tcp: TCP Small Queues
net/ipv4/tcp_ipv4.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *	David S. Miller		:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *	David S. Miller		:	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *	Andi Kleen		:	Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *	Andi Kleen		:	Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *	Mike McLagan		:	Routing by source
 *	Juan Jose Ciarlante	:	ip_dynaddr bits
 *	Andi Kleen		:	various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);


#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only the timestamp cache
	   is held not per host, but per port pair, and the TW bucket is used
	   as the state holder.

	   If the TW bucket has already been destroyed we fall back to VJ's
	   scheme and use the initial timestamp retrieved from the peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
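
/*
 * Editorial worked example (not from the original source): suppose the old
 * TIME-WAIT connection finished with tw_snd_nxt == 1000. The reuse path above
 * starts the new incarnation at write_seq = 1000 + 65535 + 2 = 66537, i.e.
 * just past any sequence number a peer with a full 64KB window could still
 * attribute to the old connection, so stray duplicate segments cannot be
 * mistaken for new data.
 */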

static int tcp_repair_connect(struct sock *sk)
{
	tcp_connect_init(sk);
	tcp_finish_connect(sk, NULL);

	return 0;
}

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk, true);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables
	 * and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	if (likely(!tp->repair))
		err = tcp_connect(sk);
	else
		err = tcp_repair_connect(sk);

	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);
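
/*
 * Editorial sketch (not part of the kernel source): tcp_v4_connect() is what
 * ultimately runs when userspace calls connect() on an AF_INET stream socket.
 * A minimal caller, assuming a listener on the hypothetical 127.0.0.1:8080,
 * might look like:
 *
 *	#include <arpa/inet.h>
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	int demo_connect(void)
 *	{
 *		struct sockaddr_in sin = { .sin_family = AF_INET };
 *		int fd = socket(AF_INET, SOCK_STREAM, 0);
 *
 *		if (fd < 0)
 *			return -1;
 *		sin.sin_port = htons(8080);	// demo port, assumption
 *		inet_pton(AF_INET, "127.0.0.1", &sin.sin_addr);
 *		if (connect(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
 *			close(fd);	// e.g. the -EINVAL/-EAFNOSUPPORT paths above
 *			return -1;
 *		}
 *		return fd;
 *	}
 */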

/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always < 576 bytes, so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the dst entry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet-too-big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
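
/*
 * Editorial worked example (not from the original source): tcp_sync_mss()
 * clamps the MSS to the new path MTU minus the fixed headers. With plain
 * IPv4 and TCP headers and a PMTU of 1500, the MSS becomes
 * 1500 - 20 (IP) - 20 (TCP) = 1460 bytes; if an ICMP_FRAG_NEEDED reports
 * an MTU of 1280, the MSS drops to 1280 - 40 = 1240.
 */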

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, e.g., if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even these two messages finally
	 * lose their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

static void __tcp_v4_send_check(struct sk_buff *skb,
				__be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);
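
/*
 * Editorial sketch of the two cases above (not part of this file): with
 * CHECKSUM_PARTIAL the device completes the sum, so software only seeds
 * th->check with the folded pseudo-header sum, roughly:
 *
 *	// pseudo-header only; hardware adds the TCP header + payload
 *	th->check = ~csum_tcpudp_magic(saddr, daddr, skb->len, IPPROTO_TCP, 0);
 *
 * whereas the software path folds the full csum_partial() result over the
 * header and data into the same field. csum_tcpudp_magic() is the helper
 * that tcp_v4_check() wraps; shown here purely for illustration.
 */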

int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
	return 0;
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks:  why I NEVER use socket parameters (TOS, TTL etc.)
 *		       for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So we build the reply based only on the parameters that
 *		arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket. We do not loosen security here:
		 * the incoming packet is checked against the md5 hash of the
		 * key we find; no RST is generated if the md5 hash doesn't
		 * match.
		 */
		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
					     &tcp_hashinfo, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if we can't find the key */
		if (!sk1)
			return;
		rcu_read_lock();
		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto release_sk1;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					     &ip_hdr(skb)->saddr,
					     AF_INET) : NULL;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When the socket is gone, all binding information is lost.
	 * Routing might fail in this case, so use iif for oif to
	 * make sure we can deliver it.
	 */
	arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}

/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
   outside socket context, is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      struct request_values *rvp,
			      u16 queue_mapping,
			      bool nocache)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, rvp);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

		skb_set_queue_mapping(skb, queue_mapping);
		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	return err;
}

static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
			     struct request_values *rvp)
{
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/*
 * Return true if a syncookie should be sent
 */
bool tcp_syn_flood_action(struct sock *sk,
			  const struct sk_buff *skb,
			  const char *proto)
{
	const char *msg = "Dropping request";
	bool want_cookie = false;
	struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = true;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);
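
/*
 * Editorial note: sysctl_tcp_syncookies above is the net.ipv4.tcp_syncookies
 * knob. A minimal illustrative sketch (not part of this file) of how a
 * userspace program would enable it:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	void demo_enable_syncookies(void)
 *	{
 *		int fd = open("/proc/sys/net/ipv4/tcp_syncookies", O_WRONLY);
 *
 *		if (fd >= 0) {
 *			write(fd, "1", 1);	// send cookies under SYN flood
 *			close(fd);
 *		}
 *	}
 */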

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
						  struct sk_buff *skb)
{
	const struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options_rcu *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = sizeof(*dopt) + opt->optlen;

		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(&dopt->opt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *pos;
	unsigned int size = sizeof(struct in_addr);
	struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       sock_owned_by_user(sk) ||
				       lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *pos, *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	if (!hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();
	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}
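
/*
 * Editorial sketch (not kernel code): tcp_v4_parse_md5_keys() services the
 * TCP_MD5SIG socket option. A userspace caller, assuming a hypothetical peer
 * at 192.0.2.1 and the shared secret "s3cret", would look roughly like:
 *
 *	#include <arpa/inet.h>
 *	#include <netinet/in.h>
 *	#include <netinet/tcp.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *
 *	int demo_set_md5_key(int fd)
 *	{
 *		struct tcp_md5sig md5 = { 0 };
 *		struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *		sin->sin_family = AF_INET;
 *		sin->sin_addr.s_addr = inet_addr("192.0.2.1");
 *		md5.tcpm_keylen = strlen("s3cret");
 *		memcpy(md5.tcpm_key, "s3cret", md5.tcpm_keylen);
 *		// a zero tcpm_keylen would delete the key instead (see above)
 *		return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG,
 *				  &md5, sizeof(md5));
 *	}
 */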

static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
					     : "");
		return true;
	}
	return false;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
};
#endif

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	const u8 *hash_location;
	struct request_sock *req;
	struct inet_request_sock *ireq;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	bool want_cookie = false;

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
		if (!want_cookie)
			goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss  = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);

	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u8 *c;
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
			goto drop_and_release;

		/* Secret recipe starts with IP addresses */
		*mess++ ^= (__force u32)daddr;
		*mess++ ^= (__force u32)saddr;

		/* plus variable length Initiator Cookie */
		c = (u8 *)mess;
		while (l-- > 0)
			*c++ ^= *hash_location++;

		want_cookie = false;	/* not our kind of cookie */
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	} else {
		goto drop_and_release;
	}
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, skb);

	if (want_cookie) {
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		struct flowi4 fl4;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL &&
		    fl4.daddr == saddr) {
			if (!tcp_peer_is_proven(req, dst, true)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 !tcp_peer_is_proven(req, dst, false)) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->snt_synack = tcp_time_stamp;

	if (tcp_v4_send_synack(sk, dst, req,
			       (struct request_values *)&tmp_ext,
			       skb_get_queue_mapping(skb),
			       want_cookie) ||
	    want_cookie)
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);


/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->inet_daddr   = ireq->rmt_addr;
	newinet->inet_rcv_saddr = ireq->loc_addr;
	newinet->inet_saddr   = ireq->loc_addr;
	inet_opt	      = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	newinet->rcv_tos      = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);
	if (tcp_rsk(req)->snt_synack)
		tcp_valid_rtt_meas(newsk,
		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
	newtp->total_retrans = req->retrans;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	tcp_clear_xmit_timers(newsk);
	tcp_cleanup_congestion_control(newsk);
	bh_unlock_sock(newsk);
	sock_put(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->len, iph->saddr,
				  iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}


/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		sock_rps_save_rxhash(sk, skb);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);
1da177e4 1655
160eb5a6 1656void tcp_v4_early_demux(struct sk_buff *skb)
41063e9d
DM
1657{
1658 struct net *net = dev_net(skb->dev);
1659 const struct iphdr *iph;
1660 const struct tcphdr *th;
fd62e09b 1661 struct net_device *dev;
41063e9d 1662 struct sock *sk;
41063e9d 1663
41063e9d 1664 if (skb->pkt_type != PACKET_HOST)
160eb5a6 1665 return;
41063e9d
DM
1666
1667 if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr)))
160eb5a6 1668 return;
41063e9d
DM
1669
1670 iph = ip_hdr(skb);
1671 th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb));
1672
1673 if (th->doff < sizeof(struct tcphdr) / 4)
160eb5a6 1674 return;
41063e9d
DM
1675
1676 if (!pskb_may_pull(skb, ip_hdrlen(skb) + th->doff * 4))
160eb5a6 1677 return;
41063e9d 1678
fd62e09b 1679 dev = skb->dev;
41063e9d
DM
1680 sk = __inet_lookup_established(net, &tcp_hashinfo,
1681 iph->saddr, th->source,
7011d085 1682 iph->daddr, ntohs(th->dest),
fd62e09b 1683 dev->ifindex);
41063e9d
DM
1684 if (sk) {
1685 skb->sk = sk;
1686 skb->destructor = sock_edemux;
1687 if (sk->sk_state != TCP_TIME_WAIT) {
1688 struct dst_entry *dst = sk->sk_rx_dst;
1689 if (dst)
1690 dst = dst_check(dst, 0);
1691 if (dst) {
fd62e09b
DM
1692 struct rtable *rt = (struct rtable *) dst;
1693
160eb5a6 1694 if (rt->rt_iif == dev->ifindex)
fd62e09b 1695 skb_dst_set_noref(skb, dst);
41063e9d
DM
1696 }
1697 }
1698 }
41063e9d
DM
1699}
1700
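
The demux path above refuses to touch a segment whose data offset is impossible (doff < 5) or whose full header is not yet available in the linear area. A stand-alone sketch of those two length checks, with a hypothetical tcp_doff_valid() standing in for the pskb_may_pull() dance:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static bool tcp_doff_valid(unsigned int doff, size_t avail)
{
	if (doff < 5)                      /* th->doff < sizeof(struct tcphdr) / 4 */
		return false;
	return (size_t)doff * 4 <= avail;  /* whole header must be present */
}

int main(void)
{
	printf("%d\n", tcp_doff_valid(5, 20));  /* 1: bare 20-byte header */
	printf("%d\n", tcp_doff_valid(4, 40));  /* 0: impossible offset */
	printf("%d\n", tcp_doff_valid(8, 24));  /* 0: options truncated */
	return 0;
}
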
1da177e4
LT
1701/*
1702 * From tcp_input.c
1703 */
1704
1705int tcp_v4_rcv(struct sk_buff *skb)
1706{
eddc9ec5 1707 const struct iphdr *iph;
cf533ea5 1708 const struct tcphdr *th;
1da177e4
LT
1709 struct sock *sk;
1710 int ret;
a86b1e30 1711 struct net *net = dev_net(skb->dev);
1da177e4
LT
1712
1713 if (skb->pkt_type != PACKET_HOST)
1714 goto discard_it;
1715
1716 /* Count it even if it's bad */
63231bdd 1717 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1da177e4
LT
1718
1719 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1720 goto discard_it;
1721
aa8223c7 1722 th = tcp_hdr(skb);
1da177e4
LT
1723
1724 if (th->doff < sizeof(struct tcphdr) / 4)
1725 goto bad_packet;
1726 if (!pskb_may_pull(skb, th->doff * 4))
1727 goto discard_it;
1728
1729 /* An explanation is required here, I think.
1730 * Packet length and doff are validated by header prediction,
caa20d9a 1731	 * provided the case of th->doff==0 is eliminated.
1da177e4 1732 * So, we defer the checks. */
60476372 1733 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1da177e4
LT
1734 goto bad_packet;
1735
aa8223c7 1736 th = tcp_hdr(skb);
eddc9ec5 1737 iph = ip_hdr(skb);
1da177e4
LT
1738 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1739 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1740 skb->len - th->doff * 4);
1741 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1742 TCP_SKB_CB(skb)->when = 0;
b82d1bb4 1743 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1da177e4
LT
1744 TCP_SKB_CB(skb)->sacked = 0;
1745
9a1f27c4 1746 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1da177e4
LT
1747 if (!sk)
1748 goto no_tcp_socket;
1749
bb134d5d
ED
1750process:
1751 if (sk->sk_state == TCP_TIME_WAIT)
1752 goto do_time_wait;
1753
6cce09f8
ED
1754 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1755 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
d218d111 1756 goto discard_and_relse;
6cce09f8 1757 }
d218d111 1758
1da177e4
LT
1759 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1760 goto discard_and_relse;
b59c2701 1761 nf_reset(skb);
1da177e4 1762
fda9ef5d 1763 if (sk_filter(sk, skb))
1da177e4
LT
1764 goto discard_and_relse;
1765
1766 skb->dev = NULL;
1767
c6366184 1768 bh_lock_sock_nested(sk);
1da177e4
LT
1769 ret = 0;
1770 if (!sock_owned_by_user(sk)) {
1a2449a8
CL
1771#ifdef CONFIG_NET_DMA
1772 struct tcp_sock *tp = tcp_sk(sk);
1773 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
a2bd1140 1774 tp->ucopy.dma_chan = net_dma_find_channel();
1a2449a8 1775 if (tp->ucopy.dma_chan)
1da177e4 1776 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8
CL
1777 else
1778#endif
1779 {
1780 if (!tcp_prequeue(sk, skb))
ae8d7f88 1781 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8 1782 }
da882c1f
ED
1783 } else if (unlikely(sk_add_backlog(sk, skb,
1784 sk->sk_rcvbuf + sk->sk_sndbuf))) {
6b03a53a 1785 bh_unlock_sock(sk);
6cce09f8 1786 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
6b03a53a
ZY
1787 goto discard_and_relse;
1788 }
1da177e4
LT
1789 bh_unlock_sock(sk);
1790
1791 sock_put(sk);
1792
1793 return ret;
1794
1795no_tcp_socket:
1796 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1797 goto discard_it;
1798
1799 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1800bad_packet:
63231bdd 1801 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1da177e4 1802 } else {
cfb6eeb4 1803 tcp_v4_send_reset(NULL, skb);
1da177e4
LT
1804 }
1805
1806discard_it:
1807 /* Discard frame. */
1808 kfree_skb(skb);
e905a9ed 1809 return 0;
1da177e4
LT
1810
1811discard_and_relse:
1812 sock_put(sk);
1813 goto discard_it;
1814
1815do_time_wait:
1816 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
9469c7b4 1817 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1818 goto discard_it;
1819 }
1820
1821 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
63231bdd 1822 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
9469c7b4 1823 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1824 goto discard_it;
1825 }
9469c7b4 1826 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1da177e4 1827 case TCP_TW_SYN: {
c346dca1 1828 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
c67499c0 1829 &tcp_hashinfo,
eddc9ec5 1830 iph->daddr, th->dest,
463c84b9 1831 inet_iif(skb));
1da177e4 1832 if (sk2) {
9469c7b4
YH
1833 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1834 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1835 sk = sk2;
1836 goto process;
1837 }
1838 /* Fall through to ACK */
1839 }
1840 case TCP_TW_ACK:
1841 tcp_v4_timewait_ack(sk, skb);
1842 break;
1843 case TCP_TW_RST:
1844 goto no_tcp_socket;
1845 case TCP_TW_SUCCESS:;
1846 }
1847 goto discard_it;
1848}
1849
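
tcp_v4_rcv() above derives end_seq as seq + syn + fin + payload length, because SYN and FIN each occupy one sequence number of their own. A minimal stand-alone illustration of that arithmetic (hypothetical helper; unsigned wraparound matches TCP's mod-2^32 sequence space):

#include <stdint.h>
#include <stdio.h>

static uint32_t tcp_end_seq(uint32_t seq, int syn, int fin, uint32_t payload)
{
	return seq + syn + fin + payload;   /* wraps mod 2^32, as TCP does */
}

int main(void)
{
	/* A SYN with no data still occupies one sequence number. */
	printf("%u\n", tcp_end_seq(1000, 1, 0, 0));    /* 1001 */
	/* 100 data bytes plus a FIN. */
	printf("%u\n", tcp_end_seq(5000, 0, 1, 100));  /* 5101 */
	return 0;
}
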
ccb7c410
DM
1850static struct timewait_sock_ops tcp_timewait_sock_ops = {
1851 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1852 .twsk_unique = tcp_twsk_unique,
1853 .twsk_destructor= tcp_twsk_destructor,
ccb7c410 1854};
1da177e4 1855
3b401a81 1856const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
1857 .queue_xmit = ip_queue_xmit,
1858 .send_check = tcp_v4_send_check,
1859 .rebuild_header = inet_sk_rebuild_header,
1860 .conn_request = tcp_v4_conn_request,
1861 .syn_recv_sock = tcp_v4_syn_recv_sock,
543d9cfe
ACM
1862 .net_header_len = sizeof(struct iphdr),
1863 .setsockopt = ip_setsockopt,
1864 .getsockopt = ip_getsockopt,
1865 .addr2sockaddr = inet_csk_addr2sockaddr,
1866 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 1867 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 1868#ifdef CONFIG_COMPAT
543d9cfe
ACM
1869 .compat_setsockopt = compat_ip_setsockopt,
1870 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 1871#endif
1da177e4 1872};
4bc2f18b 1873EXPORT_SYMBOL(ipv4_specific);
1da177e4 1874
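
ipv4_specific is consumed through a function-pointer indirection: tcp_v4_init_sock() below stores it in icsk->icsk_af_ops, and the AF-independent core calls through the table without ever testing the family. A stand-alone miniature of that ops-table pattern, all names hypothetical:

#include <stdio.h>

struct af_ops {
	int (*queue_xmit)(const char *what);
	int header_len;
};

static int v4_xmit(const char *what) { printf("v4 xmit: %s\n", what); return 0; }
static int v6_xmit(const char *what) { printf("v6 xmit: %s\n", what); return 0; }

static const struct af_ops v4_ops = { .queue_xmit = v4_xmit, .header_len = 20 };
static const struct af_ops v6_ops = { .queue_xmit = v6_xmit, .header_len = 40 };

struct conn { const struct af_ops *ops; };

int main(void)
{
	struct conn c4 = { &v4_ops }, c6 = { &v6_ops };

	/* Callers never branch on the family; they indirect through ops. */
	c4.ops->queue_xmit("segment");
	c6.ops->queue_xmit("segment");
	return 0;
}
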
cfb6eeb4 1875#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 1876static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 1877 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 1878 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 1879 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 1880};
b6332e6c 1881#endif
cfb6eeb4 1882
1da177e4
LT
1883/* NOTE: A lot of things set to zero explicitly by call to
1884 * sk_alloc() so need not be done here.
1885 */
1886static int tcp_v4_init_sock(struct sock *sk)
1887{
6687e988 1888 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4 1889
900f65d3 1890 tcp_init_sock(sk);
1da177e4 1891
8292a17a 1892 icsk->icsk_af_ops = &ipv4_specific;
900f65d3 1893
cfb6eeb4 1894#ifdef CONFIG_TCP_MD5SIG
ac807fa8 1895 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
cfb6eeb4 1896#endif
1da177e4 1897
1da177e4
LT
1898 return 0;
1899}
1900
7d06b2e0 1901void tcp_v4_destroy_sock(struct sock *sk)
1da177e4
LT
1902{
1903 struct tcp_sock *tp = tcp_sk(sk);
1904
1905 tcp_clear_xmit_timers(sk);
1906
6687e988 1907 tcp_cleanup_congestion_control(sk);
317a76f9 1908
1da177e4 1909	 /* Clean up the write buffer. */
fe067e8a 1910 tcp_write_queue_purge(sk);
1da177e4
LT
1911
1912 /* Cleans up our, hopefully empty, out_of_order_queue. */
e905a9ed 1913 __skb_queue_purge(&tp->out_of_order_queue);
1da177e4 1914
cfb6eeb4
YH
1915#ifdef CONFIG_TCP_MD5SIG
1916 /* Clean up the MD5 key list, if any */
1917 if (tp->md5sig_info) {
a915da9b 1918 tcp_clear_md5_list(sk);
a8afca03 1919 kfree_rcu(tp->md5sig_info, rcu);
cfb6eeb4
YH
1920 tp->md5sig_info = NULL;
1921 }
1922#endif
1923
1a2449a8
CL
1924#ifdef CONFIG_NET_DMA
1925 /* Cleans up our sk_async_wait_queue */
e905a9ed 1926 __skb_queue_purge(&sk->sk_async_wait_queue);
1a2449a8
CL
1927#endif
1928
1da177e4
LT
1929	 /* Clean up the prequeue; it really must be empty by now. */
1930 __skb_queue_purge(&tp->ucopy.prequeue);
1931
1932 /* Clean up a referenced TCP bind bucket. */
463c84b9 1933 if (inet_csk(sk)->icsk_bind_hash)
ab1e0a13 1934 inet_put_port(sk);
1da177e4
LT
1935
1936 /*
1937 * If sendmsg cached page exists, toss it.
1938 */
1939 if (sk->sk_sndmsg_page) {
1940 __free_page(sk->sk_sndmsg_page);
1941 sk->sk_sndmsg_page = NULL;
1942 }
1943
435cf559
WAS
1944 /* TCP Cookie Transactions */
1945 if (tp->cookie_values != NULL) {
1946 kref_put(&tp->cookie_values->kref,
1947 tcp_cookie_values_release);
1948 tp->cookie_values = NULL;
1949 }
1950
180d8cd9 1951 sk_sockets_allocated_dec(sk);
d1a4c0b3 1952 sock_release_memcg(sk);
1da177e4 1953}
1da177e4
LT
1954EXPORT_SYMBOL(tcp_v4_destroy_sock);
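
The cookie_values teardown above relies on kref_put(): the last holder to drop its reference runs the release function. A stand-alone, deliberately simplified sketch of that pattern — hypothetical struct, and without the atomic operations the real struct kref uses:

#include <stdio.h>
#include <stdlib.h>

struct obj {
	int refcount;   /* the real kref uses an atomic counter */
};

static void obj_put(struct obj *o, void (*release)(struct obj *))
{
	if (--o->refcount == 0)   /* last reference gone: run the destructor */
		release(o);
}

static void obj_release(struct obj *o)
{
	printf("releasing\n");
	free(o);
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	o->refcount = 2;          /* two owners */
	obj_put(o, obj_release);  /* still alive */
	obj_put(o, obj_release);  /* prints "releasing" and frees */
	return 0;
}
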
1955
1956#ifdef CONFIG_PROC_FS
1957/* Proc filesystem TCP sock list dumping. */
1958
3ab5aee7 1959static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1da177e4 1960{
3ab5aee7 1961 return hlist_nulls_empty(head) ? NULL :
8feaf0c0 1962 list_entry(head->first, struct inet_timewait_sock, tw_node);
1da177e4
LT
1963}
1964
8feaf0c0 1965static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1da177e4 1966{
3ab5aee7
ED
1967 return !is_a_nulls(tw->tw_node.next) ?
1968 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1da177e4
LT
1969}
1970
a8b690f9
TH
1971/*
1972 * Get the next listener socket after cur. If cur is NULL, get the first socket
1973 * starting from the bucket given in st->bucket; when st->bucket is zero the
1974 * very first socket in the hash table is returned.
1975 */
1da177e4
LT
1976static void *listening_get_next(struct seq_file *seq, void *cur)
1977{
463c84b9 1978 struct inet_connection_sock *icsk;
c25eb3bf 1979 struct hlist_nulls_node *node;
1da177e4 1980 struct sock *sk = cur;
5caea4ea 1981 struct inet_listen_hashbucket *ilb;
5799de0b 1982 struct tcp_iter_state *st = seq->private;
a4146b1b 1983 struct net *net = seq_file_net(seq);
1da177e4
LT
1984
1985 if (!sk) {
a8b690f9 1986 ilb = &tcp_hashinfo.listening_hash[st->bucket];
5caea4ea 1987 spin_lock_bh(&ilb->lock);
c25eb3bf 1988 sk = sk_nulls_head(&ilb->head);
a8b690f9 1989 st->offset = 0;
1da177e4
LT
1990 goto get_sk;
1991 }
5caea4ea 1992 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1da177e4 1993 ++st->num;
a8b690f9 1994 ++st->offset;
1da177e4
LT
1995
1996 if (st->state == TCP_SEQ_STATE_OPENREQ) {
60236fdd 1997 struct request_sock *req = cur;
1da177e4 1998
72a3effa 1999 icsk = inet_csk(st->syn_wait_sk);
1da177e4
LT
2000 req = req->dl_next;
2001 while (1) {
2002 while (req) {
bdccc4ca 2003 if (req->rsk_ops->family == st->family) {
1da177e4
LT
2004 cur = req;
2005 goto out;
2006 }
2007 req = req->dl_next;
2008 }
72a3effa 2009 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1da177e4
LT
2010 break;
2011get_req:
463c84b9 2012 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1da177e4 2013 }
1bde5ac4 2014 sk = sk_nulls_next(st->syn_wait_sk);
1da177e4 2015 st->state = TCP_SEQ_STATE_LISTENING;
463c84b9 2016 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2017 } else {
e905a9ed 2018 icsk = inet_csk(sk);
463c84b9
ACM
2019 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2020 if (reqsk_queue_len(&icsk->icsk_accept_queue))
1da177e4 2021 goto start_req;
463c84b9 2022 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1bde5ac4 2023 sk = sk_nulls_next(sk);
1da177e4
LT
2024 }
2025get_sk:
c25eb3bf 2026 sk_nulls_for_each_from(sk, node) {
8475ef9f
PE
2027 if (!net_eq(sock_net(sk), net))
2028 continue;
2029 if (sk->sk_family == st->family) {
1da177e4
LT
2030 cur = sk;
2031 goto out;
2032 }
e905a9ed 2033 icsk = inet_csk(sk);
463c84b9
ACM
2034 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2035 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1da177e4
LT
2036start_req:
2037 st->uid = sock_i_uid(sk);
2038 st->syn_wait_sk = sk;
2039 st->state = TCP_SEQ_STATE_OPENREQ;
2040 st->sbucket = 0;
2041 goto get_req;
2042 }
463c84b9 2043 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2044 }
5caea4ea 2045 spin_unlock_bh(&ilb->lock);
a8b690f9 2046 st->offset = 0;
0f7ff927 2047 if (++st->bucket < INET_LHTABLE_SIZE) {
5caea4ea
ED
2048 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2049 spin_lock_bh(&ilb->lock);
c25eb3bf 2050 sk = sk_nulls_head(&ilb->head);
1da177e4
LT
2051 goto get_sk;
2052 }
2053 cur = NULL;
2054out:
2055 return cur;
2056}
2057
2058static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2059{
a8b690f9
TH
2060 struct tcp_iter_state *st = seq->private;
2061 void *rc;
2062
2063 st->bucket = 0;
2064 st->offset = 0;
2065 rc = listening_get_next(seq, NULL);
1da177e4
LT
2066
2067 while (rc && *pos) {
2068 rc = listening_get_next(seq, rc);
2069 --*pos;
2070 }
2071 return rc;
2072}
2073
a2a385d6 2074static inline bool empty_bucket(struct tcp_iter_state *st)
6eac5604 2075{
3ab5aee7
ED
2076 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2077 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
6eac5604
AK
2078}
2079
a8b690f9
TH
2080/*
2081 * Get the first established socket, starting from the bucket given in st->bucket.
2082 * If st->bucket is zero, the very first socket in the hash is returned.
2083 */
1da177e4
LT
2084static void *established_get_first(struct seq_file *seq)
2085{
5799de0b 2086 struct tcp_iter_state *st = seq->private;
a4146b1b 2087 struct net *net = seq_file_net(seq);
1da177e4
LT
2088 void *rc = NULL;
2089
a8b690f9
TH
2090 st->offset = 0;
2091 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1da177e4 2092 struct sock *sk;
3ab5aee7 2093 struct hlist_nulls_node *node;
8feaf0c0 2094 struct inet_timewait_sock *tw;
9db66bdc 2095 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1da177e4 2096
6eac5604
AK
2097 /* Lockless fast path for the common case of empty buckets */
2098 if (empty_bucket(st))
2099 continue;
2100
9db66bdc 2101 spin_lock_bh(lock);
3ab5aee7 2102 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
f40c8174 2103 if (sk->sk_family != st->family ||
878628fb 2104 !net_eq(sock_net(sk), net)) {
1da177e4
LT
2105 continue;
2106 }
2107 rc = sk;
2108 goto out;
2109 }
2110 st->state = TCP_SEQ_STATE_TIME_WAIT;
8feaf0c0 2111 inet_twsk_for_each(tw, node,
dbca9b27 2112 &tcp_hashinfo.ehash[st->bucket].twchain) {
28518fc1 2113 if (tw->tw_family != st->family ||
878628fb 2114 !net_eq(twsk_net(tw), net)) {
1da177e4
LT
2115 continue;
2116 }
2117 rc = tw;
2118 goto out;
2119 }
9db66bdc 2120 spin_unlock_bh(lock);
1da177e4
LT
2121 st->state = TCP_SEQ_STATE_ESTABLISHED;
2122 }
2123out:
2124 return rc;
2125}
2126
2127static void *established_get_next(struct seq_file *seq, void *cur)
2128{
2129 struct sock *sk = cur;
8feaf0c0 2130 struct inet_timewait_sock *tw;
3ab5aee7 2131 struct hlist_nulls_node *node;
5799de0b 2132 struct tcp_iter_state *st = seq->private;
a4146b1b 2133 struct net *net = seq_file_net(seq);
1da177e4
LT
2134
2135 ++st->num;
a8b690f9 2136 ++st->offset;
1da177e4
LT
2137
2138 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2139 tw = cur;
2140 tw = tw_next(tw);
2141get_tw:
878628fb 2142 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
1da177e4
LT
2143 tw = tw_next(tw);
2144 }
2145 if (tw) {
2146 cur = tw;
2147 goto out;
2148 }
9db66bdc 2149 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2150 st->state = TCP_SEQ_STATE_ESTABLISHED;
2151
6eac5604 2152	 /* Look for the next non-empty bucket */
a8b690f9 2153 st->offset = 0;
f373b53b 2154 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
6eac5604
AK
2155 empty_bucket(st))
2156 ;
f373b53b 2157 if (st->bucket > tcp_hashinfo.ehash_mask)
6eac5604
AK
2158 return NULL;
2159
9db66bdc 2160 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
3ab5aee7 2161 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
1da177e4 2162 } else
3ab5aee7 2163 sk = sk_nulls_next(sk);
1da177e4 2164
3ab5aee7 2165 sk_nulls_for_each_from(sk, node) {
878628fb 2166 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
1da177e4
LT
2167 goto found;
2168 }
2169
2170 st->state = TCP_SEQ_STATE_TIME_WAIT;
dbca9b27 2171 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
1da177e4
LT
2172 goto get_tw;
2173found:
2174 cur = sk;
2175out:
2176 return cur;
2177}
2178
2179static void *established_get_idx(struct seq_file *seq, loff_t pos)
2180{
a8b690f9
TH
2181 struct tcp_iter_state *st = seq->private;
2182 void *rc;
2183
2184 st->bucket = 0;
2185 rc = established_get_first(seq);
1da177e4
LT
2186
2187 while (rc && pos) {
2188 rc = established_get_next(seq, rc);
2189 --pos;
7174259e 2190 }
1da177e4
LT
2191 return rc;
2192}
2193
2194static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2195{
2196 void *rc;
5799de0b 2197 struct tcp_iter_state *st = seq->private;
1da177e4 2198
1da177e4
LT
2199 st->state = TCP_SEQ_STATE_LISTENING;
2200 rc = listening_get_idx(seq, &pos);
2201
2202 if (!rc) {
1da177e4
LT
2203 st->state = TCP_SEQ_STATE_ESTABLISHED;
2204 rc = established_get_idx(seq, pos);
2205 }
2206
2207 return rc;
2208}
2209
a8b690f9
TH
2210static void *tcp_seek_last_pos(struct seq_file *seq)
2211{
2212 struct tcp_iter_state *st = seq->private;
2213 int offset = st->offset;
2214 int orig_num = st->num;
2215 void *rc = NULL;
2216
2217 switch (st->state) {
2218 case TCP_SEQ_STATE_OPENREQ:
2219 case TCP_SEQ_STATE_LISTENING:
2220 if (st->bucket >= INET_LHTABLE_SIZE)
2221 break;
2222 st->state = TCP_SEQ_STATE_LISTENING;
2223 rc = listening_get_next(seq, NULL);
2224 while (offset-- && rc)
2225 rc = listening_get_next(seq, rc);
2226 if (rc)
2227 break;
2228 st->bucket = 0;
2229 /* Fallthrough */
2230 case TCP_SEQ_STATE_ESTABLISHED:
2231 case TCP_SEQ_STATE_TIME_WAIT:
2232 st->state = TCP_SEQ_STATE_ESTABLISHED;
2233 if (st->bucket > tcp_hashinfo.ehash_mask)
2234 break;
2235 rc = established_get_first(seq);
2236 while (offset-- && rc)
2237 rc = established_get_next(seq, rc);
2238 }
2239
2240 st->num = orig_num;
2241
2242 return rc;
2243}
2244
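
tcp_seek_last_pos() above resumes a dump from a saved (bucket, offset) pair instead of rescanning from slot zero on every read. A stand-alone sketch of that resume idea over a toy bucketed table; the layout and names are hypothetical and far simpler than the real listening/established hashes:

#include <stdio.h>

#define NBUCKETS 4
static const char *table[NBUCKETS][3] = {
	{ "a", NULL },
	{ NULL },
	{ "b", "c", NULL },
	{ "d", NULL },
};

/* Return the entry at (bucket, offset), walking past empty buckets. */
static const char *seek(int *bucket, int offset)
{
	for (; *bucket < NBUCKETS; ++*bucket) {
		int i;

		for (i = 0; table[*bucket][i]; i++)
			if (i == offset)
				return table[*bucket][i];
		offset = 0;   /* restart the offset in the next bucket */
	}
	return NULL;
}

int main(void)
{
	int bucket = 2;                    /* saved position: bucket 2 ...  */
	printf("%s\n", seek(&bucket, 1));  /* ... offset 1 resumes at "c"   */
	return 0;
}
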
1da177e4
LT
2245static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2246{
5799de0b 2247 struct tcp_iter_state *st = seq->private;
a8b690f9
TH
2248 void *rc;
2249
2250 if (*pos && *pos == st->last_pos) {
2251 rc = tcp_seek_last_pos(seq);
2252 if (rc)
2253 goto out;
2254 }
2255
1da177e4
LT
2256 st->state = TCP_SEQ_STATE_LISTENING;
2257 st->num = 0;
a8b690f9
TH
2258 st->bucket = 0;
2259 st->offset = 0;
2260 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2261
2262out:
2263 st->last_pos = *pos;
2264 return rc;
1da177e4
LT
2265}
2266
2267static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2268{
a8b690f9 2269 struct tcp_iter_state *st = seq->private;
1da177e4 2270 void *rc = NULL;
1da177e4
LT
2271
2272 if (v == SEQ_START_TOKEN) {
2273 rc = tcp_get_idx(seq, 0);
2274 goto out;
2275 }
1da177e4
LT
2276
2277 switch (st->state) {
2278 case TCP_SEQ_STATE_OPENREQ:
2279 case TCP_SEQ_STATE_LISTENING:
2280 rc = listening_get_next(seq, v);
2281 if (!rc) {
1da177e4 2282 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2283 st->bucket = 0;
2284 st->offset = 0;
1da177e4
LT
2285 rc = established_get_first(seq);
2286 }
2287 break;
2288 case TCP_SEQ_STATE_ESTABLISHED:
2289 case TCP_SEQ_STATE_TIME_WAIT:
2290 rc = established_get_next(seq, v);
2291 break;
2292 }
2293out:
2294 ++*pos;
a8b690f9 2295 st->last_pos = *pos;
1da177e4
LT
2296 return rc;
2297}
2298
2299static void tcp_seq_stop(struct seq_file *seq, void *v)
2300{
5799de0b 2301 struct tcp_iter_state *st = seq->private;
1da177e4
LT
2302
2303 switch (st->state) {
2304 case TCP_SEQ_STATE_OPENREQ:
2305 if (v) {
463c84b9
ACM
2306 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2307 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4
LT
2308 }
2309 case TCP_SEQ_STATE_LISTENING:
2310 if (v != SEQ_START_TOKEN)
5caea4ea 2311 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
1da177e4
LT
2312 break;
2313 case TCP_SEQ_STATE_TIME_WAIT:
2314 case TCP_SEQ_STATE_ESTABLISHED:
2315 if (v)
9db66bdc 2316 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2317 break;
2318 }
2319}
2320
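
tcp_seq_start/next/stop/show above implement the seq_file contract: start() positions the iterator (and takes locks), show()/next() run once per record, and stop() releases whatever start() or next() left held. A stand-alone miniature of that calling convention, hypothetical and without the real seq_file buffering:

#include <stdio.h>

struct iter_ops {
	void *(*start)(long *pos);
	void *(*next)(void *v, long *pos);
	void  (*stop)(void *v);
	int   (*show)(void *v);
};

static const char *items[] = { "LISTEN", "ESTABLISHED", "TIME_WAIT", NULL };

static void *it_start(long *pos) { return items[*pos] ? (void *)&items[*pos] : NULL; }
static void *it_next(void *v, long *pos) { (void)v; ++*pos; return it_start(pos); }
static void it_stop(void *v) { (void)v; /* drop locks here in the real code */ }
static int it_show(void *v) { printf("%s\n", *(const char **)v); return 0; }

int main(void)
{
	const struct iter_ops ops = { it_start, it_next, it_stop, it_show };
	long pos = 0;
	void *v = ops.start(&pos);

	while (v) {               /* the seq_file core drives this loop */
		ops.show(v);
		v = ops.next(v, &pos);
	}
	ops.stop(v);
	return 0;
}
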
73cb88ec 2321int tcp_seq_open(struct inode *inode, struct file *file)
1da177e4
LT
2322{
2323 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
1da177e4 2324 struct tcp_iter_state *s;
52d6f3f1 2325 int err;
1da177e4 2326
52d6f3f1
DL
2327 err = seq_open_net(inode, file, &afinfo->seq_ops,
2328 sizeof(struct tcp_iter_state));
2329 if (err < 0)
2330 return err;
f40c8174 2331
52d6f3f1 2332 s = ((struct seq_file *)file->private_data)->private;
1da177e4 2333 s->family = afinfo->family;
a8b690f9 2334 s->last_pos = 0;
f40c8174
DL
2335 return 0;
2336}
73cb88ec 2337EXPORT_SYMBOL(tcp_seq_open);
f40c8174 2338
6f8b13bc 2339int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4
LT
2340{
2341 int rc = 0;
2342 struct proc_dir_entry *p;
2343
9427c4b3
DL
2344 afinfo->seq_ops.start = tcp_seq_start;
2345 afinfo->seq_ops.next = tcp_seq_next;
2346 afinfo->seq_ops.stop = tcp_seq_stop;
2347
84841c3c 2348 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
73cb88ec 2349 afinfo->seq_fops, afinfo);
84841c3c 2350 if (!p)
1da177e4
LT
2351 rc = -ENOMEM;
2352 return rc;
2353}
4bc2f18b 2354EXPORT_SYMBOL(tcp_proc_register);
1da177e4 2355
6f8b13bc 2356void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4 2357{
6f8b13bc 2358 proc_net_remove(net, afinfo->name);
1da177e4 2359}
4bc2f18b 2360EXPORT_SYMBOL(tcp_proc_unregister);
1da177e4 2361
cf533ea5 2362static void get_openreq4(const struct sock *sk, const struct request_sock *req,
5e659e4c 2363 struct seq_file *f, int i, int uid, int *len)
1da177e4 2364{
2e6599cb 2365 const struct inet_request_sock *ireq = inet_rsk(req);
1da177e4
LT
2366 int ttd = req->expires - jiffies;
2367
5e659e4c 2368 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2369 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
1da177e4 2370 i,
2e6599cb 2371 ireq->loc_addr,
c720c7e8 2372 ntohs(inet_sk(sk)->inet_sport),
2e6599cb
ACM
2373 ireq->rmt_addr,
2374 ntohs(ireq->rmt_port),
1da177e4
LT
2375 TCP_SYN_RECV,
2376 0, 0, /* could print option size, but that is af dependent. */
2377 1, /* timers active (only the expire timer) */
2378 jiffies_to_clock_t(ttd),
2379 req->retrans,
2380 uid,
2381 0, /* non standard timer */
2382 0, /* open_requests have no inode */
2383 atomic_read(&sk->sk_refcnt),
5e659e4c
PE
2384 req,
2385 len);
1da177e4
LT
2386}
2387
5e659e4c 2388static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
1da177e4
LT
2389{
2390 int timer_active;
2391 unsigned long timer_expires;
cf533ea5 2392 const struct tcp_sock *tp = tcp_sk(sk);
cf4c6bf8 2393 const struct inet_connection_sock *icsk = inet_csk(sk);
cf533ea5 2394 const struct inet_sock *inet = inet_sk(sk);
c720c7e8
ED
2395 __be32 dest = inet->inet_daddr;
2396 __be32 src = inet->inet_rcv_saddr;
2397 __u16 destp = ntohs(inet->inet_dport);
2398 __u16 srcp = ntohs(inet->inet_sport);
49d09007 2399 int rx_queue;
1da177e4 2400
463c84b9 2401 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1da177e4 2402 timer_active = 1;
463c84b9
ACM
2403 timer_expires = icsk->icsk_timeout;
2404 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2405 timer_active = 4;
463c84b9 2406 timer_expires = icsk->icsk_timeout;
cf4c6bf8 2407 } else if (timer_pending(&sk->sk_timer)) {
1da177e4 2408 timer_active = 2;
cf4c6bf8 2409 timer_expires = sk->sk_timer.expires;
1da177e4
LT
2410 } else {
2411 timer_active = 0;
2412 timer_expires = jiffies;
2413 }
2414
49d09007
ED
2415 if (sk->sk_state == TCP_LISTEN)
2416 rx_queue = sk->sk_ack_backlog;
2417 else
2418 /*
2419	 * because we don't lock the socket, we might find a transient negative value
2420 */
2421 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2422
5e659e4c 2423 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
71338aa7 2424 "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
cf4c6bf8 2425 i, src, srcp, dest, destp, sk->sk_state,
47da8ee6 2426 tp->write_seq - tp->snd_una,
49d09007 2427 rx_queue,
1da177e4
LT
2428 timer_active,
2429 jiffies_to_clock_t(timer_expires - jiffies),
463c84b9 2430 icsk->icsk_retransmits,
cf4c6bf8 2431 sock_i_uid(sk),
6687e988 2432 icsk->icsk_probes_out,
cf4c6bf8
IJ
2433 sock_i_ino(sk),
2434 atomic_read(&sk->sk_refcnt), sk,
7be87351
SH
2435 jiffies_to_clock_t(icsk->icsk_rto),
2436 jiffies_to_clock_t(icsk->icsk_ack.ato),
463c84b9 2437 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1da177e4 2438 tp->snd_cwnd,
0b6a05c1 2439 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
5e659e4c 2440 len);
1da177e4
LT
2441}
2442
cf533ea5 2443static void get_timewait4_sock(const struct inet_timewait_sock *tw,
5e659e4c 2444 struct seq_file *f, int i, int *len)
1da177e4 2445{
23f33c2d 2446 __be32 dest, src;
1da177e4
LT
2447 __u16 destp, srcp;
2448 int ttd = tw->tw_ttd - jiffies;
2449
2450 if (ttd < 0)
2451 ttd = 0;
2452
2453 dest = tw->tw_daddr;
2454 src = tw->tw_rcv_saddr;
2455 destp = ntohs(tw->tw_dport);
2456 srcp = ntohs(tw->tw_sport);
2457
5e659e4c 2458 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2459 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
1da177e4
LT
2460 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2461 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
5e659e4c 2462 atomic_read(&tw->tw_refcnt), tw, len);
1da177e4
LT
2463}
2464
2465#define TMPSZ 150
2466
2467static int tcp4_seq_show(struct seq_file *seq, void *v)
2468{
5799de0b 2469 struct tcp_iter_state *st;
5e659e4c 2470 int len;
1da177e4
LT
2471
2472 if (v == SEQ_START_TOKEN) {
2473 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2474 " sl local_address rem_address st tx_queue "
2475 "rx_queue tr tm->when retrnsmt uid timeout "
2476 "inode");
2477 goto out;
2478 }
2479 st = seq->private;
2480
2481 switch (st->state) {
2482 case TCP_SEQ_STATE_LISTENING:
2483 case TCP_SEQ_STATE_ESTABLISHED:
5e659e4c 2484 get_tcp4_sock(v, seq, st->num, &len);
1da177e4
LT
2485 break;
2486 case TCP_SEQ_STATE_OPENREQ:
5e659e4c 2487 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
1da177e4
LT
2488 break;
2489 case TCP_SEQ_STATE_TIME_WAIT:
5e659e4c 2490 get_timewait4_sock(v, seq, st->num, &len);
1da177e4
LT
2491 break;
2492 }
5e659e4c 2493 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
1da177e4
LT
2494out:
2495 return 0;
2496}
2497
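
Given the seq_printf() formats above, a /proc/net/tcp row begins "sl: LADDR:LPORT RADDR:RPORT st". A stand-alone sketch of consuming one such row; on little-endian hosts the %08X address is the byte-swapped dotted quad (0100007F is 127.0.0.1), while the port was printed in host order via ntohs():

#include <stdio.h>

int main(void)
{
	const char *row =
	    "   0: 0100007F:0016 00000000:0000 0A";   /* sample: 127.0.0.1:22, LISTEN */
	unsigned int sl, laddr, lport, raddr, rport, state;

	if (sscanf(row, "%u: %X:%X %X:%X %X",
		   &sl, &laddr, &lport, &raddr, &rport, &state) == 6) {
		/* Unpack the little-endian address bytes back into dotted quad. */
		printf("local %u.%u.%u.%u:%u state %#x\n",
		       laddr & 0xff, (laddr >> 8) & 0xff,
		       (laddr >> 16) & 0xff, (laddr >> 24) & 0xff,
		       lport, state);
	}
	return 0;
}
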
73cb88ec
AV
2498static const struct file_operations tcp_afinfo_seq_fops = {
2499 .owner = THIS_MODULE,
2500 .open = tcp_seq_open,
2501 .read = seq_read,
2502 .llseek = seq_lseek,
2503 .release = seq_release_net
2504};
2505
1da177e4 2506static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1da177e4
LT
2507 .name = "tcp",
2508 .family = AF_INET,
73cb88ec 2509 .seq_fops = &tcp_afinfo_seq_fops,
9427c4b3
DL
2510 .seq_ops = {
2511 .show = tcp4_seq_show,
2512 },
1da177e4
LT
2513};
2514
2c8c1e72 2515static int __net_init tcp4_proc_init_net(struct net *net)
757764f6
PE
2516{
2517 return tcp_proc_register(net, &tcp4_seq_afinfo);
2518}
2519
2c8c1e72 2520static void __net_exit tcp4_proc_exit_net(struct net *net)
757764f6
PE
2521{
2522 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2523}
2524
2525static struct pernet_operations tcp4_net_ops = {
2526 .init = tcp4_proc_init_net,
2527 .exit = tcp4_proc_exit_net,
2528};
2529
1da177e4
LT
2530int __init tcp4_proc_init(void)
2531{
757764f6 2532 return register_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2533}
2534
2535void tcp4_proc_exit(void)
2536{
757764f6 2537 unregister_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2538}
2539#endif /* CONFIG_PROC_FS */
2540
bf296b12
HX
2541struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2542{
b71d1d42 2543 const struct iphdr *iph = skb_gro_network_header(skb);
bf296b12
HX
2544
2545 switch (skb->ip_summed) {
2546 case CHECKSUM_COMPLETE:
86911732 2547 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
bf296b12
HX
2548 skb->csum)) {
2549 skb->ip_summed = CHECKSUM_UNNECESSARY;
2550 break;
2551 }
2552
2553 /* fall through */
2554 case CHECKSUM_NONE:
2555 NAPI_GRO_CB(skb)->flush = 1;
2556 return NULL;
2557 }
2558
2559 return tcp_gro_receive(head, skb);
2560}
bf296b12
HX
2561
2562int tcp4_gro_complete(struct sk_buff *skb)
2563{
b71d1d42 2564 const struct iphdr *iph = ip_hdr(skb);
bf296b12
HX
2565 struct tcphdr *th = tcp_hdr(skb);
2566
2567 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2568 iph->saddr, iph->daddr, 0);
2569 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2570
2571 return tcp_gro_complete(skb);
2572}
bf296b12 2573
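
tcp4_gro_complete() above seeds th->check with ~tcp_v4_check(len, saddr, daddr, 0), i.e. the folded ones-complement sum of the pseudo-header alone, leaving the payload sum to checksum offload. A stand-alone sketch of that pseudo-header folding (hypothetical helpers, host-order inputs for illustration):

#include <stdint.h>
#include <stdio.h>

static uint16_t csum_fold(uint32_t sum)
{
	sum = (sum & 0xffff) + (sum >> 16);   /* fold carries twice: the */
	sum = (sum & 0xffff) + (sum >> 16);   /* first fold can carry again */
	return (uint16_t)~sum;
}

static uint16_t tcp_pseudo_csum(uint32_t saddr, uint32_t daddr, uint16_t len)
{
	uint32_t sum = 0;

	sum += saddr >> 16;   sum += saddr & 0xffff;   /* source address */
	sum += daddr >> 16;   sum += daddr & 0xffff;   /* destination address */
	sum += 6;                                      /* IPPROTO_TCP */
	sum += len;                                    /* TCP header + payload */
	return csum_fold(sum);
}

int main(void)
{
	/* 192.0.2.1 -> 192.0.2.2, 20-byte segment. */
	printf("%#06x\n", tcp_pseudo_csum(0xC0000201, 0xC0000202, 20));
	return 0;
}
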
1da177e4
LT
2574struct proto tcp_prot = {
2575 .name = "TCP",
2576 .owner = THIS_MODULE,
2577 .close = tcp_close,
2578 .connect = tcp_v4_connect,
2579 .disconnect = tcp_disconnect,
463c84b9 2580 .accept = inet_csk_accept,
1da177e4
LT
2581 .ioctl = tcp_ioctl,
2582 .init = tcp_v4_init_sock,
2583 .destroy = tcp_v4_destroy_sock,
2584 .shutdown = tcp_shutdown,
2585 .setsockopt = tcp_setsockopt,
2586 .getsockopt = tcp_getsockopt,
1da177e4 2587 .recvmsg = tcp_recvmsg,
7ba42910
CG
2588 .sendmsg = tcp_sendmsg,
2589 .sendpage = tcp_sendpage,
1da177e4 2590 .backlog_rcv = tcp_v4_do_rcv,
46d3ceab 2591 .release_cb = tcp_release_cb,
ab1e0a13
ACM
2592 .hash = inet_hash,
2593 .unhash = inet_unhash,
2594 .get_port = inet_csk_get_port,
1da177e4
LT
2595 .enter_memory_pressure = tcp_enter_memory_pressure,
2596 .sockets_allocated = &tcp_sockets_allocated,
0a5578cf 2597 .orphan_count = &tcp_orphan_count,
1da177e4
LT
2598 .memory_allocated = &tcp_memory_allocated,
2599 .memory_pressure = &tcp_memory_pressure,
1da177e4
LT
2600 .sysctl_wmem = sysctl_tcp_wmem,
2601 .sysctl_rmem = sysctl_tcp_rmem,
2602 .max_header = MAX_TCP_HEADER,
2603 .obj_size = sizeof(struct tcp_sock),
3ab5aee7 2604 .slab_flags = SLAB_DESTROY_BY_RCU,
6d6ee43e 2605 .twsk_prot = &tcp_timewait_sock_ops,
60236fdd 2606 .rsk_prot = &tcp_request_sock_ops,
39d8cda7 2607 .h.hashinfo = &tcp_hashinfo,
7ba42910 2608 .no_autobind = true,
543d9cfe
ACM
2609#ifdef CONFIG_COMPAT
2610 .compat_setsockopt = compat_tcp_setsockopt,
2611 .compat_getsockopt = compat_tcp_getsockopt,
2612#endif
d1a4c0b3
GC
2613#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
2614 .init_cgroup = tcp_init_cgroup,
2615 .destroy_cgroup = tcp_destroy_cgroup,
2616 .proto_cgroup = tcp_proto_cgroup,
2617#endif
1da177e4 2618};
4bc2f18b 2619EXPORT_SYMBOL(tcp_prot);
1da177e4 2620
046ee902
DL
2621static int __net_init tcp_sk_init(struct net *net)
2622{
2623 return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2624 PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2625}
2626
2627static void __net_exit tcp_sk_exit(struct net *net)
2628{
2629 inet_ctl_sock_destroy(net->ipv4.tcp_sock);
b099ce26
EB
2630}
2631
2632static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2633{
2634 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
046ee902
DL
2635}
2636
2637static struct pernet_operations __net_initdata tcp_sk_ops = {
b099ce26
EB
2638 .init = tcp_sk_init,
2639 .exit = tcp_sk_exit,
2640 .exit_batch = tcp_sk_exit_batch,
046ee902
DL
2641};
2642
9b0f976f 2643void __init tcp_v4_init(void)
1da177e4 2644{
5caea4ea 2645 inet_hashinfo_init(&tcp_hashinfo);
6a1b3054 2646 if (register_pernet_subsys(&tcp_sk_ops))
1da177e4 2647 panic("Failed to create the TCP control socket.\n");
1da177e4 2648}
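
register_pernet_subsys() hooks tcp_sk_ops into namespace lifetime: init() runs for every network namespace that comes up, exit()/exit_batch() when it is torn down. A stand-alone miniature of that registry idea, with hypothetical callbacks in place of the real inet_ctl_sock machinery:

#include <stdio.h>

struct pernet_ops {
	int  (*init)(int netid);
	void (*exit)(int netid);
};

static int tcp_init_cb(int netid)  { printf("init net %d\n", netid); return 0; }
static void tcp_exit_cb(int netid) { printf("exit net %d\n", netid); }

static const struct pernet_ops tcp_ops = { tcp_init_cb, tcp_exit_cb };

int main(void)
{
	/* Bring up two namespaces, then tear them down in reverse order. */
	int nets[] = { 1, 2 };
	int i;

	for (i = 0; i < 2; i++)
		if (tcp_ops.init(nets[i]))
			return 1;   /* the kernel would unwind already-inited nets */
	for (i = 1; i >= 0; i--)
		tcp_ops.exit(nets[i]);
	return 0;
}
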