net: ipv4: Standardize prefixes for message logging
[deliverable/linux.git] / net / ipv4 / tcp_ipv4.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
1da177e4
LT
8 * IPv4 specific functions
9 *
10 *
11 * code split from:
12 * linux/ipv4/tcp.c
13 * linux/ipv4/tcp_input.c
14 * linux/ipv4/tcp_output.c
15 *
16 * See tcp.c for author information
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 */
23
24/*
25 * Changes:
26 * David S. Miller : New socket lookup architecture.
27 * This code is dedicated to John Dyson.
28 * David S. Miller : Change semantics of established hash,
29 * half is devoted to TIME_WAIT sockets
30 * and the rest go in the other half.
31 * Andi Kleen : Add support for syncookies and fixed
32 * some bugs: ip options weren't passed to
33 * the TCP layer, missed a check for an
34 * ACK bit.
35 * Andi Kleen : Implemented fast path mtu discovery.
36 * Fixed many serious bugs in the
60236fdd 37 * request_sock handling and moved
1da177e4
LT
38 * most of it into the af independent code.
39 * Added tail drop and some other bugfixes.
caa20d9a 40 * Added new listen semantics.
1da177e4
LT
41 * Mike McLagan : Routing by source
42 * Juan Jose Ciarlante: ip_dynaddr bits
43 * Andi Kleen: various fixes.
44 * Vitaly E. Lavrov : Transparent proxy revived after year
45 * coma.
46 * Andi Kleen : Fix new listen.
47 * Andi Kleen : Fix accept error reporting.
48 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
49 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
50 * a single port at the same time.
51 */
52
afd46503 53#define pr_fmt(fmt) "TCP: " fmt
1da177e4 54
eb4dea58 55#include <linux/bottom_half.h>
1da177e4
LT
56#include <linux/types.h>
57#include <linux/fcntl.h>
58#include <linux/module.h>
59#include <linux/random.h>
60#include <linux/cache.h>
61#include <linux/jhash.h>
62#include <linux/init.h>
63#include <linux/times.h>
5a0e3ad6 64#include <linux/slab.h>
1da177e4 65
457c4cbc 66#include <net/net_namespace.h>
1da177e4 67#include <net/icmp.h>
304a1618 68#include <net/inet_hashtables.h>
1da177e4 69#include <net/tcp.h>
20380731 70#include <net/transp_v6.h>
1da177e4
LT
71#include <net/ipv6.h>
72#include <net/inet_common.h>
6d6ee43e 73#include <net/timewait_sock.h>
1da177e4 74#include <net/xfrm.h>
1a2449a8 75#include <net/netdma.h>
6e5714ea 76#include <net/secure_seq.h>
d1a4c0b3 77#include <net/tcp_memcontrol.h>
1da177e4
LT
78
79#include <linux/inet.h>
80#include <linux/ipv6.h>
81#include <linux/stddef.h>
82#include <linux/proc_fs.h>
83#include <linux/seq_file.h>
84
cfb6eeb4
YH
85#include <linux/crypto.h>
86#include <linux/scatterlist.h>
87
ab32ea5d
BH
88int sysctl_tcp_tw_reuse __read_mostly;
89int sysctl_tcp_low_latency __read_mostly;
4bc2f18b 90EXPORT_SYMBOL(sysctl_tcp_low_latency);
1da177e4 91
1da177e4 92
cfb6eeb4 93#ifdef CONFIG_TCP_MD5SIG
a915da9b 94static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
318cf7aa 95 __be32 daddr, __be32 saddr, const struct tcphdr *th);
cfb6eeb4
YH
96#endif
97
5caea4ea 98struct inet_hashinfo tcp_hashinfo;
4bc2f18b 99EXPORT_SYMBOL(tcp_hashinfo);
1da177e4 100
cf533ea5 101static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
1da177e4 102{
eddc9ec5
ACM
103 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
104 ip_hdr(skb)->saddr,
aa8223c7
ACM
105 tcp_hdr(skb)->dest,
106 tcp_hdr(skb)->source);
1da177e4
LT
107}
108
6d6ee43e
ACM
109int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
110{
111 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
112 struct tcp_sock *tp = tcp_sk(sk);
113
114 /* With PAWS, it is safe from the viewpoint
115 of data integrity. Even without PAWS it is safe provided sequence
116 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
117
118 Actually, the idea is close to VJ's one, only timestamp cache is
119 held not per host, but per port pair and TW bucket is used as state
120 holder.
121
122 If TW bucket has been already destroyed we fall back to VJ's scheme
123 and use initial timestamp retrieved from peer table.
124 */
125 if (tcptw->tw_ts_recent_stamp &&
126 (twp == NULL || (sysctl_tcp_tw_reuse &&
9d729f72 127 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
6d6ee43e
ACM
128 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
129 if (tp->write_seq == 0)
130 tp->write_seq = 1;
131 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
132 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
133 sock_hold(sktw);
134 return 1;
135 }
136
137 return 0;
138}
6d6ee43e
ACM
139EXPORT_SYMBOL_GPL(tcp_twsk_unique);
140
1da177e4
LT
141/* This will initiate an outgoing connection. */
142int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
143{
2d7192d6 144 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
1da177e4
LT
145 struct inet_sock *inet = inet_sk(sk);
146 struct tcp_sock *tp = tcp_sk(sk);
dca8b089 147 __be16 orig_sport, orig_dport;
bada8adc 148 __be32 daddr, nexthop;
da905bd1 149 struct flowi4 *fl4;
2d7192d6 150 struct rtable *rt;
1da177e4 151 int err;
f6d8bd05 152 struct ip_options_rcu *inet_opt;
1da177e4
LT
153
154 if (addr_len < sizeof(struct sockaddr_in))
155 return -EINVAL;
156
157 if (usin->sin_family != AF_INET)
158 return -EAFNOSUPPORT;
159
160 nexthop = daddr = usin->sin_addr.s_addr;
f6d8bd05
ED
161 inet_opt = rcu_dereference_protected(inet->inet_opt,
162 sock_owned_by_user(sk));
163 if (inet_opt && inet_opt->opt.srr) {
1da177e4
LT
164 if (!daddr)
165 return -EINVAL;
f6d8bd05 166 nexthop = inet_opt->opt.faddr;
1da177e4
LT
167 }
168
dca8b089
DM
169 orig_sport = inet->inet_sport;
170 orig_dport = usin->sin_port;
da905bd1
DM
171 fl4 = &inet->cork.fl.u.ip4;
172 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
b23dd4fe
DM
173 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
174 IPPROTO_TCP,
175 orig_sport, orig_dport, sk, true);
176 if (IS_ERR(rt)) {
177 err = PTR_ERR(rt);
178 if (err == -ENETUNREACH)
7c73a6fa 179 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
b23dd4fe 180 return err;
584bdf8c 181 }
1da177e4
LT
182
183 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
184 ip_rt_put(rt);
185 return -ENETUNREACH;
186 }
187
f6d8bd05 188 if (!inet_opt || !inet_opt->opt.srr)
da905bd1 189 daddr = fl4->daddr;
1da177e4 190
c720c7e8 191 if (!inet->inet_saddr)
da905bd1 192 inet->inet_saddr = fl4->saddr;
c720c7e8 193 inet->inet_rcv_saddr = inet->inet_saddr;
1da177e4 194
c720c7e8 195 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
1da177e4
LT
196 /* Reset inherited state */
197 tp->rx_opt.ts_recent = 0;
198 tp->rx_opt.ts_recent_stamp = 0;
199 tp->write_seq = 0;
200 }
201
295ff7ed 202 if (tcp_death_row.sysctl_tw_recycle &&
da905bd1 203 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
ed2361e6 204 struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
7174259e
ACM
205 /*
206 * VJ's idea. We save last timestamp seen from
207 * the destination in peer table, when entering state
208 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
209 * when trying new connection.
1da177e4 210 */
317fe0e6
ED
211 if (peer) {
212 inet_peer_refcheck(peer);
213 if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
214 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
215 tp->rx_opt.ts_recent = peer->tcp_ts;
216 }
1da177e4
LT
217 }
218 }
219
c720c7e8
ED
220 inet->inet_dport = usin->sin_port;
221 inet->inet_daddr = daddr;
1da177e4 222
d83d8461 223 inet_csk(sk)->icsk_ext_hdr_len = 0;
f6d8bd05
ED
224 if (inet_opt)
225 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1da177e4 226
bee7ca9e 227 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
1da177e4
LT
228
229 /* Socket identity is still unknown (sport may be zero).
230 * However we set state to SYN-SENT and not releasing socket
231 * lock select source port, enter ourselves into the hash tables and
232 * complete initialization after this.
233 */
234 tcp_set_state(sk, TCP_SYN_SENT);
a7f5e7f1 235 err = inet_hash_connect(&tcp_death_row, sk);
1da177e4
LT
236 if (err)
237 goto failure;
238
da905bd1 239 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
b23dd4fe
DM
240 inet->inet_sport, inet->inet_dport, sk);
241 if (IS_ERR(rt)) {
242 err = PTR_ERR(rt);
243 rt = NULL;
1da177e4 244 goto failure;
b23dd4fe 245 }
1da177e4 246 /* OK, now commit destination to socket. */
bcd76111 247 sk->sk_gso_type = SKB_GSO_TCPV4;
d8d1f30b 248 sk_setup_caps(sk, &rt->dst);
1da177e4
LT
249
250 if (!tp->write_seq)
c720c7e8
ED
251 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
252 inet->inet_daddr,
253 inet->inet_sport,
1da177e4
LT
254 usin->sin_port);
255
c720c7e8 256 inet->inet_id = tp->write_seq ^ jiffies;
1da177e4
LT
257
258 err = tcp_connect(sk);
259 rt = NULL;
260 if (err)
261 goto failure;
262
263 return 0;
264
265failure:
7174259e
ACM
266 /*
267 * This unhashes the socket and releases the local port,
268 * if necessary.
269 */
1da177e4
LT
270 tcp_set_state(sk, TCP_CLOSE);
271 ip_rt_put(rt);
272 sk->sk_route_caps = 0;
c720c7e8 273 inet->inet_dport = 0;
1da177e4
LT
274 return err;
275}
4bc2f18b 276EXPORT_SYMBOL(tcp_v4_connect);
1da177e4 277
1da177e4
LT
278/*
279 * This routine does path mtu discovery as defined in RFC1191.
280 */
b71d1d42 281static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
1da177e4
LT
282{
283 struct dst_entry *dst;
284 struct inet_sock *inet = inet_sk(sk);
1da177e4
LT
285
286 /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
287 * send out by Linux are always <576bytes so they should go through
288 * unfragmented).
289 */
290 if (sk->sk_state == TCP_LISTEN)
291 return;
292
293 /* We don't check in the destentry if pmtu discovery is forbidden
294 * on this route. We just assume that no packet_to_big packets
295 * are send back when pmtu discovery is not active.
e905a9ed 296 * There is a small race when the user changes this flag in the
1da177e4
LT
297 * route, but I think that's acceptable.
298 */
299 if ((dst = __sk_dst_check(sk, 0)) == NULL)
300 return;
301
302 dst->ops->update_pmtu(dst, mtu);
303
304 /* Something is about to be wrong... Remember soft error
305 * for the case, if this connection will not able to recover.
306 */
307 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
308 sk->sk_err_soft = EMSGSIZE;
309
310 mtu = dst_mtu(dst);
311
312 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
d83d8461 313 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
1da177e4
LT
314 tcp_sync_mss(sk, mtu);
315
316 /* Resend the TCP packet because it's
317 * clear that the old packet has been
318 * dropped. This is the new "fast" path mtu
319 * discovery.
320 */
321 tcp_simple_retransmit(sk);
322 } /* else let the usual retransmit timer handle it */
323}
324
325/*
326 * This routine is called by the ICMP module when it gets some
327 * sort of error condition. If err < 0 then the socket should
328 * be closed and the error returned to the user. If err > 0
329 * it's just the icmp type << 8 | icmp code. After adjustment
330 * header points to the first 8 bytes of the tcp header. We need
331 * to find the appropriate port.
332 *
333 * The locking strategy used here is very "optimistic". When
334 * someone else accesses the socket the ICMP is just dropped
335 * and for some paths there is no check at all.
336 * A more general error queue to queue errors for later handling
337 * is probably better.
338 *
339 */
340
4d1a2d9e 341void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
1da177e4 342{
b71d1d42 343 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
4d1a2d9e 344 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
f1ecd5d9 345 struct inet_connection_sock *icsk;
1da177e4
LT
346 struct tcp_sock *tp;
347 struct inet_sock *inet;
4d1a2d9e
DL
348 const int type = icmp_hdr(icmp_skb)->type;
349 const int code = icmp_hdr(icmp_skb)->code;
1da177e4 350 struct sock *sk;
f1ecd5d9 351 struct sk_buff *skb;
1da177e4 352 __u32 seq;
f1ecd5d9 353 __u32 remaining;
1da177e4 354 int err;
4d1a2d9e 355 struct net *net = dev_net(icmp_skb->dev);
1da177e4 356
4d1a2d9e 357 if (icmp_skb->len < (iph->ihl << 2) + 8) {
dcfc23ca 358 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
1da177e4
LT
359 return;
360 }
361
fd54d716 362 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
4d1a2d9e 363 iph->saddr, th->source, inet_iif(icmp_skb));
1da177e4 364 if (!sk) {
dcfc23ca 365 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
1da177e4
LT
366 return;
367 }
368 if (sk->sk_state == TCP_TIME_WAIT) {
9469c7b4 369 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
370 return;
371 }
372
373 bh_lock_sock(sk);
374 /* If too many ICMPs get dropped on busy
375 * servers this needs to be solved differently.
376 */
377 if (sock_owned_by_user(sk))
de0744af 378 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
1da177e4
LT
379
380 if (sk->sk_state == TCP_CLOSE)
381 goto out;
382
97e3ecd1 383 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
384 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
385 goto out;
386 }
387
f1ecd5d9 388 icsk = inet_csk(sk);
1da177e4
LT
389 tp = tcp_sk(sk);
390 seq = ntohl(th->seq);
391 if (sk->sk_state != TCP_LISTEN &&
392 !between(seq, tp->snd_una, tp->snd_nxt)) {
de0744af 393 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
1da177e4
LT
394 goto out;
395 }
396
397 switch (type) {
398 case ICMP_SOURCE_QUENCH:
399 /* Just silently ignore these. */
400 goto out;
401 case ICMP_PARAMETERPROB:
402 err = EPROTO;
403 break;
404 case ICMP_DEST_UNREACH:
405 if (code > NR_ICMP_UNREACH)
406 goto out;
407
408 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
409 if (!sock_owned_by_user(sk))
410 do_pmtu_discovery(sk, iph, info);
411 goto out;
412 }
413
414 err = icmp_err_convert[code].errno;
f1ecd5d9
DL
415 /* check if icmp_skb allows revert of backoff
416 * (see draft-zimmermann-tcp-lcd) */
417 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
418 break;
419 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
420 !icsk->icsk_backoff)
421 break;
422
8f49c270
DM
423 if (sock_owned_by_user(sk))
424 break;
425
f1ecd5d9 426 icsk->icsk_backoff--;
9ad7c049
JC
427 inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
428 TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
f1ecd5d9
DL
429 tcp_bound_rto(sk);
430
431 skb = tcp_write_queue_head(sk);
432 BUG_ON(!skb);
433
434 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
435 tcp_time_stamp - TCP_SKB_CB(skb)->when);
436
437 if (remaining) {
438 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
439 remaining, TCP_RTO_MAX);
f1ecd5d9
DL
440 } else {
441 /* RTO revert clocked out retransmission.
442 * Will retransmit now */
443 tcp_retransmit_timer(sk);
444 }
445
1da177e4
LT
446 break;
447 case ICMP_TIME_EXCEEDED:
448 err = EHOSTUNREACH;
449 break;
450 default:
451 goto out;
452 }
453
454 switch (sk->sk_state) {
60236fdd 455 struct request_sock *req, **prev;
1da177e4
LT
456 case TCP_LISTEN:
457 if (sock_owned_by_user(sk))
458 goto out;
459
463c84b9
ACM
460 req = inet_csk_search_req(sk, &prev, th->dest,
461 iph->daddr, iph->saddr);
1da177e4
LT
462 if (!req)
463 goto out;
464
465 /* ICMPs are not backlogged, hence we cannot get
466 an established socket here.
467 */
547b792c 468 WARN_ON(req->sk);
1da177e4 469
2e6599cb 470 if (seq != tcp_rsk(req)->snt_isn) {
de0744af 471 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
1da177e4
LT
472 goto out;
473 }
474
475 /*
476 * Still in SYN_RECV, just remove it silently.
477 * There is no good way to pass the error to the newly
478 * created socket, and POSIX does not want network
479 * errors returned from accept().
480 */
463c84b9 481 inet_csk_reqsk_queue_drop(sk, req, prev);
1da177e4
LT
482 goto out;
483
484 case TCP_SYN_SENT:
485 case TCP_SYN_RECV: /* Cannot happen.
486 It can f.e. if SYNs crossed.
487 */
488 if (!sock_owned_by_user(sk)) {
1da177e4
LT
489 sk->sk_err = err;
490
491 sk->sk_error_report(sk);
492
493 tcp_done(sk);
494 } else {
495 sk->sk_err_soft = err;
496 }
497 goto out;
498 }
499
500 /* If we've already connected we will keep trying
501 * until we time out, or the user gives up.
502 *
503 * rfc1122 4.2.3.9 allows to consider as hard errors
504 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
505 * but it is obsoleted by pmtu discovery).
506 *
507 * Note, that in modern internet, where routing is unreliable
508 * and in each dark corner broken firewalls sit, sending random
509 * errors ordered by their masters even this two messages finally lose
510 * their original sense (even Linux sends invalid PORT_UNREACHs)
511 *
512 * Now we are in compliance with RFCs.
513 * --ANK (980905)
514 */
515
516 inet = inet_sk(sk);
517 if (!sock_owned_by_user(sk) && inet->recverr) {
518 sk->sk_err = err;
519 sk->sk_error_report(sk);
520 } else { /* Only an error on timeout */
521 sk->sk_err_soft = err;
522 }
523
524out:
525 bh_unlock_sock(sk);
526 sock_put(sk);
527}
528
419f9f89
HX
529static void __tcp_v4_send_check(struct sk_buff *skb,
530 __be32 saddr, __be32 daddr)
1da177e4 531{
aa8223c7 532 struct tcphdr *th = tcp_hdr(skb);
1da177e4 533
84fa7933 534 if (skb->ip_summed == CHECKSUM_PARTIAL) {
419f9f89 535 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
663ead3b 536 skb->csum_start = skb_transport_header(skb) - skb->head;
ff1dcadb 537 skb->csum_offset = offsetof(struct tcphdr, check);
1da177e4 538 } else {
419f9f89 539 th->check = tcp_v4_check(skb->len, saddr, daddr,
07f0757a 540 csum_partial(th,
1da177e4
LT
541 th->doff << 2,
542 skb->csum));
543 }
544}
545
419f9f89 546/* This routine computes an IPv4 TCP checksum. */
bb296246 547void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
419f9f89 548{
cf533ea5 549 const struct inet_sock *inet = inet_sk(sk);
419f9f89
HX
550
551 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
552}
4bc2f18b 553EXPORT_SYMBOL(tcp_v4_send_check);
419f9f89 554
a430a43d
HX
555int tcp_v4_gso_send_check(struct sk_buff *skb)
556{
eddc9ec5 557 const struct iphdr *iph;
a430a43d
HX
558 struct tcphdr *th;
559
560 if (!pskb_may_pull(skb, sizeof(*th)))
561 return -EINVAL;
562
eddc9ec5 563 iph = ip_hdr(skb);
aa8223c7 564 th = tcp_hdr(skb);
a430a43d
HX
565
566 th->check = 0;
84fa7933 567 skb->ip_summed = CHECKSUM_PARTIAL;
419f9f89 568 __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
a430a43d
HX
569 return 0;
570}
571
1da177e4
LT
572/*
573 * This routine will send an RST to the other tcp.
574 *
575 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
576 * for reset.
577 * Answer: if a packet caused RST, it is not for a socket
578 * existing in our system, if it is matched to a socket,
579 * it is just duplicate segment or bug in other side's TCP.
580 * So that we build reply only basing on parameters
581 * arrived with segment.
582 * Exception: precedence violation. We do not implement it in any case.
583 */
584
cfb6eeb4 585static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
1da177e4 586{
cf533ea5 587 const struct tcphdr *th = tcp_hdr(skb);
cfb6eeb4
YH
588 struct {
589 struct tcphdr th;
590#ifdef CONFIG_TCP_MD5SIG
714e85be 591 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
cfb6eeb4
YH
592#endif
593 } rep;
1da177e4 594 struct ip_reply_arg arg;
cfb6eeb4
YH
595#ifdef CONFIG_TCP_MD5SIG
596 struct tcp_md5sig_key *key;
658ddaaf
SL
597 const __u8 *hash_location = NULL;
598 unsigned char newhash[16];
599 int genhash;
600 struct sock *sk1 = NULL;
cfb6eeb4 601#endif
a86b1e30 602 struct net *net;
1da177e4
LT
603
604 /* Never send a reset in response to a reset. */
605 if (th->rst)
606 return;
607
511c3f92 608 if (skb_rtable(skb)->rt_type != RTN_LOCAL)
1da177e4
LT
609 return;
610
611 /* Swap the send and the receive. */
cfb6eeb4
YH
612 memset(&rep, 0, sizeof(rep));
613 rep.th.dest = th->source;
614 rep.th.source = th->dest;
615 rep.th.doff = sizeof(struct tcphdr) / 4;
616 rep.th.rst = 1;
1da177e4
LT
617
618 if (th->ack) {
cfb6eeb4 619 rep.th.seq = th->ack_seq;
1da177e4 620 } else {
cfb6eeb4
YH
621 rep.th.ack = 1;
622 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
623 skb->len - (th->doff << 2));
1da177e4
LT
624 }
625
7174259e 626 memset(&arg, 0, sizeof(arg));
cfb6eeb4
YH
627 arg.iov[0].iov_base = (unsigned char *)&rep;
628 arg.iov[0].iov_len = sizeof(rep.th);
629
630#ifdef CONFIG_TCP_MD5SIG
658ddaaf
SL
631 hash_location = tcp_parse_md5sig_option(th);
632 if (!sk && hash_location) {
633 /*
634 * active side is lost. Try to find listening socket through
635 * source port, and then find md5 key through listening socket.
636 * we are not loose security here:
637 * Incoming packet is checked with md5 hash with finding key,
638 * no RST generated if md5 hash doesn't match.
639 */
640 sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
641 &tcp_hashinfo, ip_hdr(skb)->daddr,
642 ntohs(th->source), inet_iif(skb));
643 /* don't send rst if it can't find key */
644 if (!sk1)
645 return;
646 rcu_read_lock();
647 key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
648 &ip_hdr(skb)->saddr, AF_INET);
649 if (!key)
650 goto release_sk1;
651
652 genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
653 if (genhash || memcmp(hash_location, newhash, 16) != 0)
654 goto release_sk1;
655 } else {
656 key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
657 &ip_hdr(skb)->saddr,
658 AF_INET) : NULL;
659 }
660
cfb6eeb4
YH
661 if (key) {
662 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
663 (TCPOPT_NOP << 16) |
664 (TCPOPT_MD5SIG << 8) |
665 TCPOLEN_MD5SIG);
666 /* Update length and the length the header thinks exists */
667 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
668 rep.th.doff = arg.iov[0].iov_len / 4;
669
49a72dfb 670 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
78e645cb
IJ
671 key, ip_hdr(skb)->saddr,
672 ip_hdr(skb)->daddr, &rep.th);
cfb6eeb4
YH
673 }
674#endif
eddc9ec5
ACM
675 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
676 ip_hdr(skb)->saddr, /* XXX */
52cd5750 677 arg.iov[0].iov_len, IPPROTO_TCP, 0);
1da177e4 678 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
88ef4a5a 679 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
e2446eaa
SL
680 /* When socket is gone, all binding information is lost.
681 * routing might fail in this case. using iif for oif to
682 * make sure we can deliver it
683 */
684 arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
1da177e4 685
adf30907 686 net = dev_net(skb_dst(skb)->dev);
66b13d99 687 arg.tos = ip_hdr(skb)->tos;
0a5ebb80 688 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
7feb49c8 689 &arg, arg.iov[0].iov_len);
1da177e4 690
63231bdd
PE
691 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
692 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
658ddaaf
SL
693
694#ifdef CONFIG_TCP_MD5SIG
695release_sk1:
696 if (sk1) {
697 rcu_read_unlock();
698 sock_put(sk1);
699 }
700#endif
1da177e4
LT
701}
702
703/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
704 outside socket context is ugly, certainly. What can I do?
705 */
706
9501f972
YH
707static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
708 u32 win, u32 ts, int oif,
88ef4a5a 709 struct tcp_md5sig_key *key,
66b13d99 710 int reply_flags, u8 tos)
1da177e4 711{
cf533ea5 712 const struct tcphdr *th = tcp_hdr(skb);
1da177e4
LT
713 struct {
714 struct tcphdr th;
714e85be 715 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
cfb6eeb4 716#ifdef CONFIG_TCP_MD5SIG
714e85be 717 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
cfb6eeb4
YH
718#endif
719 ];
1da177e4
LT
720 } rep;
721 struct ip_reply_arg arg;
adf30907 722 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4
LT
723
724 memset(&rep.th, 0, sizeof(struct tcphdr));
7174259e 725 memset(&arg, 0, sizeof(arg));
1da177e4
LT
726
727 arg.iov[0].iov_base = (unsigned char *)&rep;
728 arg.iov[0].iov_len = sizeof(rep.th);
729 if (ts) {
cfb6eeb4
YH
730 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
731 (TCPOPT_TIMESTAMP << 8) |
732 TCPOLEN_TIMESTAMP);
733 rep.opt[1] = htonl(tcp_time_stamp);
734 rep.opt[2] = htonl(ts);
cb48cfe8 735 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
1da177e4
LT
736 }
737
738 /* Swap the send and the receive. */
739 rep.th.dest = th->source;
740 rep.th.source = th->dest;
741 rep.th.doff = arg.iov[0].iov_len / 4;
742 rep.th.seq = htonl(seq);
743 rep.th.ack_seq = htonl(ack);
744 rep.th.ack = 1;
745 rep.th.window = htons(win);
746
cfb6eeb4 747#ifdef CONFIG_TCP_MD5SIG
cfb6eeb4
YH
748 if (key) {
749 int offset = (ts) ? 3 : 0;
750
751 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
752 (TCPOPT_NOP << 16) |
753 (TCPOPT_MD5SIG << 8) |
754 TCPOLEN_MD5SIG);
755 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
756 rep.th.doff = arg.iov[0].iov_len/4;
757
49a72dfb 758 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
90b7e112
AL
759 key, ip_hdr(skb)->saddr,
760 ip_hdr(skb)->daddr, &rep.th);
cfb6eeb4
YH
761 }
762#endif
88ef4a5a 763 arg.flags = reply_flags;
eddc9ec5
ACM
764 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
765 ip_hdr(skb)->saddr, /* XXX */
1da177e4
LT
766 arg.iov[0].iov_len, IPPROTO_TCP, 0);
767 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
9501f972
YH
768 if (oif)
769 arg.bound_dev_if = oif;
66b13d99 770 arg.tos = tos;
0a5ebb80 771 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
7feb49c8 772 &arg, arg.iov[0].iov_len);
1da177e4 773
63231bdd 774 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
1da177e4
LT
775}
776
777static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
778{
8feaf0c0 779 struct inet_timewait_sock *tw = inet_twsk(sk);
cfb6eeb4 780 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1da177e4 781
9501f972 782 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
7174259e 783 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
9501f972
YH
784 tcptw->tw_ts_recent,
785 tw->tw_bound_dev_if,
88ef4a5a 786 tcp_twsk_md5_key(tcptw),
66b13d99
ED
787 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
788 tw->tw_tos
9501f972 789 );
1da177e4 790
8feaf0c0 791 inet_twsk_put(tw);
1da177e4
LT
792}
793
6edafaaf 794static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
7174259e 795 struct request_sock *req)
1da177e4 796{
9501f972 797 tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
cfb6eeb4 798 tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
9501f972
YH
799 req->ts_recent,
800 0,
a915da9b
ED
801 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
802 AF_INET),
66b13d99
ED
803 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
804 ip_hdr(skb)->tos);
1da177e4
LT
805}
806
1da177e4 807/*
9bf1d83e 808 * Send a SYN-ACK after having received a SYN.
60236fdd 809 * This still operates on a request_sock only, not on a big
1da177e4
LT
810 * socket.
811 */
72659ecc
OP
812static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
813 struct request_sock *req,
814 struct request_values *rvp)
1da177e4 815{
2e6599cb 816 const struct inet_request_sock *ireq = inet_rsk(req);
6bd023f3 817 struct flowi4 fl4;
1da177e4
LT
818 int err = -1;
819 struct sk_buff * skb;
820
821 /* First, grab a route. */
6bd023f3 822 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
fd80eb94 823 return -1;
1da177e4 824
e6b4d113 825 skb = tcp_make_synack(sk, dst, req, rvp);
1da177e4
LT
826
827 if (skb) {
419f9f89 828 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
1da177e4 829
2e6599cb
ACM
830 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
831 ireq->rmt_addr,
832 ireq->opt);
b9df3cb8 833 err = net_xmit_eval(err);
1da177e4
LT
834 }
835
1da177e4
LT
836 dst_release(dst);
837 return err;
838}
839
72659ecc 840static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
e6b4d113 841 struct request_values *rvp)
fd80eb94 842{
72659ecc
OP
843 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
844 return tcp_v4_send_synack(sk, NULL, req, rvp);
fd80eb94
DL
845}
846
1da177e4 847/*
60236fdd 848 * IPv4 request_sock destructor.
1da177e4 849 */
60236fdd 850static void tcp_v4_reqsk_destructor(struct request_sock *req)
1da177e4 851{
a51482bd 852 kfree(inet_rsk(req)->opt);
1da177e4
LT
853}
854
946cedcc
ED
855/*
856 * Return 1 if a syncookie should be sent
857 */
858int tcp_syn_flood_action(struct sock *sk,
859 const struct sk_buff *skb,
860 const char *proto)
1da177e4 861{
946cedcc
ED
862 const char *msg = "Dropping request";
863 int want_cookie = 0;
864 struct listen_sock *lopt;
865
866
1da177e4 867
2a1d4bd4 868#ifdef CONFIG_SYN_COOKIES
946cedcc 869 if (sysctl_tcp_syncookies) {
2a1d4bd4 870 msg = "Sending cookies";
946cedcc
ED
871 want_cookie = 1;
872 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
873 } else
80e40daa 874#endif
946cedcc
ED
875 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
876
877 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
878 if (!lopt->synflood_warned) {
879 lopt->synflood_warned = 1;
afd46503 880 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
946cedcc
ED
881 proto, ntohs(tcp_hdr(skb)->dest), msg);
882 }
883 return want_cookie;
2a1d4bd4 884}
946cedcc 885EXPORT_SYMBOL(tcp_syn_flood_action);
1da177e4
LT
886
887/*
60236fdd 888 * Save and compile IPv4 options into the request_sock if needed.
1da177e4 889 */
f6d8bd05
ED
890static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
891 struct sk_buff *skb)
1da177e4 892{
f6d8bd05
ED
893 const struct ip_options *opt = &(IPCB(skb)->opt);
894 struct ip_options_rcu *dopt = NULL;
1da177e4
LT
895
896 if (opt && opt->optlen) {
f6d8bd05
ED
897 int opt_size = sizeof(*dopt) + opt->optlen;
898
1da177e4
LT
899 dopt = kmalloc(opt_size, GFP_ATOMIC);
900 if (dopt) {
f6d8bd05 901 if (ip_options_echo(&dopt->opt, skb)) {
1da177e4
LT
902 kfree(dopt);
903 dopt = NULL;
904 }
905 }
906 }
907 return dopt;
908}
909
cfb6eeb4
YH
910#ifdef CONFIG_TCP_MD5SIG
911/*
912 * RFC2385 MD5 checksumming requires a mapping of
913 * IP address->MD5 Key.
914 * We need to maintain these in the sk structure.
915 */
916
917/* Find the Key structure for an address. */
a915da9b
ED
918struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
919 const union tcp_md5_addr *addr,
920 int family)
cfb6eeb4
YH
921{
922 struct tcp_sock *tp = tcp_sk(sk);
a915da9b
ED
923 struct tcp_md5sig_key *key;
924 struct hlist_node *pos;
925 unsigned int size = sizeof(struct in_addr);
a8afca03 926 struct tcp_md5sig_info *md5sig;
cfb6eeb4 927
a8afca03
ED
928 /* caller either holds rcu_read_lock() or socket lock */
929 md5sig = rcu_dereference_check(tp->md5sig_info,
b4fb05ea
ED
930 sock_owned_by_user(sk) ||
931 lockdep_is_held(&sk->sk_lock.slock));
a8afca03 932 if (!md5sig)
cfb6eeb4 933 return NULL;
a915da9b
ED
934#if IS_ENABLED(CONFIG_IPV6)
935 if (family == AF_INET6)
936 size = sizeof(struct in6_addr);
937#endif
a8afca03 938 hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
a915da9b
ED
939 if (key->family != family)
940 continue;
941 if (!memcmp(&key->addr, addr, size))
942 return key;
cfb6eeb4
YH
943 }
944 return NULL;
945}
a915da9b 946EXPORT_SYMBOL(tcp_md5_do_lookup);
cfb6eeb4
YH
947
948struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
949 struct sock *addr_sk)
950{
a915da9b
ED
951 union tcp_md5_addr *addr;
952
953 addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
954 return tcp_md5_do_lookup(sk, addr, AF_INET);
cfb6eeb4 955}
cfb6eeb4
YH
956EXPORT_SYMBOL(tcp_v4_md5_lookup);
957
f5b99bcd
AB
958static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
959 struct request_sock *req)
cfb6eeb4 960{
a915da9b
ED
961 union tcp_md5_addr *addr;
962
963 addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
964 return tcp_md5_do_lookup(sk, addr, AF_INET);
cfb6eeb4
YH
965}
966
967/* This can be called on a newly created socket, from other files */
a915da9b
ED
968int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
969 int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
cfb6eeb4
YH
970{
971 /* Add Key to the list */
b0a713e9 972 struct tcp_md5sig_key *key;
cfb6eeb4 973 struct tcp_sock *tp = tcp_sk(sk);
a915da9b 974 struct tcp_md5sig_info *md5sig;
cfb6eeb4 975
a915da9b 976 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
cfb6eeb4
YH
977 if (key) {
978 /* Pre-existing entry - just update that one. */
a915da9b 979 memcpy(key->key, newkey, newkeylen);
b0a713e9 980 key->keylen = newkeylen;
a915da9b
ED
981 return 0;
982 }
260fcbeb 983
a8afca03
ED
984 md5sig = rcu_dereference_protected(tp->md5sig_info,
985 sock_owned_by_user(sk));
a915da9b
ED
986 if (!md5sig) {
987 md5sig = kmalloc(sizeof(*md5sig), gfp);
988 if (!md5sig)
cfb6eeb4 989 return -ENOMEM;
cfb6eeb4 990
a915da9b
ED
991 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
992 INIT_HLIST_HEAD(&md5sig->head);
a8afca03 993 rcu_assign_pointer(tp->md5sig_info, md5sig);
a915da9b 994 }
cfb6eeb4 995
5f3d9cb2 996 key = sock_kmalloc(sk, sizeof(*key), gfp);
a915da9b
ED
997 if (!key)
998 return -ENOMEM;
999 if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
5f3d9cb2 1000 sock_kfree_s(sk, key, sizeof(*key));
a915da9b 1001 return -ENOMEM;
cfb6eeb4 1002 }
a915da9b
ED
1003
1004 memcpy(key->key, newkey, newkeylen);
1005 key->keylen = newkeylen;
1006 key->family = family;
1007 memcpy(&key->addr, addr,
1008 (family == AF_INET6) ? sizeof(struct in6_addr) :
1009 sizeof(struct in_addr));
1010 hlist_add_head_rcu(&key->node, &md5sig->head);
cfb6eeb4
YH
1011 return 0;
1012}
a915da9b 1013EXPORT_SYMBOL(tcp_md5_do_add);
cfb6eeb4 1014
a915da9b 1015int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
cfb6eeb4
YH
1016{
1017 struct tcp_sock *tp = tcp_sk(sk);
a915da9b 1018 struct tcp_md5sig_key *key;
a8afca03 1019 struct tcp_md5sig_info *md5sig;
a915da9b
ED
1020
1021 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
1022 if (!key)
1023 return -ENOENT;
1024 hlist_del_rcu(&key->node);
5f3d9cb2 1025 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
a915da9b 1026 kfree_rcu(key, rcu);
a8afca03
ED
1027 md5sig = rcu_dereference_protected(tp->md5sig_info,
1028 sock_owned_by_user(sk));
1029 if (hlist_empty(&md5sig->head))
a915da9b
ED
1030 tcp_free_md5sig_pool();
1031 return 0;
cfb6eeb4 1032}
a915da9b 1033EXPORT_SYMBOL(tcp_md5_do_del);
cfb6eeb4 1034
a915da9b 1035void tcp_clear_md5_list(struct sock *sk)
cfb6eeb4
YH
1036{
1037 struct tcp_sock *tp = tcp_sk(sk);
a915da9b
ED
1038 struct tcp_md5sig_key *key;
1039 struct hlist_node *pos, *n;
a8afca03 1040 struct tcp_md5sig_info *md5sig;
cfb6eeb4 1041
a8afca03
ED
1042 md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1043
1044 if (!hlist_empty(&md5sig->head))
cfb6eeb4 1045 tcp_free_md5sig_pool();
a8afca03 1046 hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
a915da9b 1047 hlist_del_rcu(&key->node);
5f3d9cb2 1048 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
a915da9b 1049 kfree_rcu(key, rcu);
cfb6eeb4
YH
1050 }
1051}
1052
7174259e
ACM
1053static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1054 int optlen)
cfb6eeb4
YH
1055{
1056 struct tcp_md5sig cmd;
1057 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
cfb6eeb4
YH
1058
1059 if (optlen < sizeof(cmd))
1060 return -EINVAL;
1061
7174259e 1062 if (copy_from_user(&cmd, optval, sizeof(cmd)))
cfb6eeb4
YH
1063 return -EFAULT;
1064
1065 if (sin->sin_family != AF_INET)
1066 return -EINVAL;
1067
a8afca03 1068 if (!cmd.tcpm_key || !cmd.tcpm_keylen)
a915da9b
ED
1069 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1070 AF_INET);
cfb6eeb4
YH
1071
1072 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1073 return -EINVAL;
1074
a915da9b
ED
1075 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1076 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1077 GFP_KERNEL);
cfb6eeb4
YH
1078}
1079
49a72dfb
AL
1080static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1081 __be32 daddr, __be32 saddr, int nbytes)
cfb6eeb4 1082{
cfb6eeb4 1083 struct tcp4_pseudohdr *bp;
49a72dfb 1084 struct scatterlist sg;
cfb6eeb4
YH
1085
1086 bp = &hp->md5_blk.ip4;
cfb6eeb4
YH
1087
1088 /*
49a72dfb 1089 * 1. the TCP pseudo-header (in the order: source IP address,
cfb6eeb4
YH
1090 * destination IP address, zero-padded protocol number, and
1091 * segment length)
1092 */
1093 bp->saddr = saddr;
1094 bp->daddr = daddr;
1095 bp->pad = 0;
076fb722 1096 bp->protocol = IPPROTO_TCP;
49a72dfb 1097 bp->len = cpu_to_be16(nbytes);
c7da57a1 1098
49a72dfb
AL
1099 sg_init_one(&sg, bp, sizeof(*bp));
1100 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1101}
1102
a915da9b 1103static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
318cf7aa 1104 __be32 daddr, __be32 saddr, const struct tcphdr *th)
49a72dfb
AL
1105{
1106 struct tcp_md5sig_pool *hp;
1107 struct hash_desc *desc;
1108
1109 hp = tcp_get_md5sig_pool();
1110 if (!hp)
1111 goto clear_hash_noput;
1112 desc = &hp->md5_desc;
1113
1114 if (crypto_hash_init(desc))
1115 goto clear_hash;
1116 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1117 goto clear_hash;
1118 if (tcp_md5_hash_header(hp, th))
1119 goto clear_hash;
1120 if (tcp_md5_hash_key(hp, key))
1121 goto clear_hash;
1122 if (crypto_hash_final(desc, md5_hash))
cfb6eeb4
YH
1123 goto clear_hash;
1124
cfb6eeb4 1125 tcp_put_md5sig_pool();
cfb6eeb4 1126 return 0;
49a72dfb 1127
cfb6eeb4
YH
1128clear_hash:
1129 tcp_put_md5sig_pool();
1130clear_hash_noput:
1131 memset(md5_hash, 0, 16);
49a72dfb 1132 return 1;
cfb6eeb4
YH
1133}
1134
49a72dfb 1135int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
318cf7aa
ED
1136 const struct sock *sk, const struct request_sock *req,
1137 const struct sk_buff *skb)
cfb6eeb4 1138{
49a72dfb
AL
1139 struct tcp_md5sig_pool *hp;
1140 struct hash_desc *desc;
318cf7aa 1141 const struct tcphdr *th = tcp_hdr(skb);
cfb6eeb4
YH
1142 __be32 saddr, daddr;
1143
1144 if (sk) {
c720c7e8
ED
1145 saddr = inet_sk(sk)->inet_saddr;
1146 daddr = inet_sk(sk)->inet_daddr;
49a72dfb
AL
1147 } else if (req) {
1148 saddr = inet_rsk(req)->loc_addr;
1149 daddr = inet_rsk(req)->rmt_addr;
cfb6eeb4 1150 } else {
49a72dfb
AL
1151 const struct iphdr *iph = ip_hdr(skb);
1152 saddr = iph->saddr;
1153 daddr = iph->daddr;
cfb6eeb4 1154 }
49a72dfb
AL
1155
1156 hp = tcp_get_md5sig_pool();
1157 if (!hp)
1158 goto clear_hash_noput;
1159 desc = &hp->md5_desc;
1160
1161 if (crypto_hash_init(desc))
1162 goto clear_hash;
1163
1164 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1165 goto clear_hash;
1166 if (tcp_md5_hash_header(hp, th))
1167 goto clear_hash;
1168 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1169 goto clear_hash;
1170 if (tcp_md5_hash_key(hp, key))
1171 goto clear_hash;
1172 if (crypto_hash_final(desc, md5_hash))
1173 goto clear_hash;
1174
1175 tcp_put_md5sig_pool();
1176 return 0;
1177
1178clear_hash:
1179 tcp_put_md5sig_pool();
1180clear_hash_noput:
1181 memset(md5_hash, 0, 16);
1182 return 1;
cfb6eeb4 1183}
49a72dfb 1184EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
cfb6eeb4 1185
318cf7aa 1186static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
cfb6eeb4
YH
1187{
1188 /*
1189 * This gets called for each TCP segment that arrives
1190 * so we want to be efficient.
1191 * We have 3 drop cases:
1192 * o No MD5 hash and one expected.
1193 * o MD5 hash and we're not expecting one.
1194 * o MD5 hash and its wrong.
1195 */
cf533ea5 1196 const __u8 *hash_location = NULL;
cfb6eeb4 1197 struct tcp_md5sig_key *hash_expected;
eddc9ec5 1198 const struct iphdr *iph = ip_hdr(skb);
cf533ea5 1199 const struct tcphdr *th = tcp_hdr(skb);
cfb6eeb4 1200 int genhash;
cfb6eeb4
YH
1201 unsigned char newhash[16];
1202
a915da9b
ED
1203 hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1204 AF_INET);
7d5d5525 1205 hash_location = tcp_parse_md5sig_option(th);
cfb6eeb4 1206
cfb6eeb4
YH
1207 /* We've parsed the options - do we have a hash? */
1208 if (!hash_expected && !hash_location)
1209 return 0;
1210
1211 if (hash_expected && !hash_location) {
785957d3 1212 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
cfb6eeb4
YH
1213 return 1;
1214 }
1215
1216 if (!hash_expected && hash_location) {
785957d3 1217 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
cfb6eeb4
YH
1218 return 1;
1219 }
1220
1221 /* Okay, so this is hash_expected and hash_location -
1222 * so we need to calculate the checksum.
1223 */
49a72dfb
AL
1224 genhash = tcp_v4_md5_hash_skb(newhash,
1225 hash_expected,
1226 NULL, NULL, skb);
cfb6eeb4
YH
1227
1228 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1229 if (net_ratelimit()) {
058bd4d2
JP
1230 pr_info("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1231 &iph->saddr, ntohs(th->source),
1232 &iph->daddr, ntohs(th->dest),
1233 genhash ? " tcp_v4_calc_md5_hash failed" : "");
cfb6eeb4
YH
1234 }
1235 return 1;
1236 }
1237 return 0;
1238}
1239
1240#endif
1241
72a3effa 1242struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1da177e4 1243 .family = PF_INET,
2e6599cb 1244 .obj_size = sizeof(struct tcp_request_sock),
72659ecc 1245 .rtx_syn_ack = tcp_v4_rtx_synack,
60236fdd
ACM
1246 .send_ack = tcp_v4_reqsk_send_ack,
1247 .destructor = tcp_v4_reqsk_destructor,
1da177e4 1248 .send_reset = tcp_v4_send_reset,
72659ecc 1249 .syn_ack_timeout = tcp_syn_ack_timeout,
1da177e4
LT
1250};
1251
cfb6eeb4 1252#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 1253static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
cfb6eeb4 1254 .md5_lookup = tcp_v4_reqsk_md5_lookup,
e3afe7b7 1255 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 1256};
b6332e6c 1257#endif
cfb6eeb4 1258
1da177e4
LT
1259int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1260{
4957faad 1261 struct tcp_extend_values tmp_ext;
1da177e4 1262 struct tcp_options_received tmp_opt;
cf533ea5 1263 const u8 *hash_location;
60236fdd 1264 struct request_sock *req;
e6b4d113 1265 struct inet_request_sock *ireq;
4957faad 1266 struct tcp_sock *tp = tcp_sk(sk);
e6b4d113 1267 struct dst_entry *dst = NULL;
eddc9ec5
ACM
1268 __be32 saddr = ip_hdr(skb)->saddr;
1269 __be32 daddr = ip_hdr(skb)->daddr;
1da177e4 1270 __u32 isn = TCP_SKB_CB(skb)->when;
1da177e4 1271 int want_cookie = 0;
1da177e4
LT
1272
1273 /* Never answer to SYNs send to broadcast or multicast */
511c3f92 1274 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1da177e4
LT
1275 goto drop;
1276
1277 /* TW buckets are converted to open requests without
1278 * limitations, they conserve resources and peer is
1279 * evidently real one.
1280 */
463c84b9 1281 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
946cedcc
ED
1282 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1283 if (!want_cookie)
1284 goto drop;
1da177e4
LT
1285 }
1286
1287 /* Accept backlog is full. If we have already queued enough
1288 * of warm entries in syn queue, drop request. It is better than
1289 * clogging syn queue with openreqs with exponentially increasing
1290 * timeout.
1291 */
463c84b9 1292 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1da177e4
LT
1293 goto drop;
1294
ce4a7d0d 1295 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1da177e4
LT
1296 if (!req)
1297 goto drop;
1298
cfb6eeb4
YH
1299#ifdef CONFIG_TCP_MD5SIG
1300 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1301#endif
1302
1da177e4 1303 tcp_clear_options(&tmp_opt);
bee7ca9e 1304 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
4957faad 1305 tmp_opt.user_mss = tp->rx_opt.user_mss;
bb5b7c11 1306 tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
4957faad
WAS
1307
1308 if (tmp_opt.cookie_plus > 0 &&
1309 tmp_opt.saw_tstamp &&
1310 !tp->rx_opt.cookie_out_never &&
1311 (sysctl_tcp_cookie_size > 0 ||
1312 (tp->cookie_values != NULL &&
1313 tp->cookie_values->cookie_desired > 0))) {
1314 u8 *c;
1315 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1316 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1317
1318 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1319 goto drop_and_release;
1320
1321 /* Secret recipe starts with IP addresses */
0eae88f3
ED
1322 *mess++ ^= (__force u32)daddr;
1323 *mess++ ^= (__force u32)saddr;
1da177e4 1324
4957faad
WAS
1325 /* plus variable length Initiator Cookie */
1326 c = (u8 *)mess;
1327 while (l-- > 0)
1328 *c++ ^= *hash_location++;
1329
4957faad 1330 want_cookie = 0; /* not our kind of cookie */
4957faad
WAS
1331 tmp_ext.cookie_out_never = 0; /* false */
1332 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1333 } else if (!tp->rx_opt.cookie_in_always) {
1334 /* redundant indications, but ensure initialization. */
1335 tmp_ext.cookie_out_never = 1; /* true */
1336 tmp_ext.cookie_plus = 0;
1337 } else {
1338 goto drop_and_release;
1339 }
1340 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1da177e4 1341
4dfc2817 1342 if (want_cookie && !tmp_opt.saw_tstamp)
1da177e4 1343 tcp_clear_options(&tmp_opt);
1da177e4 1344
1da177e4 1345 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1da177e4
LT
1346 tcp_openreq_init(req, &tmp_opt, skb);
1347
bb5b7c11
DM
1348 ireq = inet_rsk(req);
1349 ireq->loc_addr = daddr;
1350 ireq->rmt_addr = saddr;
1351 ireq->no_srccheck = inet_sk(sk)->transparent;
1352 ireq->opt = tcp_v4_save_options(sk, skb);
1353
284904aa 1354 if (security_inet_conn_request(sk, skb, req))
bb5b7c11 1355 goto drop_and_free;
284904aa 1356
172d69e6 1357 if (!want_cookie || tmp_opt.tstamp_ok)
aa8223c7 1358 TCP_ECN_create_request(req, tcp_hdr(skb));
1da177e4
LT
1359
1360 if (want_cookie) {
1da177e4 1361 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
172d69e6 1362 req->cookie_ts = tmp_opt.tstamp_ok;
1da177e4
LT
1363 } else if (!isn) {
1364 struct inet_peer *peer = NULL;
6bd023f3 1365 struct flowi4 fl4;
1da177e4
LT
1366
1367 /* VJ's idea. We save last timestamp seen
1368 * from the destination in peer table, when entering
1369 * state TIME-WAIT, and check against it before
1370 * accepting new connection request.
1371 *
1372 * If "isn" is not zero, this request hit alive
1373 * timewait bucket, so that all the necessary checks
1374 * are made in the function processing timewait state.
1375 */
1376 if (tmp_opt.saw_tstamp &&
295ff7ed 1377 tcp_death_row.sysctl_tw_recycle &&
6bd023f3 1378 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
ed2361e6
DM
1379 fl4.daddr == saddr &&
1380 (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
317fe0e6 1381 inet_peer_refcheck(peer);
2c1409a0 1382 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1da177e4
LT
1383 (s32)(peer->tcp_ts - req->ts_recent) >
1384 TCP_PAWS_WINDOW) {
de0744af 1385 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
7cd04fa7 1386 goto drop_and_release;
1da177e4
LT
1387 }
1388 }
1389 /* Kill the following clause, if you dislike this way. */
1390 else if (!sysctl_tcp_syncookies &&
463c84b9 1391 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1da177e4
LT
1392 (sysctl_max_syn_backlog >> 2)) &&
1393 (!peer || !peer->tcp_ts_stamp) &&
1394 (!dst || !dst_metric(dst, RTAX_RTT))) {
1395 /* Without syncookies last quarter of
1396 * backlog is filled with destinations,
1397 * proven to be alive.
1398 * It means that we continue to communicate
1399 * to destinations, already remembered
1400 * to the moment of synflood.
1401 */
afd46503 1402 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
673d57e7 1403 &saddr, ntohs(tcp_hdr(skb)->source));
7cd04fa7 1404 goto drop_and_release;
1da177e4
LT
1405 }
1406
a94f723d 1407 isn = tcp_v4_init_sequence(skb);
1da177e4 1408 }
2e6599cb 1409 tcp_rsk(req)->snt_isn = isn;
9ad7c049 1410 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1da177e4 1411
72659ecc
OP
1412 if (tcp_v4_send_synack(sk, dst, req,
1413 (struct request_values *)&tmp_ext) ||
4957faad 1414 want_cookie)
1da177e4
LT
1415 goto drop_and_free;
1416
7cd04fa7 1417 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1da177e4
LT
1418 return 0;
1419
7cd04fa7
DL
1420drop_and_release:
1421 dst_release(dst);
1da177e4 1422drop_and_free:
60236fdd 1423 reqsk_free(req);
1da177e4 1424drop:
1da177e4
LT
1425 return 0;
1426}
4bc2f18b 1427EXPORT_SYMBOL(tcp_v4_conn_request);
1da177e4
LT
1428
1429
1430/*
1431 * The three way handshake has completed - we got a valid synack -
1432 * now create the new socket.
1433 */
1434struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
60236fdd 1435 struct request_sock *req,
1da177e4
LT
1436 struct dst_entry *dst)
1437{
2e6599cb 1438 struct inet_request_sock *ireq;
1da177e4
LT
1439 struct inet_sock *newinet;
1440 struct tcp_sock *newtp;
1441 struct sock *newsk;
cfb6eeb4
YH
1442#ifdef CONFIG_TCP_MD5SIG
1443 struct tcp_md5sig_key *key;
1444#endif
f6d8bd05 1445 struct ip_options_rcu *inet_opt;
1da177e4
LT
1446
1447 if (sk_acceptq_is_full(sk))
1448 goto exit_overflow;
1449
1da177e4
LT
1450 newsk = tcp_create_openreq_child(sk, req, skb);
1451 if (!newsk)
093d2823 1452 goto exit_nonewsk;
1da177e4 1453
bcd76111 1454 newsk->sk_gso_type = SKB_GSO_TCPV4;
1da177e4
LT
1455
1456 newtp = tcp_sk(newsk);
1457 newinet = inet_sk(newsk);
2e6599cb 1458 ireq = inet_rsk(req);
c720c7e8
ED
1459 newinet->inet_daddr = ireq->rmt_addr;
1460 newinet->inet_rcv_saddr = ireq->loc_addr;
1461 newinet->inet_saddr = ireq->loc_addr;
f6d8bd05
ED
1462 inet_opt = ireq->opt;
1463 rcu_assign_pointer(newinet->inet_opt, inet_opt);
2e6599cb 1464 ireq->opt = NULL;
463c84b9 1465 newinet->mc_index = inet_iif(skb);
eddc9ec5 1466 newinet->mc_ttl = ip_hdr(skb)->ttl;
4c507d28 1467 newinet->rcv_tos = ip_hdr(skb)->tos;
d83d8461 1468 inet_csk(newsk)->icsk_ext_hdr_len = 0;
f6d8bd05
ED
1469 if (inet_opt)
1470 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
c720c7e8 1471 newinet->inet_id = newtp->write_seq ^ jiffies;
1da177e4 1472
0e734419
DM
1473 if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
1474 goto put_and_exit;
1475
1476 sk_setup_caps(newsk, dst);
1477
5d424d5a 1478 tcp_mtup_init(newsk);
1da177e4 1479 tcp_sync_mss(newsk, dst_mtu(dst));
0dbaee3b 1480 newtp->advmss = dst_metric_advmss(dst);
f5fff5dc
TQ
1481 if (tcp_sk(sk)->rx_opt.user_mss &&
1482 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1483 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1484
1da177e4 1485 tcp_initialize_rcv_mss(newsk);
9ad7c049
JC
1486 if (tcp_rsk(req)->snt_synack)
1487 tcp_valid_rtt_meas(newsk,
1488 tcp_time_stamp - tcp_rsk(req)->snt_synack);
1489 newtp->total_retrans = req->retrans;
1da177e4 1490
cfb6eeb4
YH
1491#ifdef CONFIG_TCP_MD5SIG
1492 /* Copy over the MD5 key from the original socket */
a915da9b
ED
1493 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1494 AF_INET);
c720c7e8 1495 if (key != NULL) {
cfb6eeb4
YH
1496 /*
1497 * We're using one, so create a matching key
1498 * on the newsk structure. If we fail to get
1499 * memory, then we end up not copying the key
1500 * across. Shucks.
1501 */
a915da9b
ED
1502 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1503 AF_INET, key->key, key->keylen, GFP_ATOMIC);
a465419b 1504 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
cfb6eeb4
YH
1505 }
1506#endif
1507
0e734419
DM
1508 if (__inet_inherit_port(sk, newsk) < 0)
1509 goto put_and_exit;
9327f705 1510 __inet_hash_nolisten(newsk, NULL);
1da177e4
LT
1511
1512 return newsk;
1513
1514exit_overflow:
de0744af 1515 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
093d2823
BS
1516exit_nonewsk:
1517 dst_release(dst);
1da177e4 1518exit:
de0744af 1519 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4 1520 return NULL;
0e734419 1521put_and_exit:
709e8697 1522 tcp_clear_xmit_timers(newsk);
d8a6e65f 1523 tcp_cleanup_congestion_control(newsk);
918eb399 1524 bh_unlock_sock(newsk);
0e734419
DM
1525 sock_put(newsk);
1526 goto exit;
1da177e4 1527}
4bc2f18b 1528EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1da177e4
LT
1529
1530static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1531{
aa8223c7 1532 struct tcphdr *th = tcp_hdr(skb);
eddc9ec5 1533 const struct iphdr *iph = ip_hdr(skb);
1da177e4 1534 struct sock *nsk;
60236fdd 1535 struct request_sock **prev;
1da177e4 1536 /* Find possible connection requests. */
463c84b9
ACM
1537 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1538 iph->saddr, iph->daddr);
1da177e4
LT
1539 if (req)
1540 return tcp_check_req(sk, skb, req, prev);
1541
3b1e0a65 1542 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
c67499c0 1543 th->source, iph->daddr, th->dest, inet_iif(skb));
1da177e4
LT
1544
1545 if (nsk) {
1546 if (nsk->sk_state != TCP_TIME_WAIT) {
1547 bh_lock_sock(nsk);
1548 return nsk;
1549 }
9469c7b4 1550 inet_twsk_put(inet_twsk(nsk));
1da177e4
LT
1551 return NULL;
1552 }
1553
1554#ifdef CONFIG_SYN_COOKIES
af9b4738 1555 if (!th->syn)
1da177e4
LT
1556 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1557#endif
1558 return sk;
1559}
1560
b51655b9 1561static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1da177e4 1562{
eddc9ec5
ACM
1563 const struct iphdr *iph = ip_hdr(skb);
1564
84fa7933 1565 if (skb->ip_summed == CHECKSUM_COMPLETE) {
eddc9ec5
ACM
1566 if (!tcp_v4_check(skb->len, iph->saddr,
1567 iph->daddr, skb->csum)) {
fb286bb2 1568 skb->ip_summed = CHECKSUM_UNNECESSARY;
1da177e4 1569 return 0;
fb286bb2 1570 }
1da177e4 1571 }
fb286bb2 1572
eddc9ec5 1573 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
fb286bb2
HX
1574 skb->len, IPPROTO_TCP, 0);
1575
1da177e4 1576 if (skb->len <= 76) {
fb286bb2 1577 return __skb_checksum_complete(skb);
1da177e4
LT
1578 }
1579 return 0;
1580}
1581
1582
1583/* The socket must have it's spinlock held when we get
1584 * here.
1585 *
1586 * We have a potential double-lock case here, so even when
1587 * doing backlog processing we use the BH locking scheme.
1588 * This is because we cannot sleep with the original spinlock
1589 * held.
1590 */
1591int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1592{
cfb6eeb4
YH
1593 struct sock *rsk;
1594#ifdef CONFIG_TCP_MD5SIG
1595 /*
1596 * We really want to reject the packet as early as possible
1597 * if:
1598 * o We're expecting an MD5'd packet and this is no MD5 tcp option
1599 * o There is an MD5 option and we're not expecting one
1600 */
7174259e 1601 if (tcp_v4_inbound_md5_hash(sk, skb))
cfb6eeb4
YH
1602 goto discard;
1603#endif
1604
1da177e4 1605 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
bdeab991 1606 sock_rps_save_rxhash(sk, skb);
aa8223c7 1607 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1608 rsk = sk;
1da177e4 1609 goto reset;
cfb6eeb4 1610 }
1da177e4
LT
1611 return 0;
1612 }
1613
ab6a5bb6 1614 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1da177e4
LT
1615 goto csum_err;
1616
1617 if (sk->sk_state == TCP_LISTEN) {
1618 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1619 if (!nsk)
1620 goto discard;
1621
1622 if (nsk != sk) {
bdeab991 1623 sock_rps_save_rxhash(nsk, skb);
cfb6eeb4
YH
1624 if (tcp_child_process(sk, nsk, skb)) {
1625 rsk = nsk;
1da177e4 1626 goto reset;
cfb6eeb4 1627 }
1da177e4
LT
1628 return 0;
1629 }
ca55158c 1630 } else
bdeab991 1631 sock_rps_save_rxhash(sk, skb);
ca55158c 1632
aa8223c7 1633 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1634 rsk = sk;
1da177e4 1635 goto reset;
cfb6eeb4 1636 }
1da177e4
LT
1637 return 0;
1638
1639reset:
cfb6eeb4 1640 tcp_v4_send_reset(rsk, skb);
1da177e4
LT
1641discard:
1642 kfree_skb(skb);
1643 /* Be careful here. If this function gets more complicated and
1644 * gcc suffers from register pressure on the x86, sk (in %ebx)
1645 * might be destroyed here. This current version compiles correctly,
1646 * but you have been warned.
1647 */
1648 return 0;
1649
1650csum_err:
63231bdd 1651 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1da177e4
LT
1652 goto discard;
1653}
4bc2f18b 1654EXPORT_SYMBOL(tcp_v4_do_rcv);
1da177e4
LT
1655
1656/*
1657 * From tcp_input.c
1658 */
1659
1660int tcp_v4_rcv(struct sk_buff *skb)
1661{
eddc9ec5 1662 const struct iphdr *iph;
cf533ea5 1663 const struct tcphdr *th;
1da177e4
LT
1664 struct sock *sk;
1665 int ret;
a86b1e30 1666 struct net *net = dev_net(skb->dev);
1da177e4
LT
1667
1668 if (skb->pkt_type != PACKET_HOST)
1669 goto discard_it;
1670
1671 /* Count it even if it's bad */
63231bdd 1672 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1da177e4
LT
1673
1674 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1675 goto discard_it;
1676
aa8223c7 1677 th = tcp_hdr(skb);
1da177e4
LT
1678
1679 if (th->doff < sizeof(struct tcphdr) / 4)
1680 goto bad_packet;
1681 if (!pskb_may_pull(skb, th->doff * 4))
1682 goto discard_it;
1683
1684 /* An explanation is required here, I think.
1685 * Packet length and doff are validated by header prediction,
caa20d9a 1686 * provided case of th->doff==0 is eliminated.
1da177e4 1687 * So, we defer the checks. */
60476372 1688 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1da177e4
LT
1689 goto bad_packet;
1690
aa8223c7 1691 th = tcp_hdr(skb);
eddc9ec5 1692 iph = ip_hdr(skb);
1da177e4
LT
1693 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1694 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1695 skb->len - th->doff * 4);
1696 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1697 TCP_SKB_CB(skb)->when = 0;
b82d1bb4 1698 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1da177e4
LT
1699 TCP_SKB_CB(skb)->sacked = 0;
1700
9a1f27c4 1701 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1da177e4
LT
1702 if (!sk)
1703 goto no_tcp_socket;
1704
bb134d5d
ED
1705process:
1706 if (sk->sk_state == TCP_TIME_WAIT)
1707 goto do_time_wait;
1708
6cce09f8
ED
1709 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1710 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
d218d111 1711 goto discard_and_relse;
6cce09f8 1712 }
d218d111 1713
1da177e4
LT
1714 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1715 goto discard_and_relse;
b59c2701 1716 nf_reset(skb);
1da177e4 1717
fda9ef5d 1718 if (sk_filter(sk, skb))
1da177e4
LT
1719 goto discard_and_relse;
1720
1721 skb->dev = NULL;
1722
c6366184 1723 bh_lock_sock_nested(sk);
1da177e4
LT
1724 ret = 0;
1725 if (!sock_owned_by_user(sk)) {
1a2449a8
CL
1726#ifdef CONFIG_NET_DMA
1727 struct tcp_sock *tp = tcp_sk(sk);
1728 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
f67b4599 1729 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1a2449a8 1730 if (tp->ucopy.dma_chan)
1da177e4 1731 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8
CL
1732 else
1733#endif
1734 {
1735 if (!tcp_prequeue(sk, skb))
ae8d7f88 1736 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8 1737 }
6cce09f8 1738 } else if (unlikely(sk_add_backlog(sk, skb))) {
6b03a53a 1739 bh_unlock_sock(sk);
6cce09f8 1740 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
6b03a53a
ZY
1741 goto discard_and_relse;
1742 }
1da177e4
LT
1743 bh_unlock_sock(sk);
1744
1745 sock_put(sk);
1746
1747 return ret;
1748
1749no_tcp_socket:
1750 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1751 goto discard_it;
1752
1753 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1754bad_packet:
63231bdd 1755 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1da177e4 1756 } else {
cfb6eeb4 1757 tcp_v4_send_reset(NULL, skb);
1da177e4
LT
1758 }
1759
1760discard_it:
1761 /* Discard frame. */
1762 kfree_skb(skb);
e905a9ed 1763 return 0;
1da177e4
LT
1764
1765discard_and_relse:
1766 sock_put(sk);
1767 goto discard_it;
1768
1769do_time_wait:
1770 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
9469c7b4 1771 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1772 goto discard_it;
1773 }
1774
1775 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
63231bdd 1776 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
9469c7b4 1777 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1778 goto discard_it;
1779 }
9469c7b4 1780 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1da177e4 1781 case TCP_TW_SYN: {
c346dca1 1782 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
c67499c0 1783 &tcp_hashinfo,
eddc9ec5 1784 iph->daddr, th->dest,
463c84b9 1785 inet_iif(skb));
1da177e4 1786 if (sk2) {
9469c7b4
YH
1787 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1788 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1789 sk = sk2;
1790 goto process;
1791 }
1792 /* Fall through to ACK */
1793 }
1794 case TCP_TW_ACK:
1795 tcp_v4_timewait_ack(sk, skb);
1796 break;
1797 case TCP_TW_RST:
1798 goto no_tcp_socket;
1799 case TCP_TW_SUCCESS:;
1800 }
1801 goto discard_it;
1802}
1803
3f419d2d 1804struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1da177e4 1805{
3f419d2d 1806 struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1da177e4 1807 struct inet_sock *inet = inet_sk(sk);
3f419d2d 1808 struct inet_peer *peer;
1da177e4 1809
c5216cc7
DM
1810 if (!rt ||
1811 inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
b534ecf1 1812 peer = inet_getpeer_v4(inet->inet_daddr, 1);
3f419d2d 1813 *release_it = true;
1da177e4
LT
1814 } else {
1815 if (!rt->peer)
a48eff12 1816 rt_bind_peer(rt, inet->inet_daddr, 1);
1da177e4 1817 peer = rt->peer;
3f419d2d 1818 *release_it = false;
1da177e4
LT
1819 }
1820
3f419d2d 1821 return peer;
1da177e4 1822}
3f419d2d 1823EXPORT_SYMBOL(tcp_v4_get_peer);
1da177e4 1824
ccb7c410 1825void *tcp_v4_tw_get_peer(struct sock *sk)
1da177e4 1826{
cf533ea5 1827 const struct inet_timewait_sock *tw = inet_twsk(sk);
1da177e4 1828
ccb7c410 1829 return inet_getpeer_v4(tw->tw_daddr, 1);
1da177e4 1830}
ccb7c410
DM
1831EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1832
1833static struct timewait_sock_ops tcp_timewait_sock_ops = {
1834 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1835 .twsk_unique = tcp_twsk_unique,
1836 .twsk_destructor= tcp_twsk_destructor,
1837 .twsk_getpeer = tcp_v4_tw_get_peer,
1838};
1da177e4 1839
3b401a81 1840const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
1841 .queue_xmit = ip_queue_xmit,
1842 .send_check = tcp_v4_send_check,
1843 .rebuild_header = inet_sk_rebuild_header,
1844 .conn_request = tcp_v4_conn_request,
1845 .syn_recv_sock = tcp_v4_syn_recv_sock,
3f419d2d 1846 .get_peer = tcp_v4_get_peer,
543d9cfe
ACM
1847 .net_header_len = sizeof(struct iphdr),
1848 .setsockopt = ip_setsockopt,
1849 .getsockopt = ip_getsockopt,
1850 .addr2sockaddr = inet_csk_addr2sockaddr,
1851 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 1852 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 1853#ifdef CONFIG_COMPAT
543d9cfe
ACM
1854 .compat_setsockopt = compat_ip_setsockopt,
1855 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 1856#endif
1da177e4 1857};
4bc2f18b 1858EXPORT_SYMBOL(ipv4_specific);
1da177e4 1859
cfb6eeb4 1860#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 1861static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 1862 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 1863 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 1864 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 1865};
b6332e6c 1866#endif
cfb6eeb4 1867
1da177e4
LT
1868/* NOTE: A lot of things set to zero explicitly by call to
1869 * sk_alloc() so need not be done here.
1870 */
1871static int tcp_v4_init_sock(struct sock *sk)
1872{
6687e988 1873 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4
LT
1874 struct tcp_sock *tp = tcp_sk(sk);
1875
1876 skb_queue_head_init(&tp->out_of_order_queue);
1877 tcp_init_xmit_timers(sk);
1878 tcp_prequeue_init(tp);
1879
6687e988 1880 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1da177e4
LT
1881 tp->mdev = TCP_TIMEOUT_INIT;
1882
1883 /* So many TCP implementations out there (incorrectly) count the
1884 * initial SYN frame in their delayed-ACK and congestion control
1885 * algorithms that we must have the following bandaid to talk
1886 * efficiently to them. -DaveM
1887 */
9ad7c049 1888 tp->snd_cwnd = TCP_INIT_CWND;
1da177e4
LT
1889
1890 /* See draft-stevens-tcpca-spec-01 for discussion of the
1891 * initialization of these values.
1892 */
0b6a05c1 1893 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1da177e4 1894 tp->snd_cwnd_clamp = ~0;
bee7ca9e 1895 tp->mss_cache = TCP_MSS_DEFAULT;
1da177e4
LT
1896
1897 tp->reordering = sysctl_tcp_reordering;
6687e988 1898 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1da177e4
LT
1899
1900 sk->sk_state = TCP_CLOSE;
1901
1902 sk->sk_write_space = sk_stream_write_space;
1903 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1904
8292a17a 1905 icsk->icsk_af_ops = &ipv4_specific;
d83d8461 1906 icsk->icsk_sync_mss = tcp_sync_mss;
cfb6eeb4
YH
1907#ifdef CONFIG_TCP_MD5SIG
1908 tp->af_specific = &tcp_sock_ipv4_specific;
1909#endif
1da177e4 1910
435cf559
WAS
1911 /* TCP Cookie Transactions */
1912 if (sysctl_tcp_cookie_size > 0) {
1913 /* Default, cookies without s_data_payload. */
1914 tp->cookie_values =
1915 kzalloc(sizeof(*tp->cookie_values),
1916 sk->sk_allocation);
1917 if (tp->cookie_values != NULL)
1918 kref_init(&tp->cookie_values->kref);
1919 }
1920 /* Presumed zeroed, in order of appearance:
1921 * cookie_in_always, cookie_out_never,
1922 * s_data_constant, s_data_in, s_data_out
1923 */
1da177e4
LT
1924 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1925 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1926
eb4dea58 1927 local_bh_disable();
d1a4c0b3 1928 sock_update_memcg(sk);
180d8cd9 1929 sk_sockets_allocated_inc(sk);
eb4dea58 1930 local_bh_enable();
1da177e4
LT
1931
1932 return 0;
1933}
1934
7d06b2e0 1935void tcp_v4_destroy_sock(struct sock *sk)
1da177e4
LT
1936{
1937 struct tcp_sock *tp = tcp_sk(sk);
1938
1939 tcp_clear_xmit_timers(sk);
1940
6687e988 1941 tcp_cleanup_congestion_control(sk);
317a76f9 1942
1da177e4 1943 /* Cleanup up the write buffer. */
fe067e8a 1944 tcp_write_queue_purge(sk);
1da177e4
LT
1945
1946 /* Cleans up our, hopefully empty, out_of_order_queue. */
e905a9ed 1947 __skb_queue_purge(&tp->out_of_order_queue);
1da177e4 1948
cfb6eeb4
YH
1949#ifdef CONFIG_TCP_MD5SIG
1950 /* Clean up the MD5 key list, if any */
1951 if (tp->md5sig_info) {
a915da9b 1952 tcp_clear_md5_list(sk);
a8afca03 1953 kfree_rcu(tp->md5sig_info, rcu);
cfb6eeb4
YH
1954 tp->md5sig_info = NULL;
1955 }
1956#endif
1957
1a2449a8
CL
1958#ifdef CONFIG_NET_DMA
1959 /* Cleans up our sk_async_wait_queue */
e905a9ed 1960 __skb_queue_purge(&sk->sk_async_wait_queue);
1a2449a8
CL
1961#endif
1962
1da177e4
LT
1963 /* Clean prequeue, it must be empty really */
1964 __skb_queue_purge(&tp->ucopy.prequeue);
1965
1966 /* Clean up a referenced TCP bind bucket. */
463c84b9 1967 if (inet_csk(sk)->icsk_bind_hash)
ab1e0a13 1968 inet_put_port(sk);
1da177e4
LT
1969
1970 /*
1971 * If sendmsg cached page exists, toss it.
1972 */
1973 if (sk->sk_sndmsg_page) {
1974 __free_page(sk->sk_sndmsg_page);
1975 sk->sk_sndmsg_page = NULL;
1976 }
1977
435cf559
WAS
1978 /* TCP Cookie Transactions */
1979 if (tp->cookie_values != NULL) {
1980 kref_put(&tp->cookie_values->kref,
1981 tcp_cookie_values_release);
1982 tp->cookie_values = NULL;
1983 }
1984
180d8cd9 1985 sk_sockets_allocated_dec(sk);
d1a4c0b3 1986 sock_release_memcg(sk);
1da177e4 1987}
1da177e4
LT
1988EXPORT_SYMBOL(tcp_v4_destroy_sock);
1989
1990#ifdef CONFIG_PROC_FS
1991/* Proc filesystem TCP sock list dumping. */
1992
3ab5aee7 1993static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1da177e4 1994{
3ab5aee7 1995 return hlist_nulls_empty(head) ? NULL :
8feaf0c0 1996 list_entry(head->first, struct inet_timewait_sock, tw_node);
1da177e4
LT
1997}
1998
8feaf0c0 1999static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1da177e4 2000{
3ab5aee7
ED
2001 return !is_a_nulls(tw->tw_node.next) ?
2002 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1da177e4
LT
2003}
2004
a8b690f9
TH
2005/*
2006 * Get next listener socket follow cur. If cur is NULL, get first socket
2007 * starting from bucket given in st->bucket; when st->bucket is zero the
2008 * very first socket in the hash table is returned.
2009 */
1da177e4
LT
2010static void *listening_get_next(struct seq_file *seq, void *cur)
2011{
463c84b9 2012 struct inet_connection_sock *icsk;
c25eb3bf 2013 struct hlist_nulls_node *node;
1da177e4 2014 struct sock *sk = cur;
5caea4ea 2015 struct inet_listen_hashbucket *ilb;
5799de0b 2016 struct tcp_iter_state *st = seq->private;
a4146b1b 2017 struct net *net = seq_file_net(seq);
1da177e4
LT
2018
2019 if (!sk) {
a8b690f9 2020 ilb = &tcp_hashinfo.listening_hash[st->bucket];
5caea4ea 2021 spin_lock_bh(&ilb->lock);
c25eb3bf 2022 sk = sk_nulls_head(&ilb->head);
a8b690f9 2023 st->offset = 0;
1da177e4
LT
2024 goto get_sk;
2025 }
5caea4ea 2026 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1da177e4 2027 ++st->num;
a8b690f9 2028 ++st->offset;
1da177e4
LT
2029
2030 if (st->state == TCP_SEQ_STATE_OPENREQ) {
60236fdd 2031 struct request_sock *req = cur;
1da177e4 2032
72a3effa 2033 icsk = inet_csk(st->syn_wait_sk);
1da177e4
LT
2034 req = req->dl_next;
2035 while (1) {
2036 while (req) {
bdccc4ca 2037 if (req->rsk_ops->family == st->family) {
1da177e4
LT
2038 cur = req;
2039 goto out;
2040 }
2041 req = req->dl_next;
2042 }
72a3effa 2043 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1da177e4
LT
2044 break;
2045get_req:
463c84b9 2046 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1da177e4 2047 }
1bde5ac4 2048 sk = sk_nulls_next(st->syn_wait_sk);
1da177e4 2049 st->state = TCP_SEQ_STATE_LISTENING;
463c84b9 2050 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2051 } else {
e905a9ed 2052 icsk = inet_csk(sk);
463c84b9
ACM
2053 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2054 if (reqsk_queue_len(&icsk->icsk_accept_queue))
1da177e4 2055 goto start_req;
463c84b9 2056 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1bde5ac4 2057 sk = sk_nulls_next(sk);
1da177e4
LT
2058 }
2059get_sk:
c25eb3bf 2060 sk_nulls_for_each_from(sk, node) {
8475ef9f
PE
2061 if (!net_eq(sock_net(sk), net))
2062 continue;
2063 if (sk->sk_family == st->family) {
1da177e4
LT
2064 cur = sk;
2065 goto out;
2066 }
e905a9ed 2067 icsk = inet_csk(sk);
463c84b9
ACM
2068 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2069 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1da177e4
LT
2070start_req:
2071 st->uid = sock_i_uid(sk);
2072 st->syn_wait_sk = sk;
2073 st->state = TCP_SEQ_STATE_OPENREQ;
2074 st->sbucket = 0;
2075 goto get_req;
2076 }
463c84b9 2077 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2078 }
5caea4ea 2079 spin_unlock_bh(&ilb->lock);
a8b690f9 2080 st->offset = 0;
0f7ff927 2081 if (++st->bucket < INET_LHTABLE_SIZE) {
5caea4ea
ED
2082 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2083 spin_lock_bh(&ilb->lock);
c25eb3bf 2084 sk = sk_nulls_head(&ilb->head);
1da177e4
LT
2085 goto get_sk;
2086 }
2087 cur = NULL;
2088out:
2089 return cur;
2090}
2091
2092static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2093{
a8b690f9
TH
2094 struct tcp_iter_state *st = seq->private;
2095 void *rc;
2096
2097 st->bucket = 0;
2098 st->offset = 0;
2099 rc = listening_get_next(seq, NULL);
1da177e4
LT
2100
2101 while (rc && *pos) {
2102 rc = listening_get_next(seq, rc);
2103 --*pos;
2104 }
2105 return rc;
2106}
2107
6eac5604
AK
2108static inline int empty_bucket(struct tcp_iter_state *st)
2109{
3ab5aee7
ED
2110 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2111 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
6eac5604
AK
2112}
2113
a8b690f9
TH
2114/*
2115 * Get first established socket starting from bucket given in st->bucket.
2116 * If st->bucket is zero, the very first socket in the hash is returned.
2117 */
1da177e4
LT
2118static void *established_get_first(struct seq_file *seq)
2119{
5799de0b 2120 struct tcp_iter_state *st = seq->private;
a4146b1b 2121 struct net *net = seq_file_net(seq);
1da177e4
LT
2122 void *rc = NULL;
2123
a8b690f9
TH
2124 st->offset = 0;
2125 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1da177e4 2126 struct sock *sk;
3ab5aee7 2127 struct hlist_nulls_node *node;
8feaf0c0 2128 struct inet_timewait_sock *tw;
9db66bdc 2129 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1da177e4 2130
6eac5604
AK
2131 /* Lockless fast path for the common case of empty buckets */
2132 if (empty_bucket(st))
2133 continue;
2134
9db66bdc 2135 spin_lock_bh(lock);
3ab5aee7 2136 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
f40c8174 2137 if (sk->sk_family != st->family ||
878628fb 2138 !net_eq(sock_net(sk), net)) {
1da177e4
LT
2139 continue;
2140 }
2141 rc = sk;
2142 goto out;
2143 }
2144 st->state = TCP_SEQ_STATE_TIME_WAIT;
8feaf0c0 2145 inet_twsk_for_each(tw, node,
dbca9b27 2146 &tcp_hashinfo.ehash[st->bucket].twchain) {
28518fc1 2147 if (tw->tw_family != st->family ||
878628fb 2148 !net_eq(twsk_net(tw), net)) {
1da177e4
LT
2149 continue;
2150 }
2151 rc = tw;
2152 goto out;
2153 }
9db66bdc 2154 spin_unlock_bh(lock);
1da177e4
LT
2155 st->state = TCP_SEQ_STATE_ESTABLISHED;
2156 }
2157out:
2158 return rc;
2159}
2160
2161static void *established_get_next(struct seq_file *seq, void *cur)
2162{
2163 struct sock *sk = cur;
8feaf0c0 2164 struct inet_timewait_sock *tw;
3ab5aee7 2165 struct hlist_nulls_node *node;
5799de0b 2166 struct tcp_iter_state *st = seq->private;
a4146b1b 2167 struct net *net = seq_file_net(seq);
1da177e4
LT
2168
2169 ++st->num;
a8b690f9 2170 ++st->offset;
1da177e4
LT
2171
2172 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2173 tw = cur;
2174 tw = tw_next(tw);
2175get_tw:
878628fb 2176 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
1da177e4
LT
2177 tw = tw_next(tw);
2178 }
2179 if (tw) {
2180 cur = tw;
2181 goto out;
2182 }
9db66bdc 2183 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2184 st->state = TCP_SEQ_STATE_ESTABLISHED;
2185
6eac5604 2186 /* Look for next non empty bucket */
a8b690f9 2187 st->offset = 0;
f373b53b 2188 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
6eac5604
AK
2189 empty_bucket(st))
2190 ;
f373b53b 2191 if (st->bucket > tcp_hashinfo.ehash_mask)
6eac5604
AK
2192 return NULL;
2193
9db66bdc 2194 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
3ab5aee7 2195 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
1da177e4 2196 } else
3ab5aee7 2197 sk = sk_nulls_next(sk);
1da177e4 2198
3ab5aee7 2199 sk_nulls_for_each_from(sk, node) {
878628fb 2200 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
1da177e4
LT
2201 goto found;
2202 }
2203
2204 st->state = TCP_SEQ_STATE_TIME_WAIT;
dbca9b27 2205 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
1da177e4
LT
2206 goto get_tw;
2207found:
2208 cur = sk;
2209out:
2210 return cur;
2211}
2212
2213static void *established_get_idx(struct seq_file *seq, loff_t pos)
2214{
a8b690f9
TH
2215 struct tcp_iter_state *st = seq->private;
2216 void *rc;
2217
2218 st->bucket = 0;
2219 rc = established_get_first(seq);
1da177e4
LT
2220
2221 while (rc && pos) {
2222 rc = established_get_next(seq, rc);
2223 --pos;
7174259e 2224 }
1da177e4
LT
2225 return rc;
2226}
2227
2228static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2229{
2230 void *rc;
5799de0b 2231 struct tcp_iter_state *st = seq->private;
1da177e4 2232
1da177e4
LT
2233 st->state = TCP_SEQ_STATE_LISTENING;
2234 rc = listening_get_idx(seq, &pos);
2235
2236 if (!rc) {
1da177e4
LT
2237 st->state = TCP_SEQ_STATE_ESTABLISHED;
2238 rc = established_get_idx(seq, pos);
2239 }
2240
2241 return rc;
2242}
2243
a8b690f9
TH
2244static void *tcp_seek_last_pos(struct seq_file *seq)
2245{
2246 struct tcp_iter_state *st = seq->private;
2247 int offset = st->offset;
2248 int orig_num = st->num;
2249 void *rc = NULL;
2250
2251 switch (st->state) {
2252 case TCP_SEQ_STATE_OPENREQ:
2253 case TCP_SEQ_STATE_LISTENING:
2254 if (st->bucket >= INET_LHTABLE_SIZE)
2255 break;
2256 st->state = TCP_SEQ_STATE_LISTENING;
2257 rc = listening_get_next(seq, NULL);
2258 while (offset-- && rc)
2259 rc = listening_get_next(seq, rc);
2260 if (rc)
2261 break;
2262 st->bucket = 0;
2263 /* Fallthrough */
2264 case TCP_SEQ_STATE_ESTABLISHED:
2265 case TCP_SEQ_STATE_TIME_WAIT:
2266 st->state = TCP_SEQ_STATE_ESTABLISHED;
2267 if (st->bucket > tcp_hashinfo.ehash_mask)
2268 break;
2269 rc = established_get_first(seq);
2270 while (offset-- && rc)
2271 rc = established_get_next(seq, rc);
2272 }
2273
2274 st->num = orig_num;
2275
2276 return rc;
2277}
2278
1da177e4
LT
2279static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2280{
5799de0b 2281 struct tcp_iter_state *st = seq->private;
a8b690f9
TH
2282 void *rc;
2283
2284 if (*pos && *pos == st->last_pos) {
2285 rc = tcp_seek_last_pos(seq);
2286 if (rc)
2287 goto out;
2288 }
2289
1da177e4
LT
2290 st->state = TCP_SEQ_STATE_LISTENING;
2291 st->num = 0;
a8b690f9
TH
2292 st->bucket = 0;
2293 st->offset = 0;
2294 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2295
2296out:
2297 st->last_pos = *pos;
2298 return rc;
1da177e4
LT
2299}
2300
2301static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2302{
a8b690f9 2303 struct tcp_iter_state *st = seq->private;
1da177e4 2304 void *rc = NULL;
1da177e4
LT
2305
2306 if (v == SEQ_START_TOKEN) {
2307 rc = tcp_get_idx(seq, 0);
2308 goto out;
2309 }
1da177e4
LT
2310
2311 switch (st->state) {
2312 case TCP_SEQ_STATE_OPENREQ:
2313 case TCP_SEQ_STATE_LISTENING:
2314 rc = listening_get_next(seq, v);
2315 if (!rc) {
1da177e4 2316 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2317 st->bucket = 0;
2318 st->offset = 0;
1da177e4
LT
2319 rc = established_get_first(seq);
2320 }
2321 break;
2322 case TCP_SEQ_STATE_ESTABLISHED:
2323 case TCP_SEQ_STATE_TIME_WAIT:
2324 rc = established_get_next(seq, v);
2325 break;
2326 }
2327out:
2328 ++*pos;
a8b690f9 2329 st->last_pos = *pos;
1da177e4
LT
2330 return rc;
2331}
2332
2333static void tcp_seq_stop(struct seq_file *seq, void *v)
2334{
5799de0b 2335 struct tcp_iter_state *st = seq->private;
1da177e4
LT
2336
2337 switch (st->state) {
2338 case TCP_SEQ_STATE_OPENREQ:
2339 if (v) {
463c84b9
ACM
2340 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2341 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4
LT
2342 }
2343 case TCP_SEQ_STATE_LISTENING:
2344 if (v != SEQ_START_TOKEN)
5caea4ea 2345 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
1da177e4
LT
2346 break;
2347 case TCP_SEQ_STATE_TIME_WAIT:
2348 case TCP_SEQ_STATE_ESTABLISHED:
2349 if (v)
9db66bdc 2350 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2351 break;
2352 }
2353}
2354
73cb88ec 2355int tcp_seq_open(struct inode *inode, struct file *file)
1da177e4
LT
2356{
2357 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
1da177e4 2358 struct tcp_iter_state *s;
52d6f3f1 2359 int err;
1da177e4 2360
52d6f3f1
DL
2361 err = seq_open_net(inode, file, &afinfo->seq_ops,
2362 sizeof(struct tcp_iter_state));
2363 if (err < 0)
2364 return err;
f40c8174 2365
52d6f3f1 2366 s = ((struct seq_file *)file->private_data)->private;
1da177e4 2367 s->family = afinfo->family;
a8b690f9 2368 s->last_pos = 0;
f40c8174
DL
2369 return 0;
2370}
73cb88ec 2371EXPORT_SYMBOL(tcp_seq_open);
f40c8174 2372
6f8b13bc 2373int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4
LT
2374{
2375 int rc = 0;
2376 struct proc_dir_entry *p;
2377
9427c4b3
DL
2378 afinfo->seq_ops.start = tcp_seq_start;
2379 afinfo->seq_ops.next = tcp_seq_next;
2380 afinfo->seq_ops.stop = tcp_seq_stop;
2381
84841c3c 2382 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
73cb88ec 2383 afinfo->seq_fops, afinfo);
84841c3c 2384 if (!p)
1da177e4
LT
2385 rc = -ENOMEM;
2386 return rc;
2387}
4bc2f18b 2388EXPORT_SYMBOL(tcp_proc_register);
1da177e4 2389
6f8b13bc 2390void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4 2391{
6f8b13bc 2392 proc_net_remove(net, afinfo->name);
1da177e4 2393}
4bc2f18b 2394EXPORT_SYMBOL(tcp_proc_unregister);
1da177e4 2395
cf533ea5 2396static void get_openreq4(const struct sock *sk, const struct request_sock *req,
5e659e4c 2397 struct seq_file *f, int i, int uid, int *len)
1da177e4 2398{
2e6599cb 2399 const struct inet_request_sock *ireq = inet_rsk(req);
1da177e4
LT
2400 int ttd = req->expires - jiffies;
2401
5e659e4c 2402 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2403 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
1da177e4 2404 i,
2e6599cb 2405 ireq->loc_addr,
c720c7e8 2406 ntohs(inet_sk(sk)->inet_sport),
2e6599cb
ACM
2407 ireq->rmt_addr,
2408 ntohs(ireq->rmt_port),
1da177e4
LT
2409 TCP_SYN_RECV,
2410 0, 0, /* could print option size, but that is af dependent. */
2411 1, /* timers active (only the expire timer) */
2412 jiffies_to_clock_t(ttd),
2413 req->retrans,
2414 uid,
2415 0, /* non standard timer */
2416 0, /* open_requests have no inode */
2417 atomic_read(&sk->sk_refcnt),
5e659e4c
PE
2418 req,
2419 len);
1da177e4
LT
2420}
2421
5e659e4c 2422static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
1da177e4
LT
2423{
2424 int timer_active;
2425 unsigned long timer_expires;
cf533ea5 2426 const struct tcp_sock *tp = tcp_sk(sk);
cf4c6bf8 2427 const struct inet_connection_sock *icsk = inet_csk(sk);
cf533ea5 2428 const struct inet_sock *inet = inet_sk(sk);
c720c7e8
ED
2429 __be32 dest = inet->inet_daddr;
2430 __be32 src = inet->inet_rcv_saddr;
2431 __u16 destp = ntohs(inet->inet_dport);
2432 __u16 srcp = ntohs(inet->inet_sport);
49d09007 2433 int rx_queue;
1da177e4 2434
463c84b9 2435 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1da177e4 2436 timer_active = 1;
463c84b9
ACM
2437 timer_expires = icsk->icsk_timeout;
2438 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2439 timer_active = 4;
463c84b9 2440 timer_expires = icsk->icsk_timeout;
cf4c6bf8 2441 } else if (timer_pending(&sk->sk_timer)) {
1da177e4 2442 timer_active = 2;
cf4c6bf8 2443 timer_expires = sk->sk_timer.expires;
1da177e4
LT
2444 } else {
2445 timer_active = 0;
2446 timer_expires = jiffies;
2447 }
2448
49d09007
ED
2449 if (sk->sk_state == TCP_LISTEN)
2450 rx_queue = sk->sk_ack_backlog;
2451 else
2452 /*
2453 * because we dont lock socket, we might find a transient negative value
2454 */
2455 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2456
5e659e4c 2457 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
71338aa7 2458 "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
cf4c6bf8 2459 i, src, srcp, dest, destp, sk->sk_state,
47da8ee6 2460 tp->write_seq - tp->snd_una,
49d09007 2461 rx_queue,
1da177e4
LT
2462 timer_active,
2463 jiffies_to_clock_t(timer_expires - jiffies),
463c84b9 2464 icsk->icsk_retransmits,
cf4c6bf8 2465 sock_i_uid(sk),
6687e988 2466 icsk->icsk_probes_out,
cf4c6bf8
IJ
2467 sock_i_ino(sk),
2468 atomic_read(&sk->sk_refcnt), sk,
7be87351
SH
2469 jiffies_to_clock_t(icsk->icsk_rto),
2470 jiffies_to_clock_t(icsk->icsk_ack.ato),
463c84b9 2471 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1da177e4 2472 tp->snd_cwnd,
0b6a05c1 2473 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
5e659e4c 2474 len);
1da177e4
LT
2475}
2476
cf533ea5 2477static void get_timewait4_sock(const struct inet_timewait_sock *tw,
5e659e4c 2478 struct seq_file *f, int i, int *len)
1da177e4 2479{
23f33c2d 2480 __be32 dest, src;
1da177e4
LT
2481 __u16 destp, srcp;
2482 int ttd = tw->tw_ttd - jiffies;
2483
2484 if (ttd < 0)
2485 ttd = 0;
2486
2487 dest = tw->tw_daddr;
2488 src = tw->tw_rcv_saddr;
2489 destp = ntohs(tw->tw_dport);
2490 srcp = ntohs(tw->tw_sport);
2491
5e659e4c 2492 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2493 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
1da177e4
LT
2494 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2495 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
5e659e4c 2496 atomic_read(&tw->tw_refcnt), tw, len);
1da177e4
LT
2497}
2498
2499#define TMPSZ 150
2500
2501static int tcp4_seq_show(struct seq_file *seq, void *v)
2502{
5799de0b 2503 struct tcp_iter_state *st;
5e659e4c 2504 int len;
1da177e4
LT
2505
2506 if (v == SEQ_START_TOKEN) {
2507 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2508 " sl local_address rem_address st tx_queue "
2509 "rx_queue tr tm->when retrnsmt uid timeout "
2510 "inode");
2511 goto out;
2512 }
2513 st = seq->private;
2514
2515 switch (st->state) {
2516 case TCP_SEQ_STATE_LISTENING:
2517 case TCP_SEQ_STATE_ESTABLISHED:
5e659e4c 2518 get_tcp4_sock(v, seq, st->num, &len);
1da177e4
LT
2519 break;
2520 case TCP_SEQ_STATE_OPENREQ:
5e659e4c 2521 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
1da177e4
LT
2522 break;
2523 case TCP_SEQ_STATE_TIME_WAIT:
5e659e4c 2524 get_timewait4_sock(v, seq, st->num, &len);
1da177e4
LT
2525 break;
2526 }
5e659e4c 2527 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
1da177e4
LT
2528out:
2529 return 0;
2530}
2531
73cb88ec
AV
2532static const struct file_operations tcp_afinfo_seq_fops = {
2533 .owner = THIS_MODULE,
2534 .open = tcp_seq_open,
2535 .read = seq_read,
2536 .llseek = seq_lseek,
2537 .release = seq_release_net
2538};
2539
1da177e4 2540static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1da177e4
LT
2541 .name = "tcp",
2542 .family = AF_INET,
73cb88ec 2543 .seq_fops = &tcp_afinfo_seq_fops,
9427c4b3
DL
2544 .seq_ops = {
2545 .show = tcp4_seq_show,
2546 },
1da177e4
LT
2547};
2548
2c8c1e72 2549static int __net_init tcp4_proc_init_net(struct net *net)
757764f6
PE
2550{
2551 return tcp_proc_register(net, &tcp4_seq_afinfo);
2552}
2553
2c8c1e72 2554static void __net_exit tcp4_proc_exit_net(struct net *net)
757764f6
PE
2555{
2556 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2557}
2558
2559static struct pernet_operations tcp4_net_ops = {
2560 .init = tcp4_proc_init_net,
2561 .exit = tcp4_proc_exit_net,
2562};
2563
1da177e4
LT
2564int __init tcp4_proc_init(void)
2565{
757764f6 2566 return register_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2567}
2568
2569void tcp4_proc_exit(void)
2570{
757764f6 2571 unregister_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2572}
2573#endif /* CONFIG_PROC_FS */
2574
bf296b12
HX
2575struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2576{
b71d1d42 2577 const struct iphdr *iph = skb_gro_network_header(skb);
bf296b12
HX
2578
2579 switch (skb->ip_summed) {
2580 case CHECKSUM_COMPLETE:
86911732 2581 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
bf296b12
HX
2582 skb->csum)) {
2583 skb->ip_summed = CHECKSUM_UNNECESSARY;
2584 break;
2585 }
2586
2587 /* fall through */
2588 case CHECKSUM_NONE:
2589 NAPI_GRO_CB(skb)->flush = 1;
2590 return NULL;
2591 }
2592
2593 return tcp_gro_receive(head, skb);
2594}
bf296b12
HX
2595
2596int tcp4_gro_complete(struct sk_buff *skb)
2597{
b71d1d42 2598 const struct iphdr *iph = ip_hdr(skb);
bf296b12
HX
2599 struct tcphdr *th = tcp_hdr(skb);
2600
2601 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2602 iph->saddr, iph->daddr, 0);
2603 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2604
2605 return tcp_gro_complete(skb);
2606}
bf296b12 2607
1da177e4
LT
2608struct proto tcp_prot = {
2609 .name = "TCP",
2610 .owner = THIS_MODULE,
2611 .close = tcp_close,
2612 .connect = tcp_v4_connect,
2613 .disconnect = tcp_disconnect,
463c84b9 2614 .accept = inet_csk_accept,
1da177e4
LT
2615 .ioctl = tcp_ioctl,
2616 .init = tcp_v4_init_sock,
2617 .destroy = tcp_v4_destroy_sock,
2618 .shutdown = tcp_shutdown,
2619 .setsockopt = tcp_setsockopt,
2620 .getsockopt = tcp_getsockopt,
1da177e4 2621 .recvmsg = tcp_recvmsg,
7ba42910
CG
2622 .sendmsg = tcp_sendmsg,
2623 .sendpage = tcp_sendpage,
1da177e4 2624 .backlog_rcv = tcp_v4_do_rcv,
ab1e0a13
ACM
2625 .hash = inet_hash,
2626 .unhash = inet_unhash,
2627 .get_port = inet_csk_get_port,
1da177e4
LT
2628 .enter_memory_pressure = tcp_enter_memory_pressure,
2629 .sockets_allocated = &tcp_sockets_allocated,
0a5578cf 2630 .orphan_count = &tcp_orphan_count,
1da177e4
LT
2631 .memory_allocated = &tcp_memory_allocated,
2632 .memory_pressure = &tcp_memory_pressure,
1da177e4
LT
2633 .sysctl_wmem = sysctl_tcp_wmem,
2634 .sysctl_rmem = sysctl_tcp_rmem,
2635 .max_header = MAX_TCP_HEADER,
2636 .obj_size = sizeof(struct tcp_sock),
3ab5aee7 2637 .slab_flags = SLAB_DESTROY_BY_RCU,
6d6ee43e 2638 .twsk_prot = &tcp_timewait_sock_ops,
60236fdd 2639 .rsk_prot = &tcp_request_sock_ops,
39d8cda7 2640 .h.hashinfo = &tcp_hashinfo,
7ba42910 2641 .no_autobind = true,
543d9cfe
ACM
2642#ifdef CONFIG_COMPAT
2643 .compat_setsockopt = compat_tcp_setsockopt,
2644 .compat_getsockopt = compat_tcp_getsockopt,
2645#endif
d1a4c0b3
GC
2646#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
2647 .init_cgroup = tcp_init_cgroup,
2648 .destroy_cgroup = tcp_destroy_cgroup,
2649 .proto_cgroup = tcp_proto_cgroup,
2650#endif
1da177e4 2651};
4bc2f18b 2652EXPORT_SYMBOL(tcp_prot);
1da177e4 2653
046ee902
DL
2654static int __net_init tcp_sk_init(struct net *net)
2655{
2656 return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2657 PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2658}
2659
2660static void __net_exit tcp_sk_exit(struct net *net)
2661{
2662 inet_ctl_sock_destroy(net->ipv4.tcp_sock);
b099ce26
EB
2663}
2664
2665static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2666{
2667 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
046ee902
DL
2668}
2669
2670static struct pernet_operations __net_initdata tcp_sk_ops = {
b099ce26
EB
2671 .init = tcp_sk_init,
2672 .exit = tcp_sk_exit,
2673 .exit_batch = tcp_sk_exit_batch,
046ee902
DL
2674};
2675
9b0f976f 2676void __init tcp_v4_init(void)
1da177e4 2677{
5caea4ea 2678 inet_hashinfo_init(&tcp_hashinfo);
6a1b3054 2679 if (register_pernet_subsys(&tcp_sk_ops))
1da177e4 2680 panic("Failed to create the TCP control socket.\n");
1da177e4 2681}
This page took 0.947527 seconds and 5 git commands to generate.