[NETFILTER]: nf_conntrack: use bool type in struct nf_conntrack_l4proto
[deliverable/linux.git] / net / netfilter / nf_conntrack_proto_tcp.c
CommitLineData
9fb9cbb1
YK
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
9fb9cbb1
YK
7 */
8
9fb9cbb1 9#include <linux/types.h>
9fb9cbb1 10#include <linux/timer.h>
9fb9cbb1
YK
11#include <linux/module.h>
12#include <linux/in.h>
13#include <linux/tcp.h>
14#include <linux/spinlock.h>
15#include <linux/skbuff.h>
16#include <linux/ipv6.h>
17#include <net/ip6_checksum.h>
18
19#include <net/tcp.h>
20
21#include <linux/netfilter.h>
22#include <linux/netfilter_ipv4.h>
23#include <linux/netfilter_ipv6.h>
24#include <net/netfilter/nf_conntrack.h>
605dcad6 25#include <net/netfilter/nf_conntrack_l4proto.h>
f6180121 26#include <net/netfilter/nf_conntrack_ecache.h>
f01ffbd6 27#include <net/netfilter/nf_log.h>
9fb9cbb1 28
c88130bc 29/* Protects ct->proto.tcp */
9fb9cbb1
YK
30static DEFINE_RWLOCK(tcp_lock);
31
601e68e1
YH
32/* "Be conservative in what you do,
33 be liberal in what you accept from others."
9fb9cbb1 34 If it's non-zero, we mark only out of window RST segments as INVALID. */
3aef0fd9 35static int nf_ct_tcp_be_liberal __read_mostly = 0;
9fb9cbb1 36
a09113c2 37/* If it is set to zero, we disable picking up already established
9fb9cbb1 38 connections. */
3aef0fd9 39static int nf_ct_tcp_loose __read_mostly = 1;
9fb9cbb1 40
601e68e1
YH
41/* Max number of the retransmitted packets without receiving an (acceptable)
42 ACK from the destination. If this number is reached, a shorter timer
9fb9cbb1 43 will be started. */
3aef0fd9 44static int nf_ct_tcp_max_retrans __read_mostly = 3;
9fb9cbb1
YK
45
46 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
47 closely. They're more complex. --RR */
48
/* Printable names of the conntrack states, looked up by state value. */
static const char *const tcp_conntrack_names[] = {
	"NONE",
	"SYN_SENT",
	"SYN_RECV",
	"ESTABLISHED",
	"FIN_WAIT",
	"CLOSE_WAIT",
	"LAST_ACK",
	"TIME_WAIT",
	"CLOSE",
	"LISTEN"
};
601e68e1 61
9fb9cbb1
YK
62#define SECS * HZ
63#define MINS * 60 SECS
64#define HOURS * 60 MINS
65#define DAYS * 24 HOURS
66
9fb9cbb1 67/* RFC1122 says the R2 limit should be at least 100 seconds.
601e68e1 68 Linux uses 15 packets as limit, which corresponds
9fb9cbb1 69 to ~13-30min depending on RTO. */
933a41e7 70static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
601e68e1 71
2d646286
PM
72static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
73 [TCP_CONNTRACK_SYN_SENT] = 2 MINS,
74 [TCP_CONNTRACK_SYN_RECV] = 60 SECS,
75 [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
76 [TCP_CONNTRACK_FIN_WAIT] = 2 MINS,
77 [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS,
78 [TCP_CONNTRACK_LAST_ACK] = 30 SECS,
79 [TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
80 [TCP_CONNTRACK_CLOSE] = 10 SECS,
81};
601e68e1 82
9fb9cbb1
YK
/* Two-letter shorthands for the states, used by the transition table. */
#define sNO TCP_CONNTRACK_NONE
#define sSS TCP_CONNTRACK_SYN_SENT
#define sSR TCP_CONNTRACK_SYN_RECV
#define sES TCP_CONNTRACK_ESTABLISHED
#define sFW TCP_CONNTRACK_FIN_WAIT
#define sCW TCP_CONNTRACK_CLOSE_WAIT
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
#define sLI TCP_CONNTRACK_LISTEN
#define sIV TCP_CONNTRACK_MAX		/* marks an invalid transition */
#define sIG TCP_CONNTRACK_IGNORE	/* marks an ignored packet */

/* Which of the RST/SYN/FIN/ACK flag classes a segment falls into. */
enum tcp_bit_set {
	TCP_SYN_SET,
	TCP_SYNACK_SET,
	TCP_FIN_SET,
	TCP_ACK_SET,
	TCP_RST_SET,
	TCP_NONE_SET,
};
601e68e1 105
9fb9cbb1
YK
106/*
107 * The TCP state transition table needs a few words...
108 *
109 * We are the man in the middle. All the packets go through us
110 * but might get lost in transit to the destination.
601e68e1 111 * It is assumed that the destinations can't receive segments
9fb9cbb1
YK
112 * we haven't seen.
113 *
114 * The checked segment is in window, but our windows are *not*
115 * equivalent with the ones of the sender/receiver. We always
116 * try to guess the state of the current sender.
117 *
 * The meanings of the states are:
119 *
120 * NONE: initial state
601e68e1 121 * SYN_SENT: SYN-only packet seen
9fb9cbb1
YK
122 * SYN_RECV: SYN-ACK packet seen
123 * ESTABLISHED: ACK packet seen
124 * FIN_WAIT: FIN packet seen
601e68e1 125 * CLOSE_WAIT: ACK seen (after FIN)
9fb9cbb1
YK
126 * LAST_ACK: FIN seen (after FIN)
127 * TIME_WAIT: last ACK seen
b2155e7f 128 * CLOSE: closed connection (RST)
9fb9cbb1
YK
129 *
130 * LISTEN state is not used.
131 *
132 * Packets marked as IGNORED (sIG):
601e68e1
YH
133 * if they may be either invalid or valid
134 * and the receiver may send back a connection
9fb9cbb1
YK
135 * closing RST or a SYN/ACK.
136 *
137 * Packets marked as INVALID (sIV):
138 * if they are invalid
139 * or we do not support the request (simultaneous open)
140 */
a5e73c29 141static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
9fb9cbb1
YK
142 {
143/* ORIGINAL */
144/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
145/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
146/*
147 * sNO -> sSS Initialize a new connection
148 * sSS -> sSS Retransmitted SYN
149 * sSR -> sIG Late retransmitted SYN?
150 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
601e68e1 151 * are errors. Receiver will reply with RST
9fb9cbb1
YK
152 * and close the connection.
153 * Or we are not in sync and hold a dead connection.
154 * sFW -> sIG
155 * sCW -> sIG
156 * sLA -> sIG
157 * sTW -> sSS Reopened connection (RFC 1122).
158 * sCL -> sSS
159 */
160/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
161/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
162/*
163 * A SYN/ACK from the client is always invalid:
601e68e1 164 * - either it tries to set up a simultaneous open, which is
9fb9cbb1
YK
165 * not supported;
166 * - or the firewall has just been inserted between the two hosts
601e68e1 167 * during the session set-up. The SYN will be retransmitted
9fb9cbb1
YK
168 * by the true client (or it'll time out).
169 */
170/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
171/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
172/*
173 * sNO -> sIV Too late and no reason to do anything...
174 * sSS -> sIV Client migth not send FIN in this state:
175 * we enforce waiting for a SYN/ACK reply first.
176 * sSR -> sFW Close started.
177 * sES -> sFW
178 * sFW -> sLA FIN seen in both directions, waiting for
601e68e1 179 * the last ACK.
9fb9cbb1
YK
180 * Migth be a retransmitted FIN as well...
181 * sCW -> sLA
182 * sLA -> sLA Retransmitted FIN. Remain in the same state.
183 * sTW -> sTW
184 * sCL -> sCL
185 */
186/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
187/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
188/*
189 * sNO -> sES Assumed.
190 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
191 * sSR -> sES Established state is reached.
192 * sES -> sES :-)
193 * sFW -> sCW Normal close request answered by ACK.
194 * sCW -> sCW
195 * sLA -> sTW Last ACK detected.
196 * sTW -> sTW Retransmitted last ACK. Remain in the same state.
197 * sCL -> sCL
198 */
199/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
200/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
201/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
202 },
203 {
204/* REPLY */
205/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
206/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
207/*
208 * sNO -> sIV Never reached.
209 * sSS -> sIV Simultaneous open, not supported
210 * sSR -> sIV Simultaneous open, not supported.
211 * sES -> sIV Server may not initiate a connection.
212 * sFW -> sIV
213 * sCW -> sIV
214 * sLA -> sIV
215 * sTW -> sIV Reopened connection, but server may not do it.
216 * sCL -> sIV
217 */
218/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
219/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
220/*
221 * sSS -> sSR Standard open.
222 * sSR -> sSR Retransmitted SYN/ACK.
223 * sES -> sIG Late retransmitted SYN/ACK?
224 * sFW -> sIG Might be SYN/ACK answering ignored SYN
225 * sCW -> sIG
226 * sLA -> sIG
227 * sTW -> sIG
228 * sCL -> sIG
229 */
230/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
231/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
232/*
233 * sSS -> sIV Server might not send FIN in this state.
234 * sSR -> sFW Close started.
235 * sES -> sFW
236 * sFW -> sLA FIN seen in both directions.
237 * sCW -> sLA
238 * sLA -> sLA Retransmitted FIN.
239 * sTW -> sTW
240 * sCL -> sCL
241 */
242/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
73f30602 243/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
9fb9cbb1 244/*
73f30602 245 * sSS -> sIG Might be a half-open connection.
9fb9cbb1
YK
246 * sSR -> sSR Might answer late resent SYN.
247 * sES -> sES :-)
248 * sFW -> sCW Normal close request answered by ACK.
249 * sCW -> sCW
250 * sLA -> sTW Last ACK detected.
251 * sTW -> sTW Retransmitted last ACK.
252 * sCL -> sCL
253 */
254/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
255/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
256/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
601e68e1 257 }
9fb9cbb1
YK
258};
259
09f263cd
JE
260static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
261 struct nf_conntrack_tuple *tuple)
9fb9cbb1 262{
82f568fc
JE
263 const struct tcphdr *hp;
264 struct tcphdr _hdr;
9fb9cbb1
YK
265
266 /* Actually only need first 8 bytes. */
267 hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
268 if (hp == NULL)
09f263cd 269 return false;
9fb9cbb1
YK
270
271 tuple->src.u.tcp.port = hp->source;
272 tuple->dst.u.tcp.port = hp->dest;
273
09f263cd 274 return true;
9fb9cbb1
YK
275}
276
09f263cd
JE
277static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
278 const struct nf_conntrack_tuple *orig)
9fb9cbb1
YK
279{
280 tuple->src.u.tcp.port = orig->dst.u.tcp.port;
281 tuple->dst.u.tcp.port = orig->src.u.tcp.port;
09f263cd 282 return true;
9fb9cbb1
YK
283}
284
285/* Print out the per-protocol part of the tuple. */
286static int tcp_print_tuple(struct seq_file *s,
287 const struct nf_conntrack_tuple *tuple)
288{
289 return seq_printf(s, "sport=%hu dport=%hu ",
290 ntohs(tuple->src.u.tcp.port),
291 ntohs(tuple->dst.u.tcp.port));
292}
293
294/* Print out the private part of the conntrack. */
c88130bc 295static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
9fb9cbb1
YK
296{
297 enum tcp_conntrack state;
298
299 read_lock_bh(&tcp_lock);
c88130bc 300 state = ct->proto.tcp.state;
9fb9cbb1
YK
301 read_unlock_bh(&tcp_lock);
302
303 return seq_printf(s, "%s ", tcp_conntrack_names[state]);
304}
305
306static unsigned int get_conntrack_index(const struct tcphdr *tcph)
307{
308 if (tcph->rst) return TCP_RST_SET;
309 else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
310 else if (tcph->fin) return TCP_FIN_SET;
311 else if (tcph->ack) return TCP_ACK_SET;
312 else return TCP_NONE_SET;
313}
314
315/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
316 in IP Filter' by Guido van Rooij.
601e68e1 317
9fb9cbb1
YK
318 http://www.nluug.nl/events/sane2000/papers.html
319 http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
601e68e1 320
9fb9cbb1
YK
321 The boundaries and the conditions are changed according to RFC793:
322 the packet must intersect the window (i.e. segments may be
323 after the right or before the left edge) and thus receivers may ACK
324 segments after the right edge of the window.
325
601e68e1 326 td_maxend = max(sack + max(win,1)) seen in reply packets
9fb9cbb1
YK
327 td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
328 td_maxwin += seq + len - sender.td_maxend
329 if seq + len > sender.td_maxend
330 td_end = max(seq + len) seen in sent packets
601e68e1 331
9fb9cbb1
YK
332 I. Upper bound for valid data: seq <= sender.td_maxend
333 II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
334 III. Upper bound for valid ack: sack <= receiver.td_end
335 IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW
336
337 where sack is the highest right edge of sack block found in the packet.
338
601e68e1
YH
   The upper bound limit for a valid ack is not ignored -
   we don't have to deal with fragments.
9fb9cbb1
YK
341*/
342
343static inline __u32 segment_seq_plus_len(__u32 seq,
344 size_t len,
345 unsigned int dataoff,
82f568fc 346 const struct tcphdr *tcph)
9fb9cbb1
YK
347{
348 /* XXX Should I use payload length field in IP/IPv6 header ?
349 * - YK */
350 return (seq + len - dataoff - tcph->doff*4
351 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
352}
601e68e1 353
9fb9cbb1
YK
354/* Fixme: what about big packets? */
355#define MAXACKWINCONST 66000
356#define MAXACKWINDOW(sender) \
357 ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
358 : MAXACKWINCONST)
601e68e1 359
9fb9cbb1
YK
360/*
361 * Simplified tcp_parse_options routine from tcp_input.c
362 */
363static void tcp_options(const struct sk_buff *skb,
364 unsigned int dataoff,
82f568fc 365 const struct tcphdr *tcph,
9fb9cbb1
YK
366 struct ip_ct_tcp_state *state)
367{
368 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
82f568fc 369 const unsigned char *ptr;
9fb9cbb1
YK
370 int length = (tcph->doff*4) - sizeof(struct tcphdr);
371
372 if (!length)
373 return;
374
375 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
376 length, buff);
377 BUG_ON(ptr == NULL);
378
601e68e1 379 state->td_scale =
9fb9cbb1
YK
380 state->flags = 0;
381
382 while (length > 0) {
383 int opcode=*ptr++;
384 int opsize;
385
386 switch (opcode) {
387 case TCPOPT_EOL:
388 return;
389 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
390 length--;
391 continue;
392 default:
393 opsize=*ptr++;
394 if (opsize < 2) /* "silly options" */
395 return;
396 if (opsize > length)
397 break; /* don't parse partial options */
398
601e68e1 399 if (opcode == TCPOPT_SACK_PERM
9fb9cbb1
YK
400 && opsize == TCPOLEN_SACK_PERM)
401 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
402 else if (opcode == TCPOPT_WINDOW
403 && opsize == TCPOLEN_WINDOW) {
404 state->td_scale = *(u_int8_t *)ptr;
405
406 if (state->td_scale > 14) {
407 /* See RFC1323 */
408 state->td_scale = 14;
409 }
410 state->flags |=
411 IP_CT_TCP_FLAG_WINDOW_SCALE;
412 }
413 ptr += opsize - 2;
414 length -= opsize;
415 }
416 }
417}
418
419static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
82f568fc 420 const struct tcphdr *tcph, __u32 *sack)
9fb9cbb1 421{
601e68e1 422 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
82f568fc 423 const unsigned char *ptr;
9fb9cbb1
YK
424 int length = (tcph->doff*4) - sizeof(struct tcphdr);
425 __u32 tmp;
426
427 if (!length)
428 return;
429
430 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
431 length, buff);
432 BUG_ON(ptr == NULL);
433
434 /* Fast path for timestamp-only option */
435 if (length == TCPOLEN_TSTAMP_ALIGNED*4
8f05ce91
YH
436 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
437 | (TCPOPT_NOP << 16)
438 | (TCPOPT_TIMESTAMP << 8)
439 | TCPOLEN_TIMESTAMP))
9fb9cbb1
YK
440 return;
441
442 while (length > 0) {
443 int opcode = *ptr++;
444 int opsize, i;
445
446 switch (opcode) {
447 case TCPOPT_EOL:
448 return;
449 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
450 length--;
451 continue;
452 default:
453 opsize = *ptr++;
454 if (opsize < 2) /* "silly options" */
455 return;
456 if (opsize > length)
457 break; /* don't parse partial options */
458
601e68e1
YH
459 if (opcode == TCPOPT_SACK
460 && opsize >= (TCPOLEN_SACK_BASE
461 + TCPOLEN_SACK_PERBLOCK)
462 && !((opsize - TCPOLEN_SACK_BASE)
463 % TCPOLEN_SACK_PERBLOCK)) {
464 for (i = 0;
465 i < (opsize - TCPOLEN_SACK_BASE);
466 i += TCPOLEN_SACK_PERBLOCK) {
467 tmp = ntohl(*((__be32 *)(ptr+i)+1));
9fb9cbb1
YK
468
469 if (after(tmp, *sack))
470 *sack = tmp;
471 }
472 return;
473 }
474 ptr += opsize - 2;
475 length -= opsize;
476 }
477 }
478}
479
09f263cd
JE
480static bool tcp_in_window(const struct nf_conn *ct,
481 struct ip_ct_tcp *state,
482 enum ip_conntrack_dir dir,
483 unsigned int index,
484 const struct sk_buff *skb,
485 unsigned int dataoff,
486 const struct tcphdr *tcph,
487 int pf)
9fb9cbb1
YK
488{
489 struct ip_ct_tcp_state *sender = &state->seen[dir];
490 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
82f568fc 491 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
9fb9cbb1 492 __u32 seq, ack, sack, end, win, swin;
09f263cd 493 bool res;
9fb9cbb1
YK
494
495 /*
496 * Get the required data from the packet.
497 */
498 seq = ntohl(tcph->seq);
499 ack = sack = ntohl(tcph->ack_seq);
500 win = ntohs(tcph->window);
501 end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
502
503 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
504 tcp_sack(skb, dataoff, tcph, &sack);
505
0d53778e
PM
506 pr_debug("tcp_in_window: START\n");
507 pr_debug("tcp_in_window: ");
508 NF_CT_DUMP_TUPLE(tuple);
509 pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n",
510 seq, ack, sack, win, end);
511 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
512 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
513 sender->td_end, sender->td_maxend, sender->td_maxwin,
514 sender->td_scale,
515 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
516 receiver->td_scale);
9fb9cbb1
YK
517
518 if (sender->td_end == 0) {
519 /*
520 * Initialize sender data.
521 */
522 if (tcph->syn && tcph->ack) {
523 /*
524 * Outgoing SYN-ACK in reply to a SYN.
525 */
601e68e1 526 sender->td_end =
9fb9cbb1
YK
527 sender->td_maxend = end;
528 sender->td_maxwin = (win == 0 ? 1 : win);
529
530 tcp_options(skb, dataoff, tcph, sender);
601e68e1 531 /*
9fb9cbb1
YK
532 * RFC 1323:
533 * Both sides must send the Window Scale option
534 * to enable window scaling in either direction.
535 */
536 if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
537 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
601e68e1 538 sender->td_scale =
9fb9cbb1
YK
539 receiver->td_scale = 0;
540 } else {
541 /*
542 * We are in the middle of a connection,
543 * its history is lost for us.
544 * Let's try to use the data from the packet.
601e68e1 545 */
9fb9cbb1
YK
546 sender->td_end = end;
547 sender->td_maxwin = (win == 0 ? 1 : win);
548 sender->td_maxend = end + sender->td_maxwin;
549 }
550 } else if (((state->state == TCP_CONNTRACK_SYN_SENT
551 && dir == IP_CT_DIR_ORIGINAL)
552 || (state->state == TCP_CONNTRACK_SYN_RECV
553 && dir == IP_CT_DIR_REPLY))
554 && after(end, sender->td_end)) {
555 /*
556 * RFC 793: "if a TCP is reinitialized ... then it need
601e68e1 557 * not wait at all; it must only be sure to use sequence
9fb9cbb1
YK
558 * numbers larger than those recently used."
559 */
560 sender->td_end =
561 sender->td_maxend = end;
562 sender->td_maxwin = (win == 0 ? 1 : win);
563
564 tcp_options(skb, dataoff, tcph, sender);
565 }
566
567 if (!(tcph->ack)) {
568 /*
569 * If there is no ACK, just pretend it was set and OK.
570 */
571 ack = sack = receiver->td_end;
601e68e1
YH
572 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
573 (TCP_FLAG_ACK|TCP_FLAG_RST))
9fb9cbb1
YK
574 && (ack == 0)) {
575 /*
576 * Broken TCP stacks, that set ACK in RST packets as well
577 * with zero ack value.
578 */
579 ack = sack = receiver->td_end;
580 }
581
582 if (seq == end
583 && (!tcph->rst
584 || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
585 /*
586 * Packets contains no data: we assume it is valid
587 * and check the ack value only.
588 * However RST segments are always validated by their
589 * SEQ number, except when seq == 0 (reset sent answering
590 * SYN.
591 */
592 seq = end = sender->td_end;
593
0d53778e
PM
594 pr_debug("tcp_in_window: ");
595 NF_CT_DUMP_TUPLE(tuple);
596 pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n",
597 seq, ack, sack, win, end);
598 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
599 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
600 sender->td_end, sender->td_maxend, sender->td_maxwin,
601 sender->td_scale,
602 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
603 receiver->td_scale);
604
605 pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
606 before(seq, sender->td_maxend + 1),
607 after(end, sender->td_end - receiver->td_maxwin - 1),
608 before(sack, receiver->td_end + 1),
609 after(ack, receiver->td_end - MAXACKWINDOW(sender)));
9fb9cbb1 610
a09113c2
PM
611 if (before(seq, sender->td_maxend + 1) &&
612 after(end, sender->td_end - receiver->td_maxwin - 1) &&
613 before(sack, receiver->td_end + 1) &&
614 after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
601e68e1 615 /*
9fb9cbb1
YK
616 * Take into account window scaling (RFC 1323).
617 */
618 if (!tcph->syn)
619 win <<= sender->td_scale;
620
621 /*
622 * Update sender data.
623 */
624 swin = win + (sack - ack);
625 if (sender->td_maxwin < swin)
626 sender->td_maxwin = swin;
627 if (after(end, sender->td_end))
628 sender->td_end = end;
629 /*
630 * Update receiver data.
631 */
632 if (after(end, sender->td_maxend))
633 receiver->td_maxwin += end - sender->td_maxend;
634 if (after(sack + win, receiver->td_maxend - 1)) {
635 receiver->td_maxend = sack + win;
636 if (win == 0)
637 receiver->td_maxend++;
638 }
639
601e68e1 640 /*
9fb9cbb1
YK
641 * Check retransmissions.
642 */
643 if (index == TCP_ACK_SET) {
644 if (state->last_dir == dir
645 && state->last_seq == seq
646 && state->last_ack == ack
c1fe3ca5
GH
647 && state->last_end == end
648 && state->last_win == win)
9fb9cbb1
YK
649 state->retrans++;
650 else {
651 state->last_dir = dir;
652 state->last_seq = seq;
653 state->last_ack = ack;
654 state->last_end = end;
c1fe3ca5 655 state->last_win = win;
9fb9cbb1
YK
656 state->retrans = 0;
657 }
658 }
09f263cd 659 res = true;
9fb9cbb1 660 } else {
09f263cd 661 res = false;
a09113c2
PM
662 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
663 nf_ct_tcp_be_liberal)
09f263cd 664 res = true;
a09113c2 665 if (!res && LOG_INVALID(IPPROTO_TCP))
9fb9cbb1
YK
666 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
667 "nf_ct_tcp: %s ",
668 before(seq, sender->td_maxend + 1) ?
669 after(end, sender->td_end - receiver->td_maxwin - 1) ?
670 before(sack, receiver->td_end + 1) ?
671 after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
672 : "ACK is under the lower bound (possible overly delayed ACK)"
673 : "ACK is over the upper bound (ACKed data not seen yet)"
674 : "SEQ is under the lower bound (already ACKed data retransmitted)"
675 : "SEQ is over the upper bound (over the window of the receiver)");
601e68e1
YH
676 }
677
09f263cd 678 pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
0d53778e
PM
679 "receiver end=%u maxend=%u maxwin=%u\n",
680 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
681 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
9fb9cbb1
YK
682
683 return res;
684}
685
#ifdef CONFIG_NF_NAT_NEEDED
/*
 * Update sender->td_end after NAT successfully mangled the packet.
 * Caller must linearize skb at tcp header.
 *
 * Recomputes the end sequence number of the segment at @dataoff in @skb
 * and, under tcp_lock, advances the td_end of direction @dir if the new
 * value lies after it; last_end is always overwritten.
 */
void nf_conntrack_tcp_update(const struct sk_buff *skb,
			     unsigned int dataoff,
			     struct nf_conn *ct,
			     int dir)
{
	const struct tcphdr *tcph = (const void *)skb->data + dataoff;
	const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir];
	const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[!dir];
	__u32 end = segment_seq_plus_len(ntohl(tcph->seq), skb->len,
					 dataoff, tcph);

	write_lock_bh(&tcp_lock);
	/* We have to worry for the ack in the reply packet only... */
	if (after(end, ct->proto.tcp.seen[dir].td_end))
		ct->proto.tcp.seen[dir].td_end = end;
	ct->proto.tcp.last_end = end;
	write_unlock_bh(&tcp_lock);

	pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		 sender->td_end, sender->td_maxend, sender->td_maxwin,
		 sender->td_scale,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		 receiver->td_scale);
}
EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
#endif
718
719#define TH_FIN 0x01
720#define TH_SYN 0x02
721#define TH_RST 0x04
722#define TH_PUSH 0x08
723#define TH_ACK 0x10
724#define TH_URG 0x20
725#define TH_ECE 0x40
726#define TH_CWR 0x80
727
5c8ce7c9 728/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
82f568fc 729static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
9fb9cbb1
YK
730{
731 [TH_SYN] = 1,
d3ab4298 732 [TH_SYN|TH_URG] = 1,
d3ab4298 733 [TH_SYN|TH_ACK] = 1,
9fb9cbb1
YK
734 [TH_RST] = 1,
735 [TH_RST|TH_ACK] = 1,
9fb9cbb1 736 [TH_FIN|TH_ACK] = 1,
5c8ce7c9 737 [TH_FIN|TH_ACK|TH_URG] = 1,
9fb9cbb1 738 [TH_ACK] = 1,
9fb9cbb1 739 [TH_ACK|TH_URG] = 1,
9fb9cbb1
YK
740};
741
742/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
743static int tcp_error(struct sk_buff *skb,
744 unsigned int dataoff,
745 enum ip_conntrack_info *ctinfo,
746 int pf,
96f6bf82 747 unsigned int hooknum)
9fb9cbb1 748{
82f568fc
JE
749 const struct tcphdr *th;
750 struct tcphdr _tcph;
9fb9cbb1
YK
751 unsigned int tcplen = skb->len - dataoff;
752 u_int8_t tcpflags;
753
754 /* Smaller that minimal TCP header? */
755 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
756 if (th == NULL) {
757 if (LOG_INVALID(IPPROTO_TCP))
758 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
759 "nf_ct_tcp: short packet ");
760 return -NF_ACCEPT;
601e68e1
YH
761 }
762
9fb9cbb1
YK
763 /* Not whole TCP header or malformed packet */
764 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
765 if (LOG_INVALID(IPPROTO_TCP))
766 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
767 "nf_ct_tcp: truncated/malformed packet ");
768 return -NF_ACCEPT;
769 }
601e68e1 770
9fb9cbb1
YK
771 /* Checksum invalid? Ignore.
772 * We skip checking packets on the outgoing path
84fa7933 773 * because the checksum is assumed to be correct.
9fb9cbb1
YK
774 */
775 /* FIXME: Source route IP option packets --RR */
6e23ae2a 776 if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
96f6bf82 777 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
9fb9cbb1
YK
778 if (LOG_INVALID(IPPROTO_TCP))
779 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
780 "nf_ct_tcp: bad TCP checksum ");
781 return -NF_ACCEPT;
782 }
783
784 /* Check TCP flags. */
5c8ce7c9 785 tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
9fb9cbb1
YK
786 if (!tcp_valid_flags[tcpflags]) {
787 if (LOG_INVALID(IPPROTO_TCP))
788 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
789 "nf_ct_tcp: invalid TCP flag combination ");
790 return -NF_ACCEPT;
791 }
792
793 return NF_ACCEPT;
794}
795
9fb9cbb1 796/* Returns verdict for packet, or -1 for invalid. */
c88130bc 797static int tcp_packet(struct nf_conn *ct,
9fb9cbb1
YK
798 const struct sk_buff *skb,
799 unsigned int dataoff,
800 enum ip_conntrack_info ctinfo,
801 int pf,
802 unsigned int hooknum)
803{
0d53778e 804 struct nf_conntrack_tuple *tuple;
9fb9cbb1
YK
805 enum tcp_conntrack new_state, old_state;
806 enum ip_conntrack_dir dir;
82f568fc
JE
807 const struct tcphdr *th;
808 struct tcphdr _tcph;
9fb9cbb1
YK
809 unsigned long timeout;
810 unsigned int index;
811
812 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
813 BUG_ON(th == NULL);
814
815 write_lock_bh(&tcp_lock);
c88130bc 816 old_state = ct->proto.tcp.state;
9fb9cbb1
YK
817 dir = CTINFO2DIR(ctinfo);
818 index = get_conntrack_index(th);
819 new_state = tcp_conntracks[dir][index][old_state];
c88130bc 820 tuple = &ct->tuplehash[dir].tuple;
9fb9cbb1
YK
821
822 switch (new_state) {
17311393
JK
823 case TCP_CONNTRACK_SYN_SENT:
824 if (old_state < TCP_CONNTRACK_TIME_WAIT)
825 break;
b2155e7f
JK
826 /* RFC 1122: "When a connection is closed actively,
827 * it MUST linger in TIME-WAIT state for a time 2xMSL
828 * (Maximum Segment Lifetime). However, it MAY accept
829 * a new SYN from the remote TCP to reopen the connection
830 * directly from TIME-WAIT state, if..."
831 * We ignore the conditions because we are in the
832 * TIME-WAIT state anyway.
833 *
834 * Handle aborted connections: we and the server
835 * think there is an existing connection but the client
836 * aborts it and starts a new one.
837 */
838 if (((ct->proto.tcp.seen[dir].flags
839 | ct->proto.tcp.seen[!dir].flags)
840 & IP_CT_TCP_FLAG_CLOSE_INIT)
c88130bc
PM
841 || (ct->proto.tcp.last_dir == dir
842 && ct->proto.tcp.last_index == TCP_RST_SET)) {
bc34b841
JK
843 /* Attempt to reopen a closed/aborted connection.
844 * Delete this connection and look up again. */
17311393 845 write_unlock_bh(&tcp_lock);
c88130bc
PM
846 if (del_timer(&ct->timeout))
847 ct->timeout.function((unsigned long)ct);
17311393
JK
848 return -NF_REPEAT;
849 }
850 /* Fall through */
9fb9cbb1 851 case TCP_CONNTRACK_IGNORE:
73f30602 852 /* Ignored packets:
b2155e7f
JK
853 *
854 * Our connection entry may be out of sync, so ignore
855 * packets which may signal the real connection between
856 * the client and the server.
73f30602
JK
857 *
858 * a) SYN in ORIGINAL
859 * b) SYN/ACK in REPLY
601e68e1 860 * c) ACK in reply direction after initial SYN in original.
b2155e7f
JK
861 *
862 * If the ignored packet is invalid, the receiver will send
863 * a RST we'll catch below.
73f30602 864 */
9fb9cbb1 865 if (index == TCP_SYNACK_SET
c88130bc
PM
866 && ct->proto.tcp.last_index == TCP_SYN_SET
867 && ct->proto.tcp.last_dir != dir
868 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
b2155e7f 869 /* b) This SYN/ACK acknowledges a SYN that we earlier
9fb9cbb1
YK
870 * ignored as invalid. This means that the client and
871 * the server are both in sync, while the firewall is
872 * not. We kill this session and block the SYN/ACK so
601e68e1 873 * that the client cannot but retransmit its SYN and
9fb9cbb1
YK
874 * thus initiate a clean new session.
875 */
601e68e1 876 write_unlock_bh(&tcp_lock);
9fb9cbb1
YK
877 if (LOG_INVALID(IPPROTO_TCP))
878 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
879 "nf_ct_tcp: killing out of sync session ");
c88130bc
PM
880 if (del_timer(&ct->timeout))
881 ct->timeout.function((unsigned long)ct);
601e68e1 882 return -NF_DROP;
9fb9cbb1 883 }
c88130bc
PM
884 ct->proto.tcp.last_index = index;
885 ct->proto.tcp.last_dir = dir;
886 ct->proto.tcp.last_seq = ntohl(th->seq);
887 ct->proto.tcp.last_end =
9fb9cbb1
YK
888 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
889
890 write_unlock_bh(&tcp_lock);
891 if (LOG_INVALID(IPPROTO_TCP))
892 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
b2155e7f 893 "nf_ct_tcp: invalid packet ignored ");
9fb9cbb1
YK
894 return NF_ACCEPT;
895 case TCP_CONNTRACK_MAX:
896 /* Invalid packet */
0d53778e
PM
897 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
898 dir, get_conntrack_index(th), old_state);
9fb9cbb1
YK
899 write_unlock_bh(&tcp_lock);
900 if (LOG_INVALID(IPPROTO_TCP))
901 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
902 "nf_ct_tcp: invalid state ");
903 return -NF_ACCEPT;
9fb9cbb1
YK
904 case TCP_CONNTRACK_CLOSE:
905 if (index == TCP_RST_SET
c88130bc
PM
906 && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
907 && ct->proto.tcp.last_index == TCP_SYN_SET)
908 || (!test_bit(IPS_ASSURED_BIT, &ct->status)
909 && ct->proto.tcp.last_index == TCP_ACK_SET))
910 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
93b1fae4 911 /* RST sent to invalid SYN or ACK we had let through
73f30602
JK
912 * at a) and c) above:
913 *
914 * a) SYN was in window then
915 * c) we hold a half-open connection.
916 *
917 * Delete our connection entry.
9fb9cbb1 918 * We skip window checking, because packet might ACK
73f30602 919 * segments we ignored. */
9fb9cbb1
YK
920 goto in_window;
921 }
93b1fae4 922 /* Just fall through */
9fb9cbb1
YK
923 default:
924 /* Keep compilers happy. */
925 break;
926 }
927
c88130bc 928 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
9fb9cbb1
YK
929 skb, dataoff, th, pf)) {
930 write_unlock_bh(&tcp_lock);
931 return -NF_ACCEPT;
932 }
933 in_window:
934 /* From now on we have got in-window packets */
c88130bc
PM
935 ct->proto.tcp.last_index = index;
936 ct->proto.tcp.last_dir = dir;
9fb9cbb1 937
0d53778e
PM
938 pr_debug("tcp_conntracks: ");
939 NF_CT_DUMP_TUPLE(tuple);
940 pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
941 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
942 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
943 old_state, new_state);
9fb9cbb1 944
c88130bc 945 ct->proto.tcp.state = new_state;
9fb9cbb1 946 if (old_state != new_state
d0c1fd7a 947 && new_state == TCP_CONNTRACK_FIN_WAIT)
c88130bc
PM
948 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
949 timeout = ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans
2d646286
PM
950 && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
951 ? nf_ct_tcp_timeout_max_retrans : tcp_timeouts[new_state];
9fb9cbb1
YK
952 write_unlock_bh(&tcp_lock);
953
954 nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
955 if (new_state != old_state)
956 nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
957
c88130bc 958 if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
9fb9cbb1
YK
959 /* If only reply is a RST, we can consider ourselves not to
960 have an established connection: this is a fairly common
961 problem case, so we can delete the conntrack
962 immediately. --RR */
963 if (th->rst) {
c88130bc
PM
964 if (del_timer(&ct->timeout))
965 ct->timeout.function((unsigned long)ct);
9fb9cbb1
YK
966 return NF_ACCEPT;
967 }
c88130bc 968 } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
9fb9cbb1
YK
969 && (old_state == TCP_CONNTRACK_SYN_RECV
970 || old_state == TCP_CONNTRACK_ESTABLISHED)
971 && new_state == TCP_CONNTRACK_ESTABLISHED) {
601e68e1
YH
972 /* Set ASSURED if we see valid ack in ESTABLISHED
973 after SYN_RECV or a valid answer for a picked up
9fb9cbb1 974 connection. */
c88130bc 975 set_bit(IPS_ASSURED_BIT, &ct->status);
9fb9cbb1
YK
976 nf_conntrack_event_cache(IPCT_STATUS, skb);
977 }
c88130bc 978 nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
9fb9cbb1
YK
979
980 return NF_ACCEPT;
981}
601e68e1 982
9fb9cbb1 983/* Called when a new connection for this protocol found. */
09f263cd
JE
984static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
985 unsigned int dataoff)
9fb9cbb1
YK
986{
987 enum tcp_conntrack new_state;
82f568fc
JE
988 const struct tcphdr *th;
989 struct tcphdr _tcph;
990 const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
991 const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
9fb9cbb1
YK
992
993 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
994 BUG_ON(th == NULL);
995
996 /* Don't need lock here: this conntrack not in circulation yet */
997 new_state
998 = tcp_conntracks[0][get_conntrack_index(th)]
999 [TCP_CONNTRACK_NONE];
1000
1001 /* Invalid: delete conntrack */
1002 if (new_state >= TCP_CONNTRACK_MAX) {
0d53778e 1003 pr_debug("nf_ct_tcp: invalid new deleting.\n");
09f263cd 1004 return false;
9fb9cbb1
YK
1005 }
1006
1007 if (new_state == TCP_CONNTRACK_SYN_SENT) {
1008 /* SYN packet */
c88130bc 1009 ct->proto.tcp.seen[0].td_end =
9fb9cbb1
YK
1010 segment_seq_plus_len(ntohl(th->seq), skb->len,
1011 dataoff, th);
c88130bc
PM
1012 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1013 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1014 ct->proto.tcp.seen[0].td_maxwin = 1;
1015 ct->proto.tcp.seen[0].td_maxend =
1016 ct->proto.tcp.seen[0].td_end;
1017
1018 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1019 ct->proto.tcp.seen[1].flags = 0;
9fb9cbb1
YK
1020 } else if (nf_ct_tcp_loose == 0) {
1021 /* Don't try to pick up connections. */
09f263cd 1022 return false;
9fb9cbb1
YK
1023 } else {
1024 /*
1025 * We are in the middle of a connection,
1026 * its history is lost for us.
1027 * Let's try to use the data from the packet.
1028 */
c88130bc 1029 ct->proto.tcp.seen[0].td_end =
9fb9cbb1
YK
1030 segment_seq_plus_len(ntohl(th->seq), skb->len,
1031 dataoff, th);
c88130bc
PM
1032 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1033 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1034 ct->proto.tcp.seen[0].td_maxwin = 1;
1035 ct->proto.tcp.seen[0].td_maxend =
1036 ct->proto.tcp.seen[0].td_end +
1037 ct->proto.tcp.seen[0].td_maxwin;
1038 ct->proto.tcp.seen[0].td_scale = 0;
9fb9cbb1 1039
a09113c2
PM
1040 /* We assume SACK and liberal window checking to handle
1041 * window scaling */
c88130bc
PM
1042 ct->proto.tcp.seen[0].flags =
1043 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1044 IP_CT_TCP_FLAG_BE_LIBERAL;
9fb9cbb1 1045 }
601e68e1 1046
c88130bc
PM
1047 ct->proto.tcp.seen[1].td_end = 0;
1048 ct->proto.tcp.seen[1].td_maxend = 0;
1049 ct->proto.tcp.seen[1].td_maxwin = 1;
1050 ct->proto.tcp.seen[1].td_scale = 0;
9fb9cbb1
YK
1051
1052 /* tcp_packet will set them */
c88130bc
PM
1053 ct->proto.tcp.state = TCP_CONNTRACK_NONE;
1054 ct->proto.tcp.last_index = TCP_NONE_SET;
601e68e1 1055
0d53778e
PM
1056 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1057 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1058 sender->td_end, sender->td_maxend, sender->td_maxwin,
1059 sender->td_scale,
1060 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1061 receiver->td_scale);
09f263cd 1062 return true;
9fb9cbb1 1063}
c1d10adb 1064
e281db5c 1065#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
c1d10adb
PNA
1066
1067#include <linux/netfilter/nfnetlink.h>
1068#include <linux/netfilter/nfnetlink_conntrack.h>
1069
fdf70832 1070static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
c1d10adb
PNA
1071 const struct nf_conn *ct)
1072{
df6fb868 1073 struct nlattr *nest_parms;
c8e2078c 1074 struct nf_ct_tcp_flags tmp = {};
601e68e1 1075
c1d10adb 1076 read_lock_bh(&tcp_lock);
df6fb868
PM
1077 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1078 if (!nest_parms)
1079 goto nla_put_failure;
1080
77236b6e 1081 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state);
c8e2078c 1082
77236b6e
PM
1083 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1084 ct->proto.tcp.seen[0].td_scale);
c8e2078c 1085
77236b6e
PM
1086 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1087 ct->proto.tcp.seen[1].td_scale);
c8e2078c
PNA
1088
1089 tmp.flags = ct->proto.tcp.seen[0].flags;
df6fb868 1090 NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
c8e2078c
PNA
1091 sizeof(struct nf_ct_tcp_flags), &tmp);
1092
1093 tmp.flags = ct->proto.tcp.seen[1].flags;
df6fb868 1094 NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
c8e2078c 1095 sizeof(struct nf_ct_tcp_flags), &tmp);
c1d10adb
PNA
1096 read_unlock_bh(&tcp_lock);
1097
df6fb868 1098 nla_nest_end(skb, nest_parms);
c1d10adb
PNA
1099
1100 return 0;
1101
df6fb868 1102nla_put_failure:
c1d10adb
PNA
1103 read_unlock_bh(&tcp_lock);
1104 return -1;
1105}
1106
f73e924c
PM
1107static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1108 [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
1109 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1110 [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
1111 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
1112 [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
c1d10adb
PNA
1113};
1114
fdf70832 1115static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
c1d10adb 1116{
2f0d2f10 1117 struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
df6fb868 1118 struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
f73e924c 1119 int err;
c1d10adb
PNA
1120
1121 /* updates could not contain anything about the private
1122 * protocol info, in that case skip the parsing */
2f0d2f10 1123 if (!pattr)
c1d10adb
PNA
1124 return 0;
1125
2f0d2f10 1126 err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
f73e924c
PM
1127 if (err < 0)
1128 return err;
c1d10adb 1129
5f7da4d2
PM
1130 if (tb[CTA_PROTOINFO_TCP_STATE] &&
1131 nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
c1d10adb
PNA
1132 return -EINVAL;
1133
1134 write_lock_bh(&tcp_lock);
5f7da4d2
PM
1135 if (tb[CTA_PROTOINFO_TCP_STATE])
1136 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
c8e2078c 1137
df6fb868 1138 if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
c8e2078c 1139 struct nf_ct_tcp_flags *attr =
df6fb868 1140 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
c8e2078c
PNA
1141 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1142 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1143 }
1144
df6fb868 1145 if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
c8e2078c 1146 struct nf_ct_tcp_flags *attr =
df6fb868 1147 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
c8e2078c
PNA
1148 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1149 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1150 }
1151
df6fb868
PM
1152 if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1153 tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
c8e2078c
PNA
1154 ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1155 ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
77236b6e
PM
1156 ct->proto.tcp.seen[0].td_scale =
1157 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1158 ct->proto.tcp.seen[1].td_scale =
1159 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
c8e2078c 1160 }
c1d10adb
PNA
1161 write_unlock_bh(&tcp_lock);
1162
1163 return 0;
1164}
1165#endif
933a41e7
PM
1166
1167#ifdef CONFIG_SYSCTL
1168static unsigned int tcp_sysctl_table_users;
1169static struct ctl_table_header *tcp_sysctl_header;
1170static struct ctl_table tcp_sysctl_table[] = {
1171 {
933a41e7 1172 .procname = "nf_conntrack_tcp_timeout_syn_sent",
2d646286 1173 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
933a41e7
PM
1174 .maxlen = sizeof(unsigned int),
1175 .mode = 0644,
1176 .proc_handler = &proc_dointvec_jiffies,
1177 },
1178 {
933a41e7 1179 .procname = "nf_conntrack_tcp_timeout_syn_recv",
2d646286 1180 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
933a41e7
PM
1181 .maxlen = sizeof(unsigned int),
1182 .mode = 0644,
1183 .proc_handler = &proc_dointvec_jiffies,
1184 },
1185 {
933a41e7 1186 .procname = "nf_conntrack_tcp_timeout_established",
2d646286 1187 .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
933a41e7
PM
1188 .maxlen = sizeof(unsigned int),
1189 .mode = 0644,
1190 .proc_handler = &proc_dointvec_jiffies,
1191 },
1192 {
933a41e7 1193 .procname = "nf_conntrack_tcp_timeout_fin_wait",
2d646286 1194 .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
933a41e7
PM
1195 .maxlen = sizeof(unsigned int),
1196 .mode = 0644,
1197 .proc_handler = &proc_dointvec_jiffies,
1198 },
1199 {
933a41e7 1200 .procname = "nf_conntrack_tcp_timeout_close_wait",
2d646286 1201 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
933a41e7
PM
1202 .maxlen = sizeof(unsigned int),
1203 .mode = 0644,
1204 .proc_handler = &proc_dointvec_jiffies,
1205 },
1206 {
933a41e7 1207 .procname = "nf_conntrack_tcp_timeout_last_ack",
2d646286 1208 .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
933a41e7
PM
1209 .maxlen = sizeof(unsigned int),
1210 .mode = 0644,
1211 .proc_handler = &proc_dointvec_jiffies,
1212 },
1213 {
933a41e7 1214 .procname = "nf_conntrack_tcp_timeout_time_wait",
2d646286 1215 .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
933a41e7
PM
1216 .maxlen = sizeof(unsigned int),
1217 .mode = 0644,
1218 .proc_handler = &proc_dointvec_jiffies,
1219 },
1220 {
933a41e7 1221 .procname = "nf_conntrack_tcp_timeout_close",
2d646286 1222 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
933a41e7
PM
1223 .maxlen = sizeof(unsigned int),
1224 .mode = 0644,
1225 .proc_handler = &proc_dointvec_jiffies,
1226 },
1227 {
933a41e7
PM
1228 .procname = "nf_conntrack_tcp_timeout_max_retrans",
1229 .data = &nf_ct_tcp_timeout_max_retrans,
1230 .maxlen = sizeof(unsigned int),
1231 .mode = 0644,
1232 .proc_handler = &proc_dointvec_jiffies,
1233 },
1234 {
1235 .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE,
1236 .procname = "nf_conntrack_tcp_loose",
1237 .data = &nf_ct_tcp_loose,
1238 .maxlen = sizeof(unsigned int),
1239 .mode = 0644,
1240 .proc_handler = &proc_dointvec,
1241 },
1242 {
1243 .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL,
1244 .procname = "nf_conntrack_tcp_be_liberal",
1245 .data = &nf_ct_tcp_be_liberal,
1246 .maxlen = sizeof(unsigned int),
1247 .mode = 0644,
1248 .proc_handler = &proc_dointvec,
1249 },
1250 {
1251 .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS,
1252 .procname = "nf_conntrack_tcp_max_retrans",
1253 .data = &nf_ct_tcp_max_retrans,
1254 .maxlen = sizeof(unsigned int),
1255 .mode = 0644,
1256 .proc_handler = &proc_dointvec,
1257 },
1258 {
1259 .ctl_name = 0
1260 }
1261};
a999e683
PM
1262
1263#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1264static struct ctl_table tcp_compat_sysctl_table[] = {
1265 {
a999e683 1266 .procname = "ip_conntrack_tcp_timeout_syn_sent",
2d646286 1267 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
a999e683
PM
1268 .maxlen = sizeof(unsigned int),
1269 .mode = 0644,
1270 .proc_handler = &proc_dointvec_jiffies,
1271 },
1272 {
a999e683 1273 .procname = "ip_conntrack_tcp_timeout_syn_recv",
2d646286 1274 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
a999e683
PM
1275 .maxlen = sizeof(unsigned int),
1276 .mode = 0644,
1277 .proc_handler = &proc_dointvec_jiffies,
1278 },
1279 {
a999e683 1280 .procname = "ip_conntrack_tcp_timeout_established",
2d646286 1281 .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
a999e683
PM
1282 .maxlen = sizeof(unsigned int),
1283 .mode = 0644,
1284 .proc_handler = &proc_dointvec_jiffies,
1285 },
1286 {
a999e683 1287 .procname = "ip_conntrack_tcp_timeout_fin_wait",
2d646286 1288 .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
a999e683
PM
1289 .maxlen = sizeof(unsigned int),
1290 .mode = 0644,
1291 .proc_handler = &proc_dointvec_jiffies,
1292 },
1293 {
a999e683 1294 .procname = "ip_conntrack_tcp_timeout_close_wait",
2d646286 1295 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
a999e683
PM
1296 .maxlen = sizeof(unsigned int),
1297 .mode = 0644,
1298 .proc_handler = &proc_dointvec_jiffies,
1299 },
1300 {
a999e683 1301 .procname = "ip_conntrack_tcp_timeout_last_ack",
2d646286 1302 .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
a999e683
PM
1303 .maxlen = sizeof(unsigned int),
1304 .mode = 0644,
1305 .proc_handler = &proc_dointvec_jiffies,
1306 },
1307 {
a999e683 1308 .procname = "ip_conntrack_tcp_timeout_time_wait",
2d646286 1309 .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
a999e683
PM
1310 .maxlen = sizeof(unsigned int),
1311 .mode = 0644,
1312 .proc_handler = &proc_dointvec_jiffies,
1313 },
1314 {
a999e683 1315 .procname = "ip_conntrack_tcp_timeout_close",
2d646286 1316 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
a999e683
PM
1317 .maxlen = sizeof(unsigned int),
1318 .mode = 0644,
1319 .proc_handler = &proc_dointvec_jiffies,
1320 },
1321 {
a999e683
PM
1322 .procname = "ip_conntrack_tcp_timeout_max_retrans",
1323 .data = &nf_ct_tcp_timeout_max_retrans,
1324 .maxlen = sizeof(unsigned int),
1325 .mode = 0644,
1326 .proc_handler = &proc_dointvec_jiffies,
1327 },
1328 {
1329 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
1330 .procname = "ip_conntrack_tcp_loose",
1331 .data = &nf_ct_tcp_loose,
1332 .maxlen = sizeof(unsigned int),
1333 .mode = 0644,
1334 .proc_handler = &proc_dointvec,
1335 },
1336 {
1337 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
1338 .procname = "ip_conntrack_tcp_be_liberal",
1339 .data = &nf_ct_tcp_be_liberal,
1340 .maxlen = sizeof(unsigned int),
1341 .mode = 0644,
1342 .proc_handler = &proc_dointvec,
1343 },
1344 {
1345 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
1346 .procname = "ip_conntrack_tcp_max_retrans",
1347 .data = &nf_ct_tcp_max_retrans,
1348 .maxlen = sizeof(unsigned int),
1349 .mode = 0644,
1350 .proc_handler = &proc_dointvec,
1351 },
1352 {
1353 .ctl_name = 0
1354 }
1355};
1356#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
933a41e7
PM
1357#endif /* CONFIG_SYSCTL */
1358
61075af5 1359struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
9fb9cbb1
YK
1360{
1361 .l3proto = PF_INET,
605dcad6 1362 .l4proto = IPPROTO_TCP,
9fb9cbb1
YK
1363 .name = "tcp",
1364 .pkt_to_tuple = tcp_pkt_to_tuple,
1365 .invert_tuple = tcp_invert_tuple,
1366 .print_tuple = tcp_print_tuple,
1367 .print_conntrack = tcp_print_conntrack,
1368 .packet = tcp_packet,
1369 .new = tcp_new,
96f6bf82 1370 .error = tcp_error,
e281db5c 1371#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
fdf70832
PM
1372 .to_nlattr = tcp_to_nlattr,
1373 .from_nlattr = nlattr_to_tcp,
1374 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1375 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
f73e924c 1376 .nla_policy = nf_ct_port_nla_policy,
c1d10adb 1377#endif
933a41e7
PM
1378#ifdef CONFIG_SYSCTL
1379 .ctl_table_users = &tcp_sysctl_table_users,
1380 .ctl_table_header = &tcp_sysctl_header,
1381 .ctl_table = tcp_sysctl_table,
a999e683
PM
1382#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1383 .ctl_compat_table = tcp_compat_sysctl_table,
1384#endif
933a41e7 1385#endif
9fb9cbb1 1386};
13b18339 1387EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
9fb9cbb1 1388
61075af5 1389struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
9fb9cbb1
YK
1390{
1391 .l3proto = PF_INET6,
605dcad6 1392 .l4proto = IPPROTO_TCP,
9fb9cbb1
YK
1393 .name = "tcp",
1394 .pkt_to_tuple = tcp_pkt_to_tuple,
1395 .invert_tuple = tcp_invert_tuple,
1396 .print_tuple = tcp_print_tuple,
1397 .print_conntrack = tcp_print_conntrack,
1398 .packet = tcp_packet,
1399 .new = tcp_new,
96f6bf82 1400 .error = tcp_error,
e281db5c 1401#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
fdf70832
PM
1402 .to_nlattr = tcp_to_nlattr,
1403 .from_nlattr = nlattr_to_tcp,
1404 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1405 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
f73e924c 1406 .nla_policy = nf_ct_port_nla_policy,
c1d10adb 1407#endif
933a41e7
PM
1408#ifdef CONFIG_SYSCTL
1409 .ctl_table_users = &tcp_sysctl_table_users,
1410 .ctl_table_header = &tcp_sysctl_header,
1411 .ctl_table = tcp_sysctl_table,
1412#endif
9fb9cbb1 1413};
13b18339 1414EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
This page took 0.391435 seconds and 5 git commands to generate.