amd-iommu: remove BUS_NOTIFY_BOUND_DRIVER handling
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / netfilter / nf_conntrack_proto_tcp.c
blob97a6e93d742e21281c13bc0d86355303ed3703e9
1 /* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
9 #include <linux/types.h>
10 #include <linux/timer.h>
11 #include <linux/module.h>
12 #include <linux/in.h>
13 #include <linux/tcp.h>
14 #include <linux/spinlock.h>
15 #include <linux/skbuff.h>
16 #include <linux/ipv6.h>
17 #include <net/ip6_checksum.h>
18 #include <asm/unaligned.h>
20 #include <net/tcp.h>
22 #include <linux/netfilter.h>
23 #include <linux/netfilter_ipv4.h>
24 #include <linux/netfilter_ipv6.h>
25 #include <net/netfilter/nf_conntrack.h>
26 #include <net/netfilter/nf_conntrack_l4proto.h>
27 #include <net/netfilter/nf_conntrack_ecache.h>
28 #include <net/netfilter/nf_log.h>
29 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
30 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
32 /* Protects ct->proto.tcp */
33 static DEFINE_RWLOCK(tcp_lock);
35 /* "Be conservative in what you do,
36 be liberal in what you accept from others."
37 If it's non-zero, we mark only out of window RST segments as INVALID. */
38 static int nf_ct_tcp_be_liberal __read_mostly = 0;
40 /* If it is set to zero, we disable picking up already established
41 connections. */
42 static int nf_ct_tcp_loose __read_mostly = 1;
44 /* Max number of the retransmitted packets without receiving an (acceptable)
45 ACK from the destination. If this number is reached, a shorter timer
46 will be started. */
47 static int nf_ct_tcp_max_retrans __read_mostly = 3;
/*
 * FIXME: Examine ipfilter's timeouts and conntrack transitions more
 * closely.  They're more complex. --RR
 */

/* Printable names of the conntrack TCP states, indexed by state value. */
static const char *const tcp_conntrack_names[] = {
	"NONE",
	"SYN_SENT",
	"SYN_RECV",
	"ESTABLISHED",
	"FIN_WAIT",
	"CLOSE_WAIT",
	"LAST_ACK",
	"TIME_WAIT",
	"CLOSE",
	"LISTEN"
};
65 #define SECS * HZ
66 #define MINS * 60 SECS
67 #define HOURS * 60 MINS
68 #define DAYS * 24 HOURS
70 /* RFC1122 says the R2 limit should be at least 100 seconds.
71 Linux uses 15 packets as limit, which corresponds
72 to ~13-30min depending on RTO. */
73 static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
74 static unsigned int nf_ct_tcp_timeout_unacknowledged __read_mostly = 5 MINS;
76 static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
77 [TCP_CONNTRACK_SYN_SENT] = 2 MINS,
78 [TCP_CONNTRACK_SYN_RECV] = 60 SECS,
79 [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
80 [TCP_CONNTRACK_FIN_WAIT] = 2 MINS,
81 [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS,
82 [TCP_CONNTRACK_LAST_ACK] = 30 SECS,
83 [TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
84 [TCP_CONNTRACK_CLOSE] = 10 SECS,
/*
 * Two-letter aliases for the conntrack TCP states, used to keep the
 * state transition table readable.  sIV (invalid) maps to
 * TCP_CONNTRACK_MAX and sIG (ignore) to TCP_CONNTRACK_IGNORE.
 */
#define sNO TCP_CONNTRACK_NONE
#define sSS TCP_CONNTRACK_SYN_SENT
#define sSR TCP_CONNTRACK_SYN_RECV
#define sES TCP_CONNTRACK_ESTABLISHED
#define sFW TCP_CONNTRACK_FIN_WAIT
#define sCW TCP_CONNTRACK_CLOSE_WAIT
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
#define sLI TCP_CONNTRACK_LISTEN
#define sIV TCP_CONNTRACK_MAX
#define sIG TCP_CONNTRACK_IGNORE
/*
 * What TCP flags are set from RST/SYN/FIN/ACK.
 * The values are used as the middle index of the state transition table,
 * so their order must match the row order of that table.
 */
enum tcp_bit_set {
	TCP_SYN_SET,
	TCP_SYNACK_SET,
	TCP_FIN_SET,
	TCP_ACK_SET,
	TCP_RST_SET,
	TCP_NONE_SET,
};
111 * The TCP state transition table needs a few words...
113 * We are the man in the middle. All the packets go through us
114 * but might get lost in transit to the destination.
115 * It is assumed that the destinations can't receive segments
116 * we haven't seen.
118 * The checked segment is in window, but our windows are *not*
119 * equivalent with the ones of the sender/receiver. We always
120 * try to guess the state of the current sender.
122 * The meaning of the states are:
124 * NONE: initial state
125 * SYN_SENT: SYN-only packet seen
126 * SYN_RECV: SYN-ACK packet seen
127 * ESTABLISHED: ACK packet seen
128 * FIN_WAIT: FIN packet seen
129 * CLOSE_WAIT: ACK seen (after FIN)
130 * LAST_ACK: FIN seen (after FIN)
131 * TIME_WAIT: last ACK seen
132 * CLOSE: closed connection (RST)
134 * LISTEN state is not used.
136 * Packets marked as IGNORED (sIG):
137 * if they may be either invalid or valid
138 * and the receiver may send back a connection
139 * closing RST or a SYN/ACK.
141 * Packets marked as INVALID (sIV):
142 * if they are invalid
143 * or we do not support the request (simultaneous open)
145 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
147 /* ORIGINAL */
148 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
149 /*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
151 * sNO -> sSS Initialize a new connection
152 * sSS -> sSS Retransmitted SYN
153 * sSR -> sIG Late retransmitted SYN?
154 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
155 * are errors. Receiver will reply with RST
156 * and close the connection.
157 * Or we are not in sync and hold a dead connection.
158 * sFW -> sIG
159 * sCW -> sIG
160 * sLA -> sIG
161 * sTW -> sSS Reopened connection (RFC 1122).
162 * sCL -> sSS
164 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
165 /*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
167 * A SYN/ACK from the client is always invalid:
168 * - either it tries to set up a simultaneous open, which is
169 * not supported;
170 * - or the firewall has just been inserted between the two hosts
171 * during the session set-up. The SYN will be retransmitted
172 * by the true client (or it'll time out).
174 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
175 /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
177 * sNO -> sIV Too late and no reason to do anything...
178 * sSS -> sIV Client migth not send FIN in this state:
179 * we enforce waiting for a SYN/ACK reply first.
180 * sSR -> sFW Close started.
181 * sES -> sFW
182 * sFW -> sLA FIN seen in both directions, waiting for
183 * the last ACK.
184 * Migth be a retransmitted FIN as well...
185 * sCW -> sLA
186 * sLA -> sLA Retransmitted FIN. Remain in the same state.
187 * sTW -> sTW
188 * sCL -> sCL
190 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
191 /*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
193 * sNO -> sES Assumed.
194 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
195 * sSR -> sES Established state is reached.
196 * sES -> sES :-)
197 * sFW -> sCW Normal close request answered by ACK.
198 * sCW -> sCW
199 * sLA -> sTW Last ACK detected.
200 * sTW -> sTW Retransmitted last ACK. Remain in the same state.
201 * sCL -> sCL
203 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
204 /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
205 /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
208 /* REPLY */
209 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
210 /*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
212 * sNO -> sIV Never reached.
213 * sSS -> sIV Simultaneous open, not supported
214 * sSR -> sIV Simultaneous open, not supported.
215 * sES -> sIV Server may not initiate a connection.
216 * sFW -> sIV
217 * sCW -> sIV
218 * sLA -> sIV
219 * sTW -> sIV Reopened connection, but server may not do it.
220 * sCL -> sIV
222 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
223 /*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
225 * sSS -> sSR Standard open.
226 * sSR -> sSR Retransmitted SYN/ACK.
227 * sES -> sIG Late retransmitted SYN/ACK?
228 * sFW -> sIG Might be SYN/ACK answering ignored SYN
229 * sCW -> sIG
230 * sLA -> sIG
231 * sTW -> sIG
232 * sCL -> sIG
234 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
235 /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
237 * sSS -> sIV Server might not send FIN in this state.
238 * sSR -> sFW Close started.
239 * sES -> sFW
240 * sFW -> sLA FIN seen in both directions.
241 * sCW -> sLA
242 * sLA -> sLA Retransmitted FIN.
243 * sTW -> sTW
244 * sCL -> sCL
246 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
247 /*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
249 * sSS -> sIG Might be a half-open connection.
250 * sSR -> sSR Might answer late resent SYN.
251 * sES -> sES :-)
252 * sFW -> sCW Normal close request answered by ACK.
253 * sCW -> sCW
254 * sLA -> sTW Last ACK detected.
255 * sTW -> sTW Retransmitted last ACK.
256 * sCL -> sCL
258 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
259 /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
260 /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
264 static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
265 struct nf_conntrack_tuple *tuple)
267 const struct tcphdr *hp;
268 struct tcphdr _hdr;
270 /* Actually only need first 8 bytes. */
271 hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
272 if (hp == NULL)
273 return false;
275 tuple->src.u.tcp.port = hp->source;
276 tuple->dst.u.tcp.port = hp->dest;
278 return true;
281 static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
282 const struct nf_conntrack_tuple *orig)
284 tuple->src.u.tcp.port = orig->dst.u.tcp.port;
285 tuple->dst.u.tcp.port = orig->src.u.tcp.port;
286 return true;
289 /* Print out the per-protocol part of the tuple. */
290 static int tcp_print_tuple(struct seq_file *s,
291 const struct nf_conntrack_tuple *tuple)
293 return seq_printf(s, "sport=%hu dport=%hu ",
294 ntohs(tuple->src.u.tcp.port),
295 ntohs(tuple->dst.u.tcp.port));
298 /* Print out the private part of the conntrack. */
299 static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
301 enum tcp_conntrack state;
303 read_lock_bh(&tcp_lock);
304 state = ct->proto.tcp.state;
305 read_unlock_bh(&tcp_lock);
307 return seq_printf(s, "%s ", tcp_conntrack_names[state]);
310 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
312 if (tcph->rst) return TCP_RST_SET;
313 else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
314 else if (tcph->fin) return TCP_FIN_SET;
315 else if (tcph->ack) return TCP_ACK_SET;
316 else return TCP_NONE_SET;
319 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
320 in IP Filter' by Guido van Rooij.
322 http://www.nluug.nl/events/sane2000/papers.html
323 http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
325 The boundaries and the conditions are changed according to RFC793:
326 the packet must intersect the window (i.e. segments may be
327 after the right or before the left edge) and thus receivers may ACK
328 segments after the right edge of the window.
330 td_maxend = max(sack + max(win,1)) seen in reply packets
331 td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
332 td_maxwin += seq + len - sender.td_maxend
333 if seq + len > sender.td_maxend
334 td_end = max(seq + len) seen in sent packets
336 I. Upper bound for valid data: seq <= sender.td_maxend
337 II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
338 III. Upper bound for valid (s)ack: sack <= receiver.td_end
339 IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW
341 where sack is the highest right edge of sack block found in the packet
342 or ack in the case of packet without SACK option.
344 The upper bound limit for a valid (s)ack is not ignored -
345 we don't have to deal with fragments.
348 static inline __u32 segment_seq_plus_len(__u32 seq,
349 size_t len,
350 unsigned int dataoff,
351 const struct tcphdr *tcph)
353 /* XXX Should I use payload length field in IP/IPv6 header ?
354 * - YK */
355 return (seq + len - dataoff - tcph->doff*4
356 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
/* Fixme: what about big packets? */
#define MAXACKWINCONST			66000
/*
 * Width of the window in which an ACK is still accepted: the sender's
 * largest window seen so far, but never less than MAXACKWINCONST.
 */
#define MAXACKWINDOW(sender)						\
	(MAXACKWINCONST < (sender)->td_maxwin ? (sender)->td_maxwin	\
					      : MAXACKWINCONST)
366 * Simplified tcp_parse_options routine from tcp_input.c
368 static void tcp_options(const struct sk_buff *skb,
369 unsigned int dataoff,
370 const struct tcphdr *tcph,
371 struct ip_ct_tcp_state *state)
373 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
374 const unsigned char *ptr;
375 int length = (tcph->doff*4) - sizeof(struct tcphdr);
377 if (!length)
378 return;
380 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
381 length, buff);
382 BUG_ON(ptr == NULL);
384 state->td_scale =
385 state->flags = 0;
387 while (length > 0) {
388 int opcode=*ptr++;
389 int opsize;
391 switch (opcode) {
392 case TCPOPT_EOL:
393 return;
394 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
395 length--;
396 continue;
397 default:
398 opsize=*ptr++;
399 if (opsize < 2) /* "silly options" */
400 return;
401 if (opsize > length)
402 break; /* don't parse partial options */
404 if (opcode == TCPOPT_SACK_PERM
405 && opsize == TCPOLEN_SACK_PERM)
406 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
407 else if (opcode == TCPOPT_WINDOW
408 && opsize == TCPOLEN_WINDOW) {
409 state->td_scale = *(u_int8_t *)ptr;
411 if (state->td_scale > 14) {
412 /* See RFC1323 */
413 state->td_scale = 14;
415 state->flags |=
416 IP_CT_TCP_FLAG_WINDOW_SCALE;
418 ptr += opsize - 2;
419 length -= opsize;
424 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
425 const struct tcphdr *tcph, __u32 *sack)
427 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
428 const unsigned char *ptr;
429 int length = (tcph->doff*4) - sizeof(struct tcphdr);
430 __u32 tmp;
432 if (!length)
433 return;
435 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
436 length, buff);
437 BUG_ON(ptr == NULL);
439 /* Fast path for timestamp-only option */
440 if (length == TCPOLEN_TSTAMP_ALIGNED*4
441 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
442 | (TCPOPT_NOP << 16)
443 | (TCPOPT_TIMESTAMP << 8)
444 | TCPOLEN_TIMESTAMP))
445 return;
447 while (length > 0) {
448 int opcode = *ptr++;
449 int opsize, i;
451 switch (opcode) {
452 case TCPOPT_EOL:
453 return;
454 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
455 length--;
456 continue;
457 default:
458 opsize = *ptr++;
459 if (opsize < 2) /* "silly options" */
460 return;
461 if (opsize > length)
462 break; /* don't parse partial options */
464 if (opcode == TCPOPT_SACK
465 && opsize >= (TCPOLEN_SACK_BASE
466 + TCPOLEN_SACK_PERBLOCK)
467 && !((opsize - TCPOLEN_SACK_BASE)
468 % TCPOLEN_SACK_PERBLOCK)) {
469 for (i = 0;
470 i < (opsize - TCPOLEN_SACK_BASE);
471 i += TCPOLEN_SACK_PERBLOCK) {
472 tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
474 if (after(tmp, *sack))
475 *sack = tmp;
477 return;
479 ptr += opsize - 2;
480 length -= opsize;
/*
 * tcp_in_window - decide whether a segment fits the tracked windows.
 *
 * Outline of what the code below does:
 *  1. Reads seq, ack, window and the segment end from the header; sack
 *     starts out equal to ack and is refined by tcp_sack() when the
 *     receiver has IP_CT_TCP_FLAG_SACK_PERM set.
 *  2. If the sender has no data yet (td_end == 0), initialises it from this
 *     packet (SYN/ACK case, or mid-connection pickup).  If the connection
 *     is in SYN_SENT (original direction) or SYN_RECV (reply direction) and
 *     the segment ends after td_end, re-initialises the sender.
 *  3. Without ACK flag, or for RST+ACK with a zero ack value, ack/sack are
 *     replaced by receiver->td_end.  A segment without data gets
 *     seq = end = sender->td_end, except RSTs (unless seq == 0 in SYN_SENT).
 *  4. Applies the four bounds I-IV.  When all hold, the sender and receiver
 *     window data are updated, the retransmission counter is maintained for
 *     index == TCP_ACK_SET, and true is returned.
 *  5. Otherwise the result is false unless the sender's BE_LIBERAL flag or
 *     nf_ct_tcp_be_liberal is set; a rejected packet is logged with the
 *     first bound that failed.
 *
 * NOTE(review): the fourth test inside the log message uses "ack" and omits
 * the "- 1", whereas the actual check uses "sack" and "- 1"; the logged
 * reason may therefore differ from the bound that really failed - confirm.
 */
485 static bool tcp_in_window(const struct nf_conn *ct,
486 struct ip_ct_tcp *state,
487 enum ip_conntrack_dir dir,
488 unsigned int index,
489 const struct sk_buff *skb,
490 unsigned int dataoff,
491 const struct tcphdr *tcph,
492 u_int8_t pf)
494 struct net *net = nf_ct_net(ct);
495 struct ip_ct_tcp_state *sender = &state->seen[dir];
496 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
497 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
498 __u32 seq, ack, sack, end, win, swin;
499 bool res;
502 * Get the required data from the packet.
504 seq = ntohl(tcph->seq);
505 ack = sack = ntohl(tcph->ack_seq);
506 win = ntohs(tcph->window);
507 end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
509 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
510 tcp_sack(skb, dataoff, tcph, &sack);
512 pr_debug("tcp_in_window: START\n");
513 pr_debug("tcp_in_window: ");
514 nf_ct_dump_tuple(tuple);
515 pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n",
516 seq, ack, sack, win, end);
517 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
518 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
519 sender->td_end, sender->td_maxend, sender->td_maxwin,
520 sender->td_scale,
521 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
522 receiver->td_scale);
524 if (sender->td_end == 0) {
526 * Initialize sender data.
528 if (tcph->syn && tcph->ack) {
530 * Outgoing SYN-ACK in reply to a SYN.
532 sender->td_end =
533 sender->td_maxend = end;
534 sender->td_maxwin = (win == 0 ? 1 : win);
536 tcp_options(skb, dataoff, tcph, sender);
538 * RFC 1323:
539 * Both sides must send the Window Scale option
540 * to enable window scaling in either direction.
542 if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
543 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
544 sender->td_scale =
545 receiver->td_scale = 0;
546 } else {
548 * We are in the middle of a connection,
549 * its history is lost for us.
550 * Let's try to use the data from the packet.
552 sender->td_end = end;
553 sender->td_maxwin = (win == 0 ? 1 : win);
554 sender->td_maxend = end + sender->td_maxwin;
556 } else if (((state->state == TCP_CONNTRACK_SYN_SENT
557 && dir == IP_CT_DIR_ORIGINAL)
558 || (state->state == TCP_CONNTRACK_SYN_RECV
559 && dir == IP_CT_DIR_REPLY))
560 && after(end, sender->td_end)) {
562 * RFC 793: "if a TCP is reinitialized ... then it need
563 * not wait at all; it must only be sure to use sequence
564 * numbers larger than those recently used."
566 sender->td_end =
567 sender->td_maxend = end;
568 sender->td_maxwin = (win == 0 ? 1 : win);
570 tcp_options(skb, dataoff, tcph, sender);
573 if (!(tcph->ack)) {
575 * If there is no ACK, just pretend it was set and OK.
577 ack = sack = receiver->td_end;
578 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
579 (TCP_FLAG_ACK|TCP_FLAG_RST))
580 && (ack == 0)) {
582 * Broken TCP stacks, that set ACK in RST packets as well
583 * with zero ack value.
585 ack = sack = receiver->td_end;
588 if (seq == end
589 && (!tcph->rst
590 || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
592 * Packets contains no data: we assume it is valid
593 * and check the ack value only.
594 * However RST segments are always validated by their
595 * SEQ number, except when seq == 0 (reset sent answering
596 * SYN.
598 seq = end = sender->td_end;
600 pr_debug("tcp_in_window: ");
601 nf_ct_dump_tuple(tuple);
602 pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n",
603 seq, ack, sack, win, end);
604 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
605 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
606 sender->td_end, sender->td_maxend, sender->td_maxwin,
607 sender->td_scale,
608 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
609 receiver->td_scale);
611 pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
612 before(seq, sender->td_maxend + 1),
613 after(end, sender->td_end - receiver->td_maxwin - 1),
614 before(sack, receiver->td_end + 1),
615 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
617 if (before(seq, sender->td_maxend + 1) &&
618 after(end, sender->td_end - receiver->td_maxwin - 1) &&
619 before(sack, receiver->td_end + 1) &&
620 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
622 * Take into account window scaling (RFC 1323).
624 if (!tcph->syn)
625 win <<= sender->td_scale;
628 * Update sender data.
630 swin = win + (sack - ack);
631 if (sender->td_maxwin < swin)
632 sender->td_maxwin = swin;
633 if (after(end, sender->td_end)) {
634 sender->td_end = end;
635 sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
637 if (tcph->ack) {
638 if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
639 sender->td_maxack = ack;
640 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
641 } else if (after(ack, sender->td_maxack))
642 sender->td_maxack = ack;
646 * Update receiver data.
648 if (after(end, sender->td_maxend))
649 receiver->td_maxwin += end - sender->td_maxend;
650 if (after(sack + win, receiver->td_maxend - 1)) {
651 receiver->td_maxend = sack + win;
652 if (win == 0)
653 receiver->td_maxend++;
655 if (ack == receiver->td_end)
656 receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
659 * Check retransmissions.
661 if (index == TCP_ACK_SET) {
662 if (state->last_dir == dir
663 && state->last_seq == seq
664 && state->last_ack == ack
665 && state->last_end == end
666 && state->last_win == win)
667 state->retrans++;
668 else {
669 state->last_dir = dir;
670 state->last_seq = seq;
671 state->last_ack = ack;
672 state->last_end = end;
673 state->last_win = win;
674 state->retrans = 0;
677 res = true;
678 } else {
679 res = false;
680 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
681 nf_ct_tcp_be_liberal)
682 res = true;
683 if (!res && LOG_INVALID(net, IPPROTO_TCP))
684 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
685 "nf_ct_tcp: %s ",
686 before(seq, sender->td_maxend + 1) ?
687 after(end, sender->td_end - receiver->td_maxwin - 1) ?
688 before(sack, receiver->td_end + 1) ?
689 after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
690 : "ACK is under the lower bound (possible overly delayed ACK)"
691 : "ACK is over the upper bound (ACKed data not seen yet)"
692 : "SEQ is under the lower bound (already ACKed data retransmitted)"
693 : "SEQ is over the upper bound (over the window of the receiver)");
696 pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
697 "receiver end=%u maxend=%u maxwin=%u\n",
698 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
699 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
701 return res;
#ifdef CONFIG_NF_NAT_NEEDED
/*
 * Update sender->td_end after NAT successfully mangled the packet.
 * Caller must linearize skb at tcp header.
 *
 * The end of the (mangled) segment is recomputed from the header; under
 * tcp_lock it replaces td_end of direction @dir when it lies after the
 * stored value, and it is always stored as the connection's last_end.
 */
void nf_conntrack_tcp_update(const struct sk_buff *skb,
			     unsigned int dataoff,
			     struct nf_conn *ct,
			     int dir)
{
	const struct tcphdr *tcph = (const void *)skb->data + dataoff;
	struct ip_ct_tcp *tcp = &ct->proto.tcp;
	const struct ip_ct_tcp_state *sender = &tcp->seen[dir];
	const struct ip_ct_tcp_state *receiver = &tcp->seen[!dir];
	const __u32 end = segment_seq_plus_len(ntohl(tcph->seq), skb->len,
					       dataoff, tcph);

	write_lock_bh(&tcp_lock);
	/*
	 * We have to worry for the ack in the reply packet only...
	 */
	if (after(end, tcp->seen[dir].td_end))
		tcp->seen[dir].td_end = end;
	tcp->last_end = end;
	write_unlock_bh(&tcp_lock);

	pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		 sender->td_end, sender->td_maxend, sender->td_maxwin,
		 sender->td_scale,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		 receiver->td_scale);
}
EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
#endif
/* Bit masks of the individual flags within the TCP flags octet. */
#define TH_FIN	(1 << 0)
#define TH_SYN	(1 << 1)
#define TH_RST	(1 << 2)
#define TH_PUSH	(1 << 3)
#define TH_ACK	(1 << 4)
#define TH_URG	(1 << 5)
#define TH_ECE	(1 << 6)
#define TH_CWR	(1 << 7)
746 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */
747 static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
749 [TH_SYN] = 1,
750 [TH_SYN|TH_URG] = 1,
751 [TH_SYN|TH_ACK] = 1,
752 [TH_RST] = 1,
753 [TH_RST|TH_ACK] = 1,
754 [TH_FIN|TH_ACK] = 1,
755 [TH_FIN|TH_ACK|TH_URG] = 1,
756 [TH_ACK] = 1,
757 [TH_ACK|TH_URG] = 1,
760 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
761 static int tcp_error(struct net *net,
762 struct sk_buff *skb,
763 unsigned int dataoff,
764 enum ip_conntrack_info *ctinfo,
765 u_int8_t pf,
766 unsigned int hooknum)
768 const struct tcphdr *th;
769 struct tcphdr _tcph;
770 unsigned int tcplen = skb->len - dataoff;
771 u_int8_t tcpflags;
773 /* Smaller that minimal TCP header? */
774 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
775 if (th == NULL) {
776 if (LOG_INVALID(net, IPPROTO_TCP))
777 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
778 "nf_ct_tcp: short packet ");
779 return -NF_ACCEPT;
782 /* Not whole TCP header or malformed packet */
783 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
784 if (LOG_INVALID(net, IPPROTO_TCP))
785 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
786 "nf_ct_tcp: truncated/malformed packet ");
787 return -NF_ACCEPT;
790 /* Checksum invalid? Ignore.
791 * We skip checking packets on the outgoing path
792 * because the checksum is assumed to be correct.
794 /* FIXME: Source route IP option packets --RR */
795 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
796 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
797 if (LOG_INVALID(net, IPPROTO_TCP))
798 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
799 "nf_ct_tcp: bad TCP checksum ");
800 return -NF_ACCEPT;
803 /* Check TCP flags. */
804 tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
805 if (!tcp_valid_flags[tcpflags]) {
806 if (LOG_INVALID(net, IPPROTO_TCP))
807 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
808 "nf_ct_tcp: invalid TCP flag combination ");
809 return -NF_ACCEPT;
812 return NF_ACCEPT;
/*
 * tcp_packet - per-packet TCP state tracking for an existing conntrack.
 *
 * Outline of what the code below does:
 *  1. Under the tcp_lock write lock, looks up the new state in
 *     tcp_conntracks[dir][index][old_state], where index is derived from
 *     the TCP flags by get_conntrack_index().
 *  2. Special cases, each of which drops the lock before returning:
 *     - a SYN reopening a closed/aborted connection kills the conntrack and
 *       returns -NF_REPEAT (NF_DROP if nf_ct_kill() fails);
 *     - an ignored packet (sIG) records last_index/last_dir/last_seq/
 *       last_end and returns NF_ACCEPT, except a SYN/ACK acknowledging an
 *       earlier ignored SYN, which kills the conntrack and returns NF_DROP;
 *     - an invalid transition (TCP_CONNTRACK_MAX) returns -NF_ACCEPT;
 *     - an RST with a sequence number before the other side's td_maxack
 *       returns -NF_ACCEPT; an RST answering a previously let-through SYN
 *       or ACK skips the window check.
 *  3. Everything else must pass tcp_in_window(), otherwise -NF_ACCEPT.
 *  4. For in-window packets the new state is stored, the timeout is chosen
 *     (shortened after too many retransmissions or while data is
 *     unacknowledged), events are cached, a lone RST reply kills the
 *     conntrack, ASSURED is set on reaching ESTABLISHED, and the conntrack
 *     is refreshed; NF_ACCEPT is returned.
 */
815 /* Returns verdict for packet, or -1 for invalid. */
816 static int tcp_packet(struct nf_conn *ct,
817 const struct sk_buff *skb,
818 unsigned int dataoff,
819 enum ip_conntrack_info ctinfo,
820 u_int8_t pf,
821 unsigned int hooknum)
823 struct net *net = nf_ct_net(ct);
824 struct nf_conntrack_tuple *tuple;
825 enum tcp_conntrack new_state, old_state;
826 enum ip_conntrack_dir dir;
827 const struct tcphdr *th;
828 struct tcphdr _tcph;
829 unsigned long timeout;
830 unsigned int index;
832 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
833 BUG_ON(th == NULL);
835 write_lock_bh(&tcp_lock);
836 old_state = ct->proto.tcp.state;
837 dir = CTINFO2DIR(ctinfo);
838 index = get_conntrack_index(th);
839 new_state = tcp_conntracks[dir][index][old_state];
840 tuple = &ct->tuplehash[dir].tuple;
842 switch (new_state) {
843 case TCP_CONNTRACK_SYN_SENT:
844 if (old_state < TCP_CONNTRACK_TIME_WAIT)
845 break;
846 /* RFC 1122: "When a connection is closed actively,
847 * it MUST linger in TIME-WAIT state for a time 2xMSL
848 * (Maximum Segment Lifetime). However, it MAY accept
849 * a new SYN from the remote TCP to reopen the connection
850 * directly from TIME-WAIT state, if..."
851 * We ignore the conditions because we are in the
852 * TIME-WAIT state anyway.
854 * Handle aborted connections: we and the server
855 * think there is an existing connection but the client
856 * aborts it and starts a new one.
858 if (((ct->proto.tcp.seen[dir].flags
859 | ct->proto.tcp.seen[!dir].flags)
860 & IP_CT_TCP_FLAG_CLOSE_INIT)
861 || (ct->proto.tcp.last_dir == dir
862 && ct->proto.tcp.last_index == TCP_RST_SET)) {
863 /* Attempt to reopen a closed/aborted connection.
864 * Delete this connection and look up again. */
865 write_unlock_bh(&tcp_lock);
867 /* Only repeat if we can actually remove the timer.
868 * Destruction may already be in progress in process
869 * context and we must give it a chance to terminate.
871 if (nf_ct_kill(ct))
872 return -NF_REPEAT;
873 return NF_DROP;
875 /* Fall through */
876 case TCP_CONNTRACK_IGNORE:
877 /* Ignored packets:
879 * Our connection entry may be out of sync, so ignore
880 * packets which may signal the real connection between
881 * the client and the server.
883 * a) SYN in ORIGINAL
884 * b) SYN/ACK in REPLY
885 * c) ACK in reply direction after initial SYN in original.
887 * If the ignored packet is invalid, the receiver will send
888 * a RST we'll catch below.
890 if (index == TCP_SYNACK_SET
891 && ct->proto.tcp.last_index == TCP_SYN_SET
892 && ct->proto.tcp.last_dir != dir
893 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
894 /* b) This SYN/ACK acknowledges a SYN that we earlier
895 * ignored as invalid. This means that the client and
896 * the server are both in sync, while the firewall is
897 * not. We kill this session and block the SYN/ACK so
898 * that the client cannot but retransmit its SYN and
899 * thus initiate a clean new session.
901 write_unlock_bh(&tcp_lock);
902 if (LOG_INVALID(net, IPPROTO_TCP))
903 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
904 "nf_ct_tcp: killing out of sync session ");
905 nf_ct_kill(ct);
906 return NF_DROP;
908 ct->proto.tcp.last_index = index;
909 ct->proto.tcp.last_dir = dir;
910 ct->proto.tcp.last_seq = ntohl(th->seq);
911 ct->proto.tcp.last_end =
912 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
914 write_unlock_bh(&tcp_lock);
915 if (LOG_INVALID(net, IPPROTO_TCP))
916 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
917 "nf_ct_tcp: invalid packet ignored ");
918 return NF_ACCEPT;
919 case TCP_CONNTRACK_MAX:
920 /* Invalid packet */
921 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
922 dir, get_conntrack_index(th), old_state);
923 write_unlock_bh(&tcp_lock);
924 if (LOG_INVALID(net, IPPROTO_TCP))
925 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
926 "nf_ct_tcp: invalid state ");
927 return -NF_ACCEPT;
928 case TCP_CONNTRACK_CLOSE:
929 if (index == TCP_RST_SET
930 && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
931 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
932 /* Invalid RST */
933 write_unlock_bh(&tcp_lock);
934 if (LOG_INVALID(net, IPPROTO_TCP))
935 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
936 "nf_ct_tcp: invalid RST ");
937 return -NF_ACCEPT;
939 if (index == TCP_RST_SET
940 && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
941 && ct->proto.tcp.last_index == TCP_SYN_SET)
942 || (!test_bit(IPS_ASSURED_BIT, &ct->status)
943 && ct->proto.tcp.last_index == TCP_ACK_SET))
944 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
945 /* RST sent to invalid SYN or ACK we had let through
946 * at a) and c) above:
948 * a) SYN was in window then
949 * c) we hold a half-open connection.
951 * Delete our connection entry.
952 * We skip window checking, because packet might ACK
953 * segments we ignored. */
954 goto in_window;
956 /* Just fall through */
957 default:
958 /* Keep compilers happy. */
959 break;
962 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
963 skb, dataoff, th, pf)) {
964 write_unlock_bh(&tcp_lock);
965 return -NF_ACCEPT;
967 in_window:
968 /* From now on we have got in-window packets */
969 ct->proto.tcp.last_index = index;
970 ct->proto.tcp.last_dir = dir;
972 pr_debug("tcp_conntracks: ");
973 nf_ct_dump_tuple(tuple);
974 pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
975 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
976 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
977 old_state, new_state);
979 ct->proto.tcp.state = new_state;
980 if (old_state != new_state
981 && new_state == TCP_CONNTRACK_FIN_WAIT)
982 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
984 if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans &&
985 tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans)
986 timeout = nf_ct_tcp_timeout_max_retrans;
987 else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
988 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
989 tcp_timeouts[new_state] > nf_ct_tcp_timeout_unacknowledged)
990 timeout = nf_ct_tcp_timeout_unacknowledged;
991 else
992 timeout = tcp_timeouts[new_state];
993 write_unlock_bh(&tcp_lock);
995 nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
996 if (new_state != old_state)
997 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
999 if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1000 /* If only reply is a RST, we can consider ourselves not to
1001 have an established connection: this is a fairly common
1002 problem case, so we can delete the conntrack
1003 immediately. --RR */
1004 if (th->rst) {
1005 nf_ct_kill_acct(ct, ctinfo, skb);
1006 return NF_ACCEPT;
1008 } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1009 && (old_state == TCP_CONNTRACK_SYN_RECV
1010 || old_state == TCP_CONNTRACK_ESTABLISHED)
1011 && new_state == TCP_CONNTRACK_ESTABLISHED) {
1012 /* Set ASSURED if we see a valid ack in ESTABLISHED
1013 after SYN_RECV or a valid answer for a picked up
1014 connection. */
1015 set_bit(IPS_ASSURED_BIT, &ct->status);
1016 nf_conntrack_event_cache(IPCT_STATUS, ct);
1018 nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1020 return NF_ACCEPT;
1023 /* Called when a new connection for this protocol found. */
1024 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1025 unsigned int dataoff)
1027 enum tcp_conntrack new_state;
1028 const struct tcphdr *th;
1029 struct tcphdr _tcph;
1030 const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1031 const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1033 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1034 BUG_ON(th == NULL);
1036 /* Don't need lock here: this conntrack not in circulation yet */
1037 new_state
1038 = tcp_conntracks[0][get_conntrack_index(th)]
1039 [TCP_CONNTRACK_NONE];
1041 /* Invalid: delete conntrack */
1042 if (new_state >= TCP_CONNTRACK_MAX) {
1043 pr_debug("nf_ct_tcp: invalid new deleting.\n");
1044 return false;
1047 if (new_state == TCP_CONNTRACK_SYN_SENT) {
1048 /* SYN packet */
1049 ct->proto.tcp.seen[0].td_end =
1050 segment_seq_plus_len(ntohl(th->seq), skb->len,
1051 dataoff, th);
1052 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1053 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1054 ct->proto.tcp.seen[0].td_maxwin = 1;
1055 ct->proto.tcp.seen[0].td_maxend =
1056 ct->proto.tcp.seen[0].td_end;
1058 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1059 ct->proto.tcp.seen[1].flags = 0;
1060 } else if (nf_ct_tcp_loose == 0) {
1061 /* Don't try to pick up connections. */
1062 return false;
1063 } else {
1065 * We are in the middle of a connection,
1066 * its history is lost for us.
1067 * Let's try to use the data from the packet.
1069 ct->proto.tcp.seen[0].td_end =
1070 segment_seq_plus_len(ntohl(th->seq), skb->len,
1071 dataoff, th);
1072 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1073 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1074 ct->proto.tcp.seen[0].td_maxwin = 1;
1075 ct->proto.tcp.seen[0].td_maxend =
1076 ct->proto.tcp.seen[0].td_end +
1077 ct->proto.tcp.seen[0].td_maxwin;
1078 ct->proto.tcp.seen[0].td_scale = 0;
1080 /* We assume SACK and liberal window checking to handle
1081 * window scaling */
1082 ct->proto.tcp.seen[0].flags =
1083 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1084 IP_CT_TCP_FLAG_BE_LIBERAL;
1087 ct->proto.tcp.seen[1].td_end = 0;
1088 ct->proto.tcp.seen[1].td_maxend = 0;
1089 ct->proto.tcp.seen[1].td_maxwin = 1;
1090 ct->proto.tcp.seen[1].td_scale = 0;
1092 /* tcp_packet will set them */
1093 ct->proto.tcp.state = TCP_CONNTRACK_NONE;
1094 ct->proto.tcp.last_index = TCP_NONE_SET;
1096 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1097 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1098 sender->td_end, sender->td_maxend, sender->td_maxwin,
1099 sender->td_scale,
1100 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1101 receiver->td_scale);
1102 return true;
1105 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1107 #include <linux/netfilter/nfnetlink.h>
1108 #include <linux/netfilter/nfnetlink_conntrack.h>
1110 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1111 const struct nf_conn *ct)
1113 struct nlattr *nest_parms;
1114 struct nf_ct_tcp_flags tmp = {};
1116 read_lock_bh(&tcp_lock);
1117 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1118 if (!nest_parms)
1119 goto nla_put_failure;
1121 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state);
1123 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1124 ct->proto.tcp.seen[0].td_scale);
1126 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1127 ct->proto.tcp.seen[1].td_scale);
1129 tmp.flags = ct->proto.tcp.seen[0].flags;
1130 NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1131 sizeof(struct nf_ct_tcp_flags), &tmp);
1133 tmp.flags = ct->proto.tcp.seen[1].flags;
1134 NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1135 sizeof(struct nf_ct_tcp_flags), &tmp);
1136 read_unlock_bh(&tcp_lock);
1138 nla_nest_end(skb, nest_parms);
1140 return 0;
1142 nla_put_failure:
1143 read_unlock_bh(&tcp_lock);
1144 return -1;
1147 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1148 [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
1149 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1150 [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
1151 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
1152 [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
1155 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1157 struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1158 struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1159 int err;
1161 /* updates could not contain anything about the private
1162 * protocol info, in that case skip the parsing */
1163 if (!pattr)
1164 return 0;
1166 err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1167 if (err < 0)
1168 return err;
1170 if (tb[CTA_PROTOINFO_TCP_STATE] &&
1171 nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1172 return -EINVAL;
1174 write_lock_bh(&tcp_lock);
1175 if (tb[CTA_PROTOINFO_TCP_STATE])
1176 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1178 if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1179 struct nf_ct_tcp_flags *attr =
1180 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1181 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1182 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1185 if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1186 struct nf_ct_tcp_flags *attr =
1187 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1188 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1189 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1192 if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1193 tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1194 ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1195 ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1196 ct->proto.tcp.seen[0].td_scale =
1197 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1198 ct->proto.tcp.seen[1].td_scale =
1199 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1201 write_unlock_bh(&tcp_lock);
1203 return 0;
1206 static int tcp_nlattr_size(void)
1208 return nla_total_size(0) /* CTA_PROTOINFO_TCP */
1209 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1212 static int tcp_nlattr_tuple_size(void)
1214 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1216 #endif
1218 #ifdef CONFIG_SYSCTL
1219 static unsigned int tcp_sysctl_table_users;
1220 static struct ctl_table_header *tcp_sysctl_header;
1221 static struct ctl_table tcp_sysctl_table[] = {
1223 .procname = "nf_conntrack_tcp_timeout_syn_sent",
1224 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
1225 .maxlen = sizeof(unsigned int),
1226 .mode = 0644,
1227 .proc_handler = proc_dointvec_jiffies,
1230 .procname = "nf_conntrack_tcp_timeout_syn_recv",
1231 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
1232 .maxlen = sizeof(unsigned int),
1233 .mode = 0644,
1234 .proc_handler = proc_dointvec_jiffies,
1237 .procname = "nf_conntrack_tcp_timeout_established",
1238 .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
1239 .maxlen = sizeof(unsigned int),
1240 .mode = 0644,
1241 .proc_handler = proc_dointvec_jiffies,
1244 .procname = "nf_conntrack_tcp_timeout_fin_wait",
1245 .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
1246 .maxlen = sizeof(unsigned int),
1247 .mode = 0644,
1248 .proc_handler = proc_dointvec_jiffies,
1251 .procname = "nf_conntrack_tcp_timeout_close_wait",
1252 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
1253 .maxlen = sizeof(unsigned int),
1254 .mode = 0644,
1255 .proc_handler = proc_dointvec_jiffies,
1258 .procname = "nf_conntrack_tcp_timeout_last_ack",
1259 .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
1260 .maxlen = sizeof(unsigned int),
1261 .mode = 0644,
1262 .proc_handler = proc_dointvec_jiffies,
1265 .procname = "nf_conntrack_tcp_timeout_time_wait",
1266 .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
1267 .maxlen = sizeof(unsigned int),
1268 .mode = 0644,
1269 .proc_handler = proc_dointvec_jiffies,
1272 .procname = "nf_conntrack_tcp_timeout_close",
1273 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
1274 .maxlen = sizeof(unsigned int),
1275 .mode = 0644,
1276 .proc_handler = proc_dointvec_jiffies,
1279 .procname = "nf_conntrack_tcp_timeout_max_retrans",
1280 .data = &nf_ct_tcp_timeout_max_retrans,
1281 .maxlen = sizeof(unsigned int),
1282 .mode = 0644,
1283 .proc_handler = proc_dointvec_jiffies,
1286 .procname = "nf_conntrack_tcp_timeout_unacknowledged",
1287 .data = &nf_ct_tcp_timeout_unacknowledged,
1288 .maxlen = sizeof(unsigned int),
1289 .mode = 0644,
1290 .proc_handler = proc_dointvec_jiffies,
1293 .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE,
1294 .procname = "nf_conntrack_tcp_loose",
1295 .data = &nf_ct_tcp_loose,
1296 .maxlen = sizeof(unsigned int),
1297 .mode = 0644,
1298 .proc_handler = proc_dointvec,
1301 .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL,
1302 .procname = "nf_conntrack_tcp_be_liberal",
1303 .data = &nf_ct_tcp_be_liberal,
1304 .maxlen = sizeof(unsigned int),
1305 .mode = 0644,
1306 .proc_handler = proc_dointvec,
1309 .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS,
1310 .procname = "nf_conntrack_tcp_max_retrans",
1311 .data = &nf_ct_tcp_max_retrans,
1312 .maxlen = sizeof(unsigned int),
1313 .mode = 0644,
1314 .proc_handler = proc_dointvec,
1317 .ctl_name = 0
1321 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1322 static struct ctl_table tcp_compat_sysctl_table[] = {
1324 .procname = "ip_conntrack_tcp_timeout_syn_sent",
1325 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
1326 .maxlen = sizeof(unsigned int),
1327 .mode = 0644,
1328 .proc_handler = proc_dointvec_jiffies,
1331 .procname = "ip_conntrack_tcp_timeout_syn_recv",
1332 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
1333 .maxlen = sizeof(unsigned int),
1334 .mode = 0644,
1335 .proc_handler = proc_dointvec_jiffies,
1338 .procname = "ip_conntrack_tcp_timeout_established",
1339 .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
1340 .maxlen = sizeof(unsigned int),
1341 .mode = 0644,
1342 .proc_handler = proc_dointvec_jiffies,
1345 .procname = "ip_conntrack_tcp_timeout_fin_wait",
1346 .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
1347 .maxlen = sizeof(unsigned int),
1348 .mode = 0644,
1349 .proc_handler = proc_dointvec_jiffies,
1352 .procname = "ip_conntrack_tcp_timeout_close_wait",
1353 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
1354 .maxlen = sizeof(unsigned int),
1355 .mode = 0644,
1356 .proc_handler = proc_dointvec_jiffies,
1359 .procname = "ip_conntrack_tcp_timeout_last_ack",
1360 .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
1361 .maxlen = sizeof(unsigned int),
1362 .mode = 0644,
1363 .proc_handler = proc_dointvec_jiffies,
1366 .procname = "ip_conntrack_tcp_timeout_time_wait",
1367 .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
1368 .maxlen = sizeof(unsigned int),
1369 .mode = 0644,
1370 .proc_handler = proc_dointvec_jiffies,
1373 .procname = "ip_conntrack_tcp_timeout_close",
1374 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
1375 .maxlen = sizeof(unsigned int),
1376 .mode = 0644,
1377 .proc_handler = proc_dointvec_jiffies,
1380 .procname = "ip_conntrack_tcp_timeout_max_retrans",
1381 .data = &nf_ct_tcp_timeout_max_retrans,
1382 .maxlen = sizeof(unsigned int),
1383 .mode = 0644,
1384 .proc_handler = proc_dointvec_jiffies,
1387 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
1388 .procname = "ip_conntrack_tcp_loose",
1389 .data = &nf_ct_tcp_loose,
1390 .maxlen = sizeof(unsigned int),
1391 .mode = 0644,
1392 .proc_handler = proc_dointvec,
1395 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
1396 .procname = "ip_conntrack_tcp_be_liberal",
1397 .data = &nf_ct_tcp_be_liberal,
1398 .maxlen = sizeof(unsigned int),
1399 .mode = 0644,
1400 .proc_handler = proc_dointvec,
1403 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
1404 .procname = "ip_conntrack_tcp_max_retrans",
1405 .data = &nf_ct_tcp_max_retrans,
1406 .maxlen = sizeof(unsigned int),
1407 .mode = 0644,
1408 .proc_handler = proc_dointvec,
1411 .ctl_name = 0
1414 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1415 #endif /* CONFIG_SYSCTL */
1417 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1419 .l3proto = PF_INET,
1420 .l4proto = IPPROTO_TCP,
1421 .name = "tcp",
1422 .pkt_to_tuple = tcp_pkt_to_tuple,
1423 .invert_tuple = tcp_invert_tuple,
1424 .print_tuple = tcp_print_tuple,
1425 .print_conntrack = tcp_print_conntrack,
1426 .packet = tcp_packet,
1427 .new = tcp_new,
1428 .error = tcp_error,
1429 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1430 .to_nlattr = tcp_to_nlattr,
1431 .nlattr_size = tcp_nlattr_size,
1432 .from_nlattr = nlattr_to_tcp,
1433 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1434 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
1435 .nlattr_tuple_size = tcp_nlattr_tuple_size,
1436 .nla_policy = nf_ct_port_nla_policy,
1437 #endif
1438 #ifdef CONFIG_SYSCTL
1439 .ctl_table_users = &tcp_sysctl_table_users,
1440 .ctl_table_header = &tcp_sysctl_header,
1441 .ctl_table = tcp_sysctl_table,
1442 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1443 .ctl_compat_table = tcp_compat_sysctl_table,
1444 #endif
1445 #endif
1447 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1449 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1451 .l3proto = PF_INET6,
1452 .l4proto = IPPROTO_TCP,
1453 .name = "tcp",
1454 .pkt_to_tuple = tcp_pkt_to_tuple,
1455 .invert_tuple = tcp_invert_tuple,
1456 .print_tuple = tcp_print_tuple,
1457 .print_conntrack = tcp_print_conntrack,
1458 .packet = tcp_packet,
1459 .new = tcp_new,
1460 .error = tcp_error,
1461 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1462 .to_nlattr = tcp_to_nlattr,
1463 .nlattr_size = tcp_nlattr_size,
1464 .from_nlattr = nlattr_to_tcp,
1465 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1466 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
1467 .nlattr_tuple_size = tcp_nlattr_tuple_size,
1468 .nla_policy = nf_ct_port_nla_policy,
1469 #endif
1470 #ifdef CONFIG_SYSCTL
1471 .ctl_table_users = &tcp_sysctl_table_users,
1472 .ctl_table_header = &tcp_sysctl_header,
1473 .ctl_table = tcp_sysctl_table,
1474 #endif
1476 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);