GUI: Fix Tomato RAF theme for all builds. Compilation typo.
[tomato.git] / release / src-rt-6.x.4708 / linux / linux-2.6.36 / net / netfilter / nf_conntrack_proto_tcp.c
blobdc0a75d0fe589290d6dd49f68bce6fd5d51454a0
1 /* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
9 #include <linux/types.h>
10 #include <linux/timer.h>
11 #include <linux/module.h>
12 #include <linux/in.h>
13 #include <linux/tcp.h>
14 #include <linux/spinlock.h>
15 #include <linux/skbuff.h>
16 #include <linux/ipv6.h>
17 #include <net/ip6_checksum.h>
18 #include <asm/unaligned.h>
20 #include <net/tcp.h>
22 #include <linux/netfilter.h>
23 #include <linux/netfilter_ipv4.h>
24 #include <linux/netfilter_ipv6.h>
25 #include <net/netfilter/nf_conntrack.h>
26 #include <net/netfilter/nf_conntrack_l4proto.h>
27 #include <net/netfilter/nf_conntrack_ecache.h>
28 #include <net/netfilter/nf_log.h>
29 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
30 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
32 #ifdef HNDCTF
33 #include <ctf/hndctf.h>
34 extern int ip_conntrack_ipct_delete(struct nf_conn *ct, int ct_timeout);
35 #endif /* HNDCTF */
37 /* "Be conservative in what you do,
38 be liberal in what you accept from others."
39 If it's non-zero, we mark only out of window RST segments as INVALID. */
40 static int nf_ct_tcp_be_liberal __read_mostly = 0;
42 /* If it is set to zero, we disable picking up already established
43 connections. */
44 static int nf_ct_tcp_loose __read_mostly = 1;
46 /* Max number of the retransmitted packets without receiving an (acceptable)
47 ACK from the destination. If this number is reached, a shorter timer
48 will be started. */
49 static int nf_ct_tcp_max_retrans __read_mostly = 3;
/*
 * Printable names of the conntrack TCP states.  The array is indexed
 * directly with ct->proto.tcp.state by tcp_print_conntrack(), so the
 * order of the entries must follow the order of the state values
 * (the same order as the columns of the tcp_conntracks table:
 * sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2).
 */
static const char *const tcp_conntrack_names[] = {
	"NONE",
	"SYN_SENT",
	"SYN_RECV",
	"ESTABLISHED",
	"FIN_WAIT",
	"CLOSE_WAIT",
	"LAST_ACK",
	"TIME_WAIT",
	"CLOSE",
	"SYN_SENT2",
};
/*
 * Postfix time-unit multipliers: "5 MINS" expands to "5 * 60 * HZ".
 * They are deliberately unparenthesised so that they can follow a
 * literal; only use them directly after a single constant, never after
 * a compound expression (e.g. "1 + 2 MINS" would scale only the 2).
 */
#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
70 /* RFC1122 says the R2 limit should be at least 100 seconds.
71 Linux uses 15 packets as limit, which corresponds
72 to ~13-30min depending on RTO. */
73 static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
74 static unsigned int nf_ct_tcp_timeout_unacknowledged __read_mostly = 5 MINS;
76 static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
77 [TCP_CONNTRACK_SYN_SENT] = 2 MINS,
78 [TCP_CONNTRACK_SYN_RECV] = 60 SECS,
79 [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
80 [TCP_CONNTRACK_FIN_WAIT] = 2 MINS,
81 [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS,
82 [TCP_CONNTRACK_LAST_ACK] = 30 SECS,
83 [TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
84 [TCP_CONNTRACK_CLOSE] = 10 SECS,
85 [TCP_CONNTRACK_SYN_SENT2] = 2 MINS,
/*
 * Two-letter shorthands for the conntrack states, used to keep the
 * tcp_conntracks transition table readable.  sIV ("invalid") maps to
 * TCP_CONNTRACK_MAX and sIG ("ignore") to TCP_CONNTRACK_IGNORE; both
 * are handled as special verdicts in tcp_packet().
 */
#define sNO TCP_CONNTRACK_NONE
#define sSS TCP_CONNTRACK_SYN_SENT
#define sSR TCP_CONNTRACK_SYN_RECV
#define sES TCP_CONNTRACK_ESTABLISHED
#define sFW TCP_CONNTRACK_FIN_WAIT
#define sCW TCP_CONNTRACK_CLOSE_WAIT
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
#define sS2 TCP_CONNTRACK_SYN_SENT2
#define sIV TCP_CONNTRACK_MAX
#define sIG TCP_CONNTRACK_IGNORE
/* What TCP flags are set from RST/SYN/FIN/ACK.
 * The values are used as the middle index of the tcp_conntracks table,
 * so the order here must match the row order of that table. */
enum tcp_bit_set {
	TCP_SYN_SET,
	TCP_SYNACK_SET,
	TCP_FIN_SET,
	TCP_ACK_SET,
	TCP_RST_SET,
	TCP_NONE_SET,
};
112 * The TCP state transition table needs a few words...
114 * We are the man in the middle. All the packets go through us
115 * but might get lost in transit to the destination.
116 * It is assumed that the destinations can't receive segments
117 * we haven't seen.
119 * The checked segment is in window, but our windows are *not*
120 * equivalent with the ones of the sender/receiver. We always
121 * try to guess the state of the current sender.
123 * The meaning of the states are:
125 * NONE: initial state
126 * SYN_SENT: SYN-only packet seen
127 * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open
128 * SYN_RECV: SYN-ACK packet seen
129 * ESTABLISHED: ACK packet seen
130 * FIN_WAIT: FIN packet seen
131 * CLOSE_WAIT: ACK seen (after FIN)
132 * LAST_ACK: FIN seen (after FIN)
133 * TIME_WAIT: last ACK seen
134 * CLOSE: closed connection (RST)
136 * Packets marked as IGNORED (sIG):
137 * if they may be either invalid or valid
138 * and the receiver may send back a connection
139 * closing RST or a SYN/ACK.
141 * Packets marked as INVALID (sIV):
142 * if we regard them as truly invalid packets
144 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
146 /* ORIGINAL */
147 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
148 /*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
150 * sNO -> sSS Initialize a new connection
151 * sSS -> sSS Retransmitted SYN
152 * sS2 -> sS2 Late retransmitted SYN
153 * sSR -> sIG
154 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
155 * are errors. Receiver will reply with RST
156 * and close the connection.
157 * Or we are not in sync and hold a dead connection.
158 * sFW -> sIG
159 * sCW -> sIG
160 * sLA -> sIG
161 * sTW -> sSS Reopened connection (RFC 1122).
162 * sCL -> sSS
164 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
165 /*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
167 * sNO -> sIV Too late and no reason to do anything
168 * sSS -> sIV Client can't send SYN and then SYN/ACK
169 * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
170 * sSR -> sIG
171 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
172 * are errors. Receiver will reply with RST
173 * and close the connection.
174 * Or we are not in sync and hold a dead connection.
175 * sFW -> sIG
176 * sCW -> sIG
177 * sLA -> sIG
178 * sTW -> sIG
179 * sCL -> sIG
181 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
182 /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
184 * sNO -> sIV Too late and no reason to do anything...
185 * sSS -> sIV Client migth not send FIN in this state:
186 * we enforce waiting for a SYN/ACK reply first.
187 * sS2 -> sIV
188 * sSR -> sFW Close started.
189 * sES -> sFW
190 * sFW -> sLA FIN seen in both directions, waiting for
191 * the last ACK.
192 * Migth be a retransmitted FIN as well...
193 * sCW -> sLA
194 * sLA -> sLA Retransmitted FIN. Remain in the same state.
195 * sTW -> sTW
196 * sCL -> sCL
198 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
199 /*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
201 * sNO -> sES Assumed.
202 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
203 * sS2 -> sIV
204 * sSR -> sES Established state is reached.
205 * sES -> sES :-)
206 * sFW -> sCW Normal close request answered by ACK.
207 * sCW -> sCW
208 * sLA -> sTW Last ACK detected.
209 * sTW -> sTW Retransmitted last ACK. Remain in the same state.
210 * sCL -> sCL
212 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
213 /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
214 /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
217 /* REPLY */
218 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
219 /*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
221 * sNO -> sIV Never reached.
222 * sSS -> sS2 Simultaneous open
223 * sS2 -> sS2 Retransmitted simultaneous SYN
224 * sSR -> sIV Invalid SYN packets sent by the server
225 * sES -> sIV
226 * sFW -> sIV
227 * sCW -> sIV
228 * sLA -> sIV
229 * sTW -> sIV Reopened connection, but server may not do it.
230 * sCL -> sIV
232 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
233 /*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
235 * sSS -> sSR Standard open.
236 * sS2 -> sSR Simultaneous open
237 * sSR -> sSR Retransmitted SYN/ACK.
238 * sES -> sIG Late retransmitted SYN/ACK?
239 * sFW -> sIG Might be SYN/ACK answering ignored SYN
240 * sCW -> sIG
241 * sLA -> sIG
242 * sTW -> sIG
243 * sCL -> sIG
245 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
246 /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
248 * sSS -> sIV Server might not send FIN in this state.
249 * sS2 -> sIV
250 * sSR -> sFW Close started.
251 * sES -> sFW
252 * sFW -> sLA FIN seen in both directions.
253 * sCW -> sLA
254 * sLA -> sLA Retransmitted FIN.
255 * sTW -> sTW
256 * sCL -> sCL
258 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
259 /*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
261 * sSS -> sIG Might be a half-open connection.
262 * sS2 -> sIG
263 * sSR -> sSR Might answer late resent SYN.
264 * sES -> sES :-)
265 * sFW -> sCW Normal close request answered by ACK.
266 * sCW -> sCW
267 * sLA -> sTW Last ACK detected.
268 * sTW -> sTW Retransmitted last ACK.
269 * sCL -> sCL
271 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
272 /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
273 /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
277 static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
278 struct nf_conntrack_tuple *tuple)
280 const struct tcphdr *hp;
281 struct tcphdr _hdr;
283 /* Actually only need first 8 bytes. */
284 hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
285 if (hp == NULL)
286 return false;
288 tuple->src.u.tcp.port = hp->source;
289 tuple->dst.u.tcp.port = hp->dest;
291 return true;
294 static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
295 const struct nf_conntrack_tuple *orig)
297 tuple->src.u.tcp.port = orig->dst.u.tcp.port;
298 tuple->dst.u.tcp.port = orig->src.u.tcp.port;
299 return true;
302 /* Print out the per-protocol part of the tuple. */
303 static int tcp_print_tuple(struct seq_file *s,
304 const struct nf_conntrack_tuple *tuple)
306 return seq_printf(s, "sport=%hu dport=%hu ",
307 ntohs(tuple->src.u.tcp.port),
308 ntohs(tuple->dst.u.tcp.port));
311 /* Print out the private part of the conntrack. */
312 static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
314 enum tcp_conntrack state;
316 spin_lock_bh(&ct->lock);
317 state = ct->proto.tcp.state;
318 spin_unlock_bh(&ct->lock);
320 return seq_printf(s, "%s ", tcp_conntrack_names[state]);
323 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
325 if (tcph->rst) return TCP_RST_SET;
326 else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
327 else if (tcph->fin) return TCP_FIN_SET;
328 else if (tcph->ack) return TCP_ACK_SET;
329 else return TCP_NONE_SET;
/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
   in IP Filter' by Guido van Rooij.

   http://www.nluug.nl/events/sane2000/papers.html
   http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz

   The boundaries and the conditions are changed according to RFC793:
   the packet must intersect the window (i.e. segments may be
   after the right or before the left edge) and thus receivers may ACK
   segments after the right edge of the window.

	td_maxend = max(sack + max(win,1)) seen in reply packets
	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
	td_maxwin += seq + len - sender.td_maxend
			if seq + len > sender.td_maxend
	td_end    = max(seq + len) seen in sent packets

   I.   Upper bound for valid data:	seq <= sender.td_maxend
   II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
   III.	Upper bound for valid (s)ack:   sack <= receiver.td_end
   IV.	Lower bound for valid (s)ack:	sack >= receiver.td_end - MAXACKWINDOW

   where sack is the highest right edge of sack block found in the packet
   or ack in the case of packet without SACK option.

   The upper bound limit for a valid (s)ack is not ignored -
   we don't have to deal with fragments.
*/
361 static inline __u32 segment_seq_plus_len(__u32 seq,
362 size_t len,
363 unsigned int dataoff,
364 const struct tcphdr *tcph)
366 return (seq + len - dataoff - tcph->doff*4
367 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
/* Fixed size of ACK window floor (1 << 16) + 400.
 * MAXACKWINDOW(sender) is the larger of the sender's td_maxwin and
 * MAXACKWINCONST; note that it evaluates its argument more than once. */
#define MAXACKWINCONST			66000
#define MAXACKWINDOW(sender)						\
	((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin	\
					      : MAXACKWINCONST)
376 * Simplified tcp_parse_options routine from tcp_input.c
378 static void tcp_options(const struct sk_buff *skb,
379 unsigned int dataoff,
380 const struct tcphdr *tcph,
381 struct ip_ct_tcp_state *state)
383 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
384 const unsigned char *ptr;
385 int length = (tcph->doff*4) - sizeof(struct tcphdr);
387 if (!length)
388 return;
390 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
391 length, buff);
392 BUG_ON(ptr == NULL);
394 state->td_scale =
395 state->flags = 0;
397 while (length > 0) {
398 int opcode=*ptr++;
399 int opsize;
401 switch (opcode) {
402 case TCPOPT_EOL:
403 return;
404 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
405 length--;
406 continue;
407 default:
408 opsize=*ptr++;
409 if (opsize < 2) /* "silly options" */
410 return;
411 if (opsize > length)
412 break; /* don't parse partial options */
414 if (opcode == TCPOPT_SACK_PERM
415 && opsize == TCPOLEN_SACK_PERM)
416 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
417 else if (opcode == TCPOPT_WINDOW
418 && opsize == TCPOLEN_WINDOW) {
419 state->td_scale = *(u_int8_t *)ptr;
421 if (state->td_scale > 14) {
422 /* See RFC1323 */
423 state->td_scale = 14;
425 state->flags |=
426 IP_CT_TCP_FLAG_WINDOW_SCALE;
428 ptr += opsize - 2;
429 length -= opsize;
434 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
435 const struct tcphdr *tcph, __u32 *sack)
437 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
438 const unsigned char *ptr;
439 int length = (tcph->doff*4) - sizeof(struct tcphdr);
440 __u32 tmp;
442 if (!length)
443 return;
445 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
446 length, buff);
447 BUG_ON(ptr == NULL);
449 /* Fast path for timestamp-only option */
450 if (length == TCPOLEN_TSTAMP_ALIGNED*4
451 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
452 | (TCPOPT_NOP << 16)
453 | (TCPOPT_TIMESTAMP << 8)
454 | TCPOLEN_TIMESTAMP))
455 return;
457 while (length > 0) {
458 int opcode = *ptr++;
459 int opsize, i;
461 switch (opcode) {
462 case TCPOPT_EOL:
463 return;
464 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
465 length--;
466 continue;
467 default:
468 opsize = *ptr++;
469 if (opsize < 2) /* "silly options" */
470 return;
471 if (opsize > length)
472 break; /* don't parse partial options */
474 if (opcode == TCPOPT_SACK
475 && opsize >= (TCPOLEN_SACK_BASE
476 + TCPOLEN_SACK_PERBLOCK)
477 && !((opsize - TCPOLEN_SACK_BASE)
478 % TCPOLEN_SACK_PERBLOCK)) {
479 for (i = 0;
480 i < (opsize - TCPOLEN_SACK_BASE);
481 i += TCPOLEN_SACK_PERBLOCK) {
482 tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
484 if (after(tmp, *sack))
485 *sack = tmp;
487 return;
489 ptr += opsize - 2;
490 length -= opsize;
#ifdef CONFIG_NF_NAT_NEEDED
/*
 * Return the sequence-number offset reported by the nf_ct_nat_offset
 * hook for @seq in direction @dir of @ct, or 0 when no hook is
 * registered.
 */
static inline s16 nat_offset(const struct nf_conn *ct,
			     enum ip_conntrack_dir dir,
			     u32 seq)
{
	typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);

	return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
}
/* The hook is only consulted for IPv4; every other family yields 0. */
#define NAT_OFFSET(pf, ct, dir, seq) \
	(pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0)
#else
#define NAT_OFFSET(pf, ct, dir, seq)	0
#endif
510 static bool tcp_in_window(const struct nf_conn *ct,
511 struct ip_ct_tcp *state,
512 enum ip_conntrack_dir dir,
513 unsigned int index,
514 const struct sk_buff *skb,
515 unsigned int dataoff,
516 const struct tcphdr *tcph,
517 u_int8_t pf)
519 struct net *net = nf_ct_net(ct);
520 struct ip_ct_tcp_state *sender = &state->seen[dir];
521 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
522 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
523 __u32 seq, ack, sack, end, win, swin;
524 s16 receiver_offset;
525 bool res;
528 * Get the required data from the packet.
530 seq = ntohl(tcph->seq);
531 ack = sack = ntohl(tcph->ack_seq);
532 win = ntohs(tcph->window);
533 end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
535 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
536 tcp_sack(skb, dataoff, tcph, &sack);
538 /* Take into account NAT sequence number mangling */
539 receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1);
540 ack -= receiver_offset;
541 sack -= receiver_offset;
543 pr_debug("tcp_in_window: START\n");
544 pr_debug("tcp_in_window: ");
545 nf_ct_dump_tuple(tuple);
546 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
547 seq, ack, receiver_offset, sack, receiver_offset, win, end);
548 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
549 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
550 sender->td_end, sender->td_maxend, sender->td_maxwin,
551 sender->td_scale,
552 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
553 receiver->td_scale);
555 if (sender->td_maxwin == 0) {
557 * Initialize sender data.
559 if (tcph->syn) {
561 * SYN-ACK in reply to a SYN
562 * or SYN from reply direction in simultaneous open.
564 sender->td_end =
565 sender->td_maxend = end;
566 sender->td_maxwin = (win == 0 ? 1 : win);
568 tcp_options(skb, dataoff, tcph, sender);
570 * RFC 1323:
571 * Both sides must send the Window Scale option
572 * to enable window scaling in either direction.
574 if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
575 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
576 sender->td_scale =
577 receiver->td_scale = 0;
578 if (!tcph->ack)
579 /* Simultaneous open */
580 return true;
581 } else {
583 * We are in the middle of a connection,
584 * its history is lost for us.
585 * Let's try to use the data from the packet.
587 sender->td_end = end;
588 win <<= sender->td_scale;
589 sender->td_maxwin = (win == 0 ? 1 : win);
590 sender->td_maxend = end + sender->td_maxwin;
592 * We haven't seen traffic in the other direction yet
593 * but we have to tweak window tracking to pass III
594 * and IV until that happens.
596 if (receiver->td_maxwin == 0)
597 receiver->td_end = receiver->td_maxend = sack;
599 } else if (((state->state == TCP_CONNTRACK_SYN_SENT
600 && dir == IP_CT_DIR_ORIGINAL)
601 || (state->state == TCP_CONNTRACK_SYN_RECV
602 && dir == IP_CT_DIR_REPLY))
603 && after(end, sender->td_end)) {
605 * RFC 793: "if a TCP is reinitialized ... then it need
606 * not wait at all; it must only be sure to use sequence
607 * numbers larger than those recently used."
609 sender->td_end =
610 sender->td_maxend = end;
611 sender->td_maxwin = (win == 0 ? 1 : win);
613 tcp_options(skb, dataoff, tcph, sender);
616 if (!(tcph->ack)) {
618 * If there is no ACK, just pretend it was set and OK.
620 ack = sack = receiver->td_end;
621 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
622 (TCP_FLAG_ACK|TCP_FLAG_RST))
623 && (ack == 0)) {
625 * Broken TCP stacks, that set ACK in RST packets as well
626 * with zero ack value.
628 ack = sack = receiver->td_end;
631 if (seq == end
632 && (!tcph->rst
633 || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
635 * Packets contains no data: we assume it is valid
636 * and check the ack value only.
637 * However RST segments are always validated by their
638 * SEQ number, except when seq == 0 (reset sent answering
639 * SYN.
641 seq = end = sender->td_end;
643 pr_debug("tcp_in_window: ");
644 nf_ct_dump_tuple(tuple);
645 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
646 seq, ack, receiver_offset, sack, receiver_offset, win, end);
647 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
648 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
649 sender->td_end, sender->td_maxend, sender->td_maxwin,
650 sender->td_scale,
651 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
652 receiver->td_scale);
654 pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
655 before(seq, sender->td_maxend + 1),
656 after(end, sender->td_end - receiver->td_maxwin - 1),
657 before(sack, receiver->td_end + 1),
658 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
660 if (before(seq, sender->td_maxend + 1) &&
661 after(end, sender->td_end - receiver->td_maxwin - 1) &&
662 before(sack, receiver->td_end + 1) &&
663 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
665 * Take into account window scaling (RFC 1323).
667 if (!tcph->syn)
668 win <<= sender->td_scale;
671 * Update sender data.
673 swin = win + (sack - ack);
674 if (sender->td_maxwin < swin)
675 sender->td_maxwin = swin;
676 if (after(end, sender->td_end)) {
677 sender->td_end = end;
678 sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
680 if (tcph->ack) {
681 if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
682 sender->td_maxack = ack;
683 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
684 } else if (after(ack, sender->td_maxack))
685 sender->td_maxack = ack;
689 * Update receiver data.
691 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
692 receiver->td_maxwin += end - sender->td_maxend;
693 if (after(sack + win, receiver->td_maxend - 1)) {
694 receiver->td_maxend = sack + win;
695 if (win == 0)
696 receiver->td_maxend++;
698 if (ack == receiver->td_end)
699 receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
702 * Check retransmissions.
704 if (index == TCP_ACK_SET) {
705 if (state->last_dir == dir
706 && state->last_seq == seq
707 && state->last_ack == ack
708 && state->last_end == end
709 && state->last_win == win)
710 state->retrans++;
711 else {
712 state->last_dir = dir;
713 state->last_seq = seq;
714 state->last_ack = ack;
715 state->last_end = end;
716 state->last_win = win;
717 state->retrans = 0;
720 res = true;
721 } else {
722 res = false;
723 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
724 nf_ct_tcp_be_liberal)
725 res = true;
726 if (!res && LOG_INVALID(net, IPPROTO_TCP))
727 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
728 "nf_ct_tcp: %s ",
729 before(seq, sender->td_maxend + 1) ?
730 after(end, sender->td_end - receiver->td_maxwin - 1) ?
731 before(sack, receiver->td_end + 1) ?
732 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
733 : "ACK is under the lower bound (possible overly delayed ACK)"
734 : "ACK is over the upper bound (ACKed data not seen yet)"
735 : "SEQ is under the lower bound (already ACKed data retransmitted)"
736 : "SEQ is over the upper bound (over the window of the receiver)");
739 pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
740 "receiver end=%u maxend=%u maxwin=%u\n",
741 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
742 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
744 return res;
747 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */
748 static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
749 TCPHDR_URG) + 1] =
751 [TCPHDR_SYN] = 1,
752 [TCPHDR_SYN|TCPHDR_URG] = 1,
753 [TCPHDR_SYN|TCPHDR_ACK] = 1,
754 [TCPHDR_RST] = 1,
755 [TCPHDR_RST|TCPHDR_ACK] = 1,
756 [TCPHDR_FIN|TCPHDR_ACK] = 1,
757 [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1,
758 [TCPHDR_ACK] = 1,
759 [TCPHDR_ACK|TCPHDR_URG] = 1,
762 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
763 static int BCMFASTPATH_HOST tcp_error(struct net *net, struct nf_conn *tmpl,
764 struct sk_buff *skb,
765 unsigned int dataoff,
766 enum ip_conntrack_info *ctinfo,
767 u_int8_t pf,
768 unsigned int hooknum)
770 const struct tcphdr *th;
771 struct tcphdr _tcph;
772 unsigned int tcplen = skb->len - dataoff;
773 u_int8_t tcpflags;
775 /* Smaller that minimal TCP header? */
776 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
777 if (th == NULL) {
778 if (LOG_INVALID(net, IPPROTO_TCP))
779 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
780 "nf_ct_tcp: short packet ");
781 return -NF_ACCEPT;
784 /* Not whole TCP header or malformed packet */
785 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
786 if (LOG_INVALID(net, IPPROTO_TCP))
787 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
788 "nf_ct_tcp: truncated/malformed packet ");
789 return -NF_ACCEPT;
792 /* Checksum invalid? Ignore.
793 * We skip checking packets on the outgoing path
794 * because the checksum is assumed to be correct.
796 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
797 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
798 if (LOG_INVALID(net, IPPROTO_TCP))
799 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
800 "nf_ct_tcp: bad TCP checksum ");
801 return -NF_ACCEPT;
804 /* Check TCP flags. */
805 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
806 if (!tcp_valid_flags[tcpflags]) {
807 if (LOG_INVALID(net, IPPROTO_TCP))
808 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
809 "nf_ct_tcp: invalid TCP flag combination ");
810 return -NF_ACCEPT;
813 return NF_ACCEPT;
816 /* Returns verdict for packet, or -1 for invalid. */
817 static int BCMFASTPATH_HOST tcp_packet(struct nf_conn *ct,
818 const struct sk_buff *skb,
819 unsigned int dataoff,
820 enum ip_conntrack_info ctinfo,
821 u_int8_t pf,
822 unsigned int hooknum)
824 struct net *net = nf_ct_net(ct);
825 struct nf_conntrack_tuple *tuple;
826 enum tcp_conntrack new_state, old_state;
827 enum ip_conntrack_dir dir;
828 const struct tcphdr *th;
829 struct tcphdr _tcph;
830 unsigned long timeout;
831 unsigned int index;
833 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
834 BUG_ON(th == NULL);
836 spin_lock_bh(&ct->lock);
837 old_state = ct->proto.tcp.state;
838 dir = CTINFO2DIR(ctinfo);
839 index = get_conntrack_index(th);
840 new_state = tcp_conntracks[dir][index][old_state];
841 tuple = &ct->tuplehash[dir].tuple;
843 switch (new_state) {
844 case TCP_CONNTRACK_SYN_SENT:
845 if (old_state < TCP_CONNTRACK_TIME_WAIT)
846 break;
847 /* RFC 1122: "When a connection is closed actively,
848 * it MUST linger in TIME-WAIT state for a time 2xMSL
849 * (Maximum Segment Lifetime). However, it MAY accept
850 * a new SYN from the remote TCP to reopen the connection
851 * directly from TIME-WAIT state, if..."
852 * We ignore the conditions because we are in the
853 * TIME-WAIT state anyway.
855 * Handle aborted connections: we and the server
856 * think there is an existing connection but the client
857 * aborts it and starts a new one.
859 if (((ct->proto.tcp.seen[dir].flags
860 | ct->proto.tcp.seen[!dir].flags)
861 & IP_CT_TCP_FLAG_CLOSE_INIT)
862 || (ct->proto.tcp.last_dir == dir
863 && ct->proto.tcp.last_index == TCP_RST_SET)) {
864 /* Attempt to reopen a closed/aborted connection.
865 * Delete this connection and look up again. */
866 spin_unlock_bh(&ct->lock);
868 /* Only repeat if we can actually remove the timer.
869 * Destruction may already be in progress in process
870 * context and we must give it a chance to terminate.
872 if (nf_ct_kill(ct))
873 return -NF_REPEAT;
874 return NF_DROP;
876 /* Fall through */
877 case TCP_CONNTRACK_IGNORE:
878 /* Ignored packets:
880 * Our connection entry may be out of sync, so ignore
881 * packets which may signal the real connection between
882 * the client and the server.
884 * a) SYN in ORIGINAL
885 * b) SYN/ACK in REPLY
886 * c) ACK in reply direction after initial SYN in original.
888 * If the ignored packet is invalid, the receiver will send
889 * a RST we'll catch below.
891 if (index == TCP_SYNACK_SET
892 && ct->proto.tcp.last_index == TCP_SYN_SET
893 && ct->proto.tcp.last_dir != dir
894 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
895 /* b) This SYN/ACK acknowledges a SYN that we earlier
896 * ignored as invalid. This means that the client and
897 * the server are both in sync, while the firewall is
898 * not. We get in sync from the previously annotated
899 * values.
901 old_state = TCP_CONNTRACK_SYN_SENT;
902 new_state = TCP_CONNTRACK_SYN_RECV;
903 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
904 ct->proto.tcp.last_end;
905 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
906 ct->proto.tcp.last_end;
907 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
908 ct->proto.tcp.last_win == 0 ?
909 1 : ct->proto.tcp.last_win;
910 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
911 ct->proto.tcp.last_wscale;
912 ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
913 ct->proto.tcp.last_flags;
914 memset(&ct->proto.tcp.seen[dir], 0,
915 sizeof(struct ip_ct_tcp_state));
916 break;
918 ct->proto.tcp.last_index = index;
919 ct->proto.tcp.last_dir = dir;
920 ct->proto.tcp.last_seq = ntohl(th->seq);
921 ct->proto.tcp.last_end =
922 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
923 ct->proto.tcp.last_win = ntohs(th->window);
925 /* a) This is a SYN in ORIGINAL. The client and the server
926 * may be in sync but we are not. In that case, we annotate
927 * the TCP options and let the packet go through. If it is a
928 * valid SYN packet, the server will reply with a SYN/ACK, and
929 * then we'll get in sync. Otherwise, the server ignores it. */
930 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
931 struct ip_ct_tcp_state seen = {};
933 ct->proto.tcp.last_flags =
934 ct->proto.tcp.last_wscale = 0;
935 tcp_options(skb, dataoff, th, &seen);
936 if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
937 ct->proto.tcp.last_flags |=
938 IP_CT_TCP_FLAG_WINDOW_SCALE;
939 ct->proto.tcp.last_wscale = seen.td_scale;
941 if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
942 ct->proto.tcp.last_flags |=
943 IP_CT_TCP_FLAG_SACK_PERM;
946 spin_unlock_bh(&ct->lock);
947 if (LOG_INVALID(net, IPPROTO_TCP))
948 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
949 "nf_ct_tcp: invalid packet ignored ");
950 return NF_ACCEPT;
951 case TCP_CONNTRACK_MAX:
952 /* Invalid packet */
953 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
954 dir, get_conntrack_index(th), old_state);
955 spin_unlock_bh(&ct->lock);
956 if (LOG_INVALID(net, IPPROTO_TCP))
957 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
958 "nf_ct_tcp: invalid state ");
959 return -NF_ACCEPT;
960 case TCP_CONNTRACK_CLOSE:
961 if (index == TCP_RST_SET
962 && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
963 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
964 /* Invalid RST */
965 spin_unlock_bh(&ct->lock);
966 if (LOG_INVALID(net, IPPROTO_TCP))
967 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
968 "nf_ct_tcp: invalid RST ");
969 return -NF_ACCEPT;
971 if (index == TCP_RST_SET
972 && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
973 && ct->proto.tcp.last_index == TCP_SYN_SET)
974 || (!test_bit(IPS_ASSURED_BIT, &ct->status)
975 && ct->proto.tcp.last_index == TCP_ACK_SET))
976 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
977 /* RST sent to invalid SYN or ACK we had let through
978 * at a) and c) above:
980 * a) SYN was in window then
981 * c) we hold a half-open connection.
983 * Delete our connection entry.
984 * We skip window checking, because packet might ACK
985 * segments we ignored. */
986 goto in_window;
988 /* Just fall through */
989 default:
990 /* Keep compilers happy. */
991 break;
994 #ifdef HNDCTF
995 /* Remove the ipc entries on receipt of FIN or RST */
996 if (CTF_ENAB(kcih)) {
997 if (ct->ctf_flags & CTF_FLAGS_CACHED) {
998 if (th->fin || th->rst) {
999 ip_conntrack_ipct_delete(ct, 0);
1001 goto in_window;
1004 #endif /* HNDCTF */
1006 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1007 skb, dataoff, th, pf)) {
1008 spin_unlock_bh(&ct->lock);
1009 return -NF_ACCEPT;
1011 in_window:
1012 /* From now on we have got in-window packets */
1013 ct->proto.tcp.last_index = index;
1014 ct->proto.tcp.last_dir = dir;
1016 pr_debug("tcp_conntracks: ");
1017 nf_ct_dump_tuple(tuple);
1018 pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1019 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1020 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1021 old_state, new_state);
1023 ct->proto.tcp.state = new_state;
1024 if (old_state != new_state
1025 && new_state == TCP_CONNTRACK_FIN_WAIT)
1026 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1028 if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans &&
1029 tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans)
1030 timeout = nf_ct_tcp_timeout_max_retrans;
1031 else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1032 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1033 tcp_timeouts[new_state] > nf_ct_tcp_timeout_unacknowledged)
1034 timeout = nf_ct_tcp_timeout_unacknowledged;
1035 else
1036 timeout = tcp_timeouts[new_state];
1037 spin_unlock_bh(&ct->lock);
1039 if (new_state != old_state)
1040 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1042 if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1043 /* If only reply is a RST, we can consider ourselves not to
1044 have an established connection: this is a fairly common
1045 problem case, so we can delete the conntrack
1046 immediately. --RR */
1047 if (th->rst) {
1048 nf_ct_kill_acct(ct, ctinfo, skb);
1049 return NF_ACCEPT;
1051 } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1052 && (old_state == TCP_CONNTRACK_SYN_RECV
1053 || old_state == TCP_CONNTRACK_ESTABLISHED)
1054 && new_state == TCP_CONNTRACK_ESTABLISHED) {
1055 /* Set ASSURED if we see valid ack in ESTABLISHED
1056 after SYN_RECV or a valid answer for a picked up
1057 connection. */
1058 set_bit(IPS_ASSURED_BIT, &ct->status);
1059 nf_conntrack_event_cache(IPCT_ASSURED, ct);
1061 nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1063 return NF_ACCEPT;
1066 /* Called when a new connection for this protocol found. */
1067 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1068 unsigned int dataoff)
1070 enum tcp_conntrack new_state;
1071 const struct tcphdr *th;
1072 struct tcphdr _tcph;
1073 const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1074 const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1076 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1077 BUG_ON(th == NULL);
1079 /* Don't need lock here: this conntrack not in circulation yet */
1080 new_state
1081 = tcp_conntracks[0][get_conntrack_index(th)]
1082 [TCP_CONNTRACK_NONE];
1084 /* Invalid: delete conntrack */
1085 if (new_state >= TCP_CONNTRACK_MAX) {
1086 pr_debug("nf_ct_tcp: invalid new deleting.\n");
1087 return false;
1090 if (new_state == TCP_CONNTRACK_SYN_SENT) {
1091 /* SYN packet */
1092 ct->proto.tcp.seen[0].td_end =
1093 segment_seq_plus_len(ntohl(th->seq), skb->len,
1094 dataoff, th);
1095 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1096 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1097 ct->proto.tcp.seen[0].td_maxwin = 1;
1098 ct->proto.tcp.seen[0].td_maxend =
1099 ct->proto.tcp.seen[0].td_end;
1101 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1102 ct->proto.tcp.seen[1].flags = 0;
1103 } else if (nf_ct_tcp_loose == 0) {
1104 /* Don't try to pick up connections. */
1105 return false;
1106 } else {
1108 * We are in the middle of a connection,
1109 * its history is lost for us.
1110 * Let's try to use the data from the packet.
1112 ct->proto.tcp.seen[0].td_end =
1113 segment_seq_plus_len(ntohl(th->seq), skb->len,
1114 dataoff, th);
1115 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1116 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1117 ct->proto.tcp.seen[0].td_maxwin = 1;
1118 ct->proto.tcp.seen[0].td_maxend =
1119 ct->proto.tcp.seen[0].td_end +
1120 ct->proto.tcp.seen[0].td_maxwin;
1121 ct->proto.tcp.seen[0].td_scale = 0;
1123 /* We assume SACK and liberal window checking to handle
1124 * window scaling */
1125 ct->proto.tcp.seen[0].flags =
1126 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1127 IP_CT_TCP_FLAG_BE_LIBERAL;
1130 ct->proto.tcp.seen[1].td_end = 0;
1131 ct->proto.tcp.seen[1].td_maxend = 0;
1132 ct->proto.tcp.seen[1].td_maxwin = 0;
1133 ct->proto.tcp.seen[1].td_scale = 0;
1135 /* tcp_packet will set them */
1136 ct->proto.tcp.state = TCP_CONNTRACK_NONE;
1137 ct->proto.tcp.last_index = TCP_NONE_SET;
1139 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1140 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1141 sender->td_end, sender->td_maxend, sender->td_maxwin,
1142 sender->td_scale,
1143 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1144 receiver->td_scale);
1145 return true;
1148 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1150 #include <linux/netfilter/nfnetlink.h>
1151 #include <linux/netfilter/nfnetlink_conntrack.h>
1153 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1154 struct nf_conn *ct)
1156 struct nlattr *nest_parms;
1157 struct nf_ct_tcp_flags tmp = {};
1159 spin_lock_bh(&ct->lock);
1160 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1161 if (!nest_parms)
1162 goto nla_put_failure;
1164 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state);
1166 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1167 ct->proto.tcp.seen[0].td_scale);
1169 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1170 ct->proto.tcp.seen[1].td_scale);
1172 tmp.flags = ct->proto.tcp.seen[0].flags;
1173 NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1174 sizeof(struct nf_ct_tcp_flags), &tmp);
1176 tmp.flags = ct->proto.tcp.seen[1].flags;
1177 NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1178 sizeof(struct nf_ct_tcp_flags), &tmp);
1179 spin_unlock_bh(&ct->lock);
1181 nla_nest_end(skb, nest_parms);
1183 return 0;
1185 nla_put_failure:
1186 spin_unlock_bh(&ct->lock);
1187 return -1;
1190 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1191 [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
1192 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1193 [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
1194 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
1195 [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
1198 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1200 struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1201 struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1202 int err;
1204 /* updates could not contain anything about the private
1205 * protocol info, in that case skip the parsing */
1206 if (!pattr)
1207 return 0;
1209 err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1210 if (err < 0)
1211 return err;
1213 if (tb[CTA_PROTOINFO_TCP_STATE] &&
1214 nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1215 return -EINVAL;
1217 spin_lock_bh(&ct->lock);
1218 if (tb[CTA_PROTOINFO_TCP_STATE])
1219 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1221 if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1222 struct nf_ct_tcp_flags *attr =
1223 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1224 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1225 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1228 if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1229 struct nf_ct_tcp_flags *attr =
1230 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1231 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1232 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1235 if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1236 tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1237 ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1238 ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1239 ct->proto.tcp.seen[0].td_scale =
1240 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1241 ct->proto.tcp.seen[1].td_scale =
1242 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1244 spin_unlock_bh(&ct->lock);
1246 return 0;
1249 static int tcp_nlattr_size(void)
1251 return nla_total_size(0) /* CTA_PROTOINFO_TCP */
1252 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1255 static int tcp_nlattr_tuple_size(void)
1257 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1259 #endif
1261 #ifdef CONFIG_SYSCTL
1262 static unsigned int tcp_sysctl_table_users;
1263 static struct ctl_table_header *tcp_sysctl_header;
1264 static struct ctl_table tcp_sysctl_table[] = {
1266 .procname = "nf_conntrack_tcp_timeout_syn_sent",
1267 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
1268 .maxlen = sizeof(unsigned int),
1269 .mode = 0644,
1270 .proc_handler = proc_dointvec_jiffies,
1273 .procname = "nf_conntrack_tcp_timeout_syn_recv",
1274 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
1275 .maxlen = sizeof(unsigned int),
1276 .mode = 0644,
1277 .proc_handler = proc_dointvec_jiffies,
1280 .procname = "nf_conntrack_tcp_timeout_established",
1281 .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
1282 .maxlen = sizeof(unsigned int),
1283 .mode = 0644,
1284 .proc_handler = proc_dointvec_jiffies,
1287 .procname = "nf_conntrack_tcp_timeout_fin_wait",
1288 .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
1289 .maxlen = sizeof(unsigned int),
1290 .mode = 0644,
1291 .proc_handler = proc_dointvec_jiffies,
1294 .procname = "nf_conntrack_tcp_timeout_close_wait",
1295 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
1296 .maxlen = sizeof(unsigned int),
1297 .mode = 0644,
1298 .proc_handler = proc_dointvec_jiffies,
1301 .procname = "nf_conntrack_tcp_timeout_last_ack",
1302 .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
1303 .maxlen = sizeof(unsigned int),
1304 .mode = 0644,
1305 .proc_handler = proc_dointvec_jiffies,
1308 .procname = "nf_conntrack_tcp_timeout_time_wait",
1309 .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
1310 .maxlen = sizeof(unsigned int),
1311 .mode = 0644,
1312 .proc_handler = proc_dointvec_jiffies,
1315 .procname = "nf_conntrack_tcp_timeout_close",
1316 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
1317 .maxlen = sizeof(unsigned int),
1318 .mode = 0644,
1319 .proc_handler = proc_dointvec_jiffies,
1322 .procname = "nf_conntrack_tcp_timeout_max_retrans",
1323 .data = &nf_ct_tcp_timeout_max_retrans,
1324 .maxlen = sizeof(unsigned int),
1325 .mode = 0644,
1326 .proc_handler = proc_dointvec_jiffies,
1329 .procname = "nf_conntrack_tcp_timeout_unacknowledged",
1330 .data = &nf_ct_tcp_timeout_unacknowledged,
1331 .maxlen = sizeof(unsigned int),
1332 .mode = 0644,
1333 .proc_handler = proc_dointvec_jiffies,
1336 .procname = "nf_conntrack_tcp_loose",
1337 .data = &nf_ct_tcp_loose,
1338 .maxlen = sizeof(unsigned int),
1339 .mode = 0644,
1340 .proc_handler = proc_dointvec,
1343 .procname = "nf_conntrack_tcp_be_liberal",
1344 .data = &nf_ct_tcp_be_liberal,
1345 .maxlen = sizeof(unsigned int),
1346 .mode = 0644,
1347 .proc_handler = proc_dointvec,
1350 .procname = "nf_conntrack_tcp_max_retrans",
1351 .data = &nf_ct_tcp_max_retrans,
1352 .maxlen = sizeof(unsigned int),
1353 .mode = 0644,
1354 .proc_handler = proc_dointvec,
1359 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1360 static struct ctl_table tcp_compat_sysctl_table[] = {
1362 .procname = "ip_conntrack_tcp_timeout_syn_sent",
1363 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
1364 .maxlen = sizeof(unsigned int),
1365 .mode = 0644,
1366 .proc_handler = proc_dointvec_jiffies,
1369 .procname = "ip_conntrack_tcp_timeout_syn_sent2",
1370 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2],
1371 .maxlen = sizeof(unsigned int),
1372 .mode = 0644,
1373 .proc_handler = proc_dointvec_jiffies,
1376 .procname = "ip_conntrack_tcp_timeout_syn_recv",
1377 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
1378 .maxlen = sizeof(unsigned int),
1379 .mode = 0644,
1380 .proc_handler = proc_dointvec_jiffies,
1383 .procname = "ip_conntrack_tcp_timeout_established",
1384 .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
1385 .maxlen = sizeof(unsigned int),
1386 .mode = 0644,
1387 .proc_handler = proc_dointvec_jiffies,
1390 .procname = "ip_conntrack_tcp_timeout_fin_wait",
1391 .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
1392 .maxlen = sizeof(unsigned int),
1393 .mode = 0644,
1394 .proc_handler = proc_dointvec_jiffies,
1397 .procname = "ip_conntrack_tcp_timeout_close_wait",
1398 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
1399 .maxlen = sizeof(unsigned int),
1400 .mode = 0644,
1401 .proc_handler = proc_dointvec_jiffies,
1404 .procname = "ip_conntrack_tcp_timeout_last_ack",
1405 .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
1406 .maxlen = sizeof(unsigned int),
1407 .mode = 0644,
1408 .proc_handler = proc_dointvec_jiffies,
1411 .procname = "ip_conntrack_tcp_timeout_time_wait",
1412 .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
1413 .maxlen = sizeof(unsigned int),
1414 .mode = 0644,
1415 .proc_handler = proc_dointvec_jiffies,
1418 .procname = "ip_conntrack_tcp_timeout_close",
1419 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
1420 .maxlen = sizeof(unsigned int),
1421 .mode = 0644,
1422 .proc_handler = proc_dointvec_jiffies,
1425 .procname = "ip_conntrack_tcp_timeout_max_retrans",
1426 .data = &nf_ct_tcp_timeout_max_retrans,
1427 .maxlen = sizeof(unsigned int),
1428 .mode = 0644,
1429 .proc_handler = proc_dointvec_jiffies,
1432 .procname = "ip_conntrack_tcp_loose",
1433 .data = &nf_ct_tcp_loose,
1434 .maxlen = sizeof(unsigned int),
1435 .mode = 0644,
1436 .proc_handler = proc_dointvec,
1439 .procname = "ip_conntrack_tcp_be_liberal",
1440 .data = &nf_ct_tcp_be_liberal,
1441 .maxlen = sizeof(unsigned int),
1442 .mode = 0644,
1443 .proc_handler = proc_dointvec,
1446 .procname = "ip_conntrack_tcp_max_retrans",
1447 .data = &nf_ct_tcp_max_retrans,
1448 .maxlen = sizeof(unsigned int),
1449 .mode = 0644,
1450 .proc_handler = proc_dointvec,
1454 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1455 #endif /* CONFIG_SYSCTL */
1457 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1459 .l3proto = PF_INET,
1460 .l4proto = IPPROTO_TCP,
1461 .name = "tcp",
1462 .pkt_to_tuple = tcp_pkt_to_tuple,
1463 .invert_tuple = tcp_invert_tuple,
1464 .print_tuple = tcp_print_tuple,
1465 .print_conntrack = tcp_print_conntrack,
1466 .packet = tcp_packet,
1467 .new = tcp_new,
1468 .error = tcp_error,
1469 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1470 .to_nlattr = tcp_to_nlattr,
1471 .nlattr_size = tcp_nlattr_size,
1472 .from_nlattr = nlattr_to_tcp,
1473 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1474 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
1475 .nlattr_tuple_size = tcp_nlattr_tuple_size,
1476 .nla_policy = nf_ct_port_nla_policy,
1477 #endif
1478 #ifdef CONFIG_SYSCTL
1479 .ctl_table_users = &tcp_sysctl_table_users,
1480 .ctl_table_header = &tcp_sysctl_header,
1481 .ctl_table = tcp_sysctl_table,
1482 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1483 .ctl_compat_table = tcp_compat_sysctl_table,
1484 #endif
1485 #endif
1487 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1489 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1491 .l3proto = PF_INET6,
1492 .l4proto = IPPROTO_TCP,
1493 .name = "tcp",
1494 .pkt_to_tuple = tcp_pkt_to_tuple,
1495 .invert_tuple = tcp_invert_tuple,
1496 .print_tuple = tcp_print_tuple,
1497 .print_conntrack = tcp_print_conntrack,
1498 .packet = tcp_packet,
1499 .new = tcp_new,
1500 .error = tcp_error,
1501 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1502 .to_nlattr = tcp_to_nlattr,
1503 .nlattr_size = tcp_nlattr_size,
1504 .from_nlattr = nlattr_to_tcp,
1505 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1506 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
1507 .nlattr_tuple_size = tcp_nlattr_tuple_size,
1508 .nla_policy = nf_ct_port_nla_policy,
1509 #endif
1510 #ifdef CONFIG_SYSCTL
1511 .ctl_table_users = &tcp_sysctl_table_users,
1512 .ctl_table_header = &tcp_sysctl_header,
1513 .ctl_table = tcp_sysctl_table,
1514 #endif
1516 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);