/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   connection tracking module. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/socket.h>
//#ifdef CONFIG_BCM_NAT
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>

#define NF_CONNTRACK_VERSION "0.5.0"
#define DEBUGP(format, args...)

DEFINE_RWLOCK(nf_conntrack_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_lock);

/* nf_conntrack_standalone needs this */
atomic_t nf_conntrack_count = ATOMIC_INIT(0);
EXPORT_SYMBOL_GPL(nf_conntrack_count);

void (*nf_conntrack_destroyed)(struct nf_conn *conntrack);
EXPORT_SYMBOL_GPL(nf_conntrack_destroyed);

unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);

int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max);

struct list_head *nf_conntrack_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_hash);

struct nf_conn nf_conntrack_untracked __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_untracked);

unsigned int nf_ct_log_invalid __read_mostly;
LIST_HEAD(unconfirmed);
static int nf_conntrack_vmalloc __read_mostly;

static unsigned int nf_conntrack_next_id;

DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);

extern int ip_conntrack_ipct_delete(struct nf_conn *ct, int ct_timeout);
#if defined(CONFIG_BCM_NAT) || defined(CONFIG_BCM_NAT_MODULE)
extern int ipv4_conntrack_fastnat;

typedef int (*bcmNatBindHook)(struct nf_conn *ct,
			      enum ip_conntrack_info ctinfo,
			      struct sk_buff *skb,
			      struct nf_conntrack_l3proto *l3proto,
			      struct nf_conntrack_l4proto *l4proto);
typedef int (*bcmNatHitHook)(struct sk_buff *skb);

bcmNatBindHook bcm_nat_bind_hook = NULL;
bcmNatHitHook bcm_nat_hit_hook = NULL;
#ifdef CONFIG_BCM_NAT_MODULE
EXPORT_SYMBOL(bcm_nat_hit_hook);
EXPORT_SYMBOL(bcm_nat_bind_hook);
#endif
#endif
/*
 * This scheme offers various size of "struct nf_conn" dependent on
 * features (helper, nat, ...)
 */

#define NF_CT_FEATURES_NAMELEN 256
static struct {
	/* name of slab cache. printed in /proc/slabinfo */
	char *name;

	/* size of slab cache */
	unsigned int size;

	/* slab cache pointer */
	struct kmem_cache *cachep;

	/* allocated slab cache + modules which uses this slab cache */
	int use;

} nf_ct_cache[NF_CT_F_NUM];

/* protect members of nf_ct_cache except of "use" */
DEFINE_RWLOCK(nf_ct_cache_lock);

/* This avoids calling kmem_cache_create() with same name simultaneously */
static DEFINE_MUTEX(nf_ct_cache_mutex);
static unsigned int nf_conntrack_hash_rnd __read_mostly;

static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
				  unsigned int size, unsigned int rnd)
{
	unsigned int n;
	u_int32_t h;

	/* The direction must be ignored, so we hash everything up to the
	 * destination ports (which is a multiple of 4) and treat the last
	 * three bytes manually.
	 */
	n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
	h = jhash2((u32 *)tuple, n,
		   rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
			  tuple->dst.protonum));

	return ((u64)h * size) >> 32;
}
static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
{
	return __hash_conntrack(tuple, nf_conntrack_htable_size,
				nf_conntrack_hash_rnd);
}
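/*
 * Note on the multiply-shift in __hash_conntrack(): ((u64)h * size) >> 32
 * maps the 32-bit jhash value uniformly onto [0, size) without a modulo,
 * which also works for table sizes that are not powers of two.  A sketch
 * (illustrative) of the bucket computation for a given tuple:
 *
 *	unsigned int bucket = hash_conntrack(&tuple);
 *	// bucket indexes nf_conntrack_hash[], 0 <= bucket < htable_size
 */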
int nf_conntrack_register_cache(u_int32_t features, const char *name,
				size_t size)
{
	int ret = 0;
	char *cache_name;
	struct kmem_cache *cachep;

	DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n",
	       features, name, size);

	if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) {
		DEBUGP("nf_conntrack_register_cache: invalid features.: 0x%x\n",
		       features);
		return -EINVAL;
	}

	mutex_lock(&nf_ct_cache_mutex);

	write_lock_bh(&nf_ct_cache_lock);
	/* e.g: multiple helpers are loaded */
	if (nf_ct_cache[features].use > 0) {
		DEBUGP("nf_conntrack_register_cache: already registered.\n");
		if ((!strncmp(nf_ct_cache[features].name, name,
			      NF_CT_FEATURES_NAMELEN))
		    && nf_ct_cache[features].size == size) {
			DEBUGP("nf_conntrack_register_cache: reusing.\n");
			nf_ct_cache[features].use++;
			ret = 0;
		} else
			ret = -EBUSY;

		write_unlock_bh(&nf_ct_cache_lock);
		mutex_unlock(&nf_ct_cache_mutex);
		return ret;
	}
	write_unlock_bh(&nf_ct_cache_lock);

	/*
	 * The memory space for name of slab cache must be alive until
	 * cache is destroyed.
	 */
	cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC);
	if (cache_name == NULL) {
		DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n");
		ret = -ENOMEM;
		goto out_up_mutex;
	}

	if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN)
						>= NF_CT_FEATURES_NAMELEN) {
		printk("nf_conntrack_register_cache: name too long\n");
		ret = -EINVAL;
		goto out_free_name;
	}

	cachep = kmem_cache_create(cache_name, size, 0, 0,
				   NULL, NULL);
	if (!cachep) {
		printk("nf_conntrack_register_cache: Can't create slab cache "
		       "for the features = 0x%x\n", features);
		ret = -ENOMEM;
		goto out_free_name;
	}

	write_lock_bh(&nf_ct_cache_lock);
	nf_ct_cache[features].use = 1;
	nf_ct_cache[features].size = size;
	nf_ct_cache[features].cachep = cachep;
	nf_ct_cache[features].name = cache_name;
	write_unlock_bh(&nf_ct_cache_lock);

	goto out_up_mutex;

out_free_name:
	kfree(cache_name);
out_up_mutex:
	mutex_unlock(&nf_ct_cache_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_cache);
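/*
 * Illustrative usage (hypothetical caller): a feature that embeds extra
 * state after struct nf_conn registers a correspondingly larger cache,
 * and releases it again on unload:
 *
 *	ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
 *					  sizeof(struct nf_conn)
 *					  + sizeof(struct nf_conn_help));
 *	...
 *	nf_conntrack_unregister_cache(NF_CT_F_HELP);
 *
 * Registration is refcounted: a second registration with the same name
 * and size just increments .use.
 */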
/* FIXME: Currently, only nf_conntrack_cleanup() can call this function. */
void nf_conntrack_unregister_cache(u_int32_t features)
{
	struct kmem_cache *cachep;
	char *name;

	/*
	 * This assures that kmem_cache_create() isn't called before destroying
	 * slab cache.
	 */
	DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features);
	mutex_lock(&nf_ct_cache_mutex);

	write_lock_bh(&nf_ct_cache_lock);
	if (--nf_ct_cache[features].use > 0) {
		write_unlock_bh(&nf_ct_cache_lock);
		mutex_unlock(&nf_ct_cache_mutex);
		return;
	}
	cachep = nf_ct_cache[features].cachep;
	name = nf_ct_cache[features].name;
	nf_ct_cache[features].cachep = NULL;
	nf_ct_cache[features].name = NULL;
	nf_ct_cache[features].size = 0;
	write_unlock_bh(&nf_ct_cache_lock);

	synchronize_net();

	kmem_cache_destroy(cachep);
	kfree(name);
	mutex_unlock(&nf_ct_cache_mutex);
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_cache);
int
nf_ct_get_tuple(const struct sk_buff *skb,
		unsigned int nhoff,
		unsigned int dataoff,
		u_int16_t l3num,
		u_int8_t protonum,
		struct nf_conntrack_tuple *tuple,
		const struct nf_conntrack_l3proto *l3proto,
		const struct nf_conntrack_l4proto *l4proto)
{
	memset(tuple, 0, sizeof(*tuple));

	tuple->src.l3num = l3num;
	if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
		return 0;

	tuple->dst.protonum = protonum;
	tuple->dst.dir = IP_CT_DIR_ORIGINAL;

	return l4proto->pkt_to_tuple(skb, dataoff, tuple);
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
int
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
		   const struct nf_conntrack_tuple *orig,
		   const struct nf_conntrack_l3proto *l3proto,
		   const struct nf_conntrack_l4proto *l4proto)
{
	memset(inverse, 0, sizeof(*inverse));

	inverse->src.l3num = orig->src.l3num;
	if (l3proto->invert_tuple(inverse, orig) == 0)
		return 0;

	inverse->dst.dir = !orig->dst.dir;

	inverse->dst.protonum = orig->dst.protonum;
	return l4proto->invert_tuple(inverse, orig);
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
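/*
 * Illustrative sketch (hypothetical caller): the two helpers above are
 * typically used together, first extracting the ORIGINAL-direction tuple
 * from a packet and then deriving the tuple a reply would carry:
 *
 *	struct nf_conntrack_tuple tuple, reply;
 *
 *	if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff,
 *			     l3num, protonum, &tuple, l3proto, l4proto))
 *		return NULL;
 *	if (!nf_ct_invert_tuple(&reply, &tuple, l3proto, l4proto))
 *		return NULL;
 */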
#if defined(CONFIG_BCM_NAT) || defined(CONFIG_BCM_NAT_MODULE)
#ifndef CONFIG_BCM_NAT_MODULE
static
#endif
int bcm_nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
			   const struct nf_conntrack_tuple *orig,
			   const struct nf_conntrack_l3proto *l3proto,
			   const struct nf_conntrack_l4proto *l4proto)
{
	return nf_ct_invert_tuple(inverse, orig, l3proto, l4proto);
}
#ifdef CONFIG_BCM_NAT_MODULE
EXPORT_SYMBOL(bcm_nf_ct_invert_tuple);
#endif
#endif
static void
clean_from_lists(struct nf_conn *ct)
{
	DEBUGP("clean_from_lists(%p)\n", ct);
	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
	list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);

	/* Destroy all pending expectations */
	nf_ct_remove_expectations(ct);
}
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct nf_conn *ct = (struct nf_conn *)nfct;
	struct nf_conntrack_l4proto *l4proto;
	typeof(nf_conntrack_destroyed) destroyed;

	DEBUGP("destroy_conntrack(%p)\n", ct);
	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
	NF_CT_ASSERT(!timer_pending(&ct->timeout));

	ip_conntrack_ipct_delete(ct, 0);

	nf_conntrack_event(IPCT_DESTROY, ct);
	set_bit(IPS_DYING_BIT, &ct->status);

	/* To make sure we don't get any weird locking issues here:
	 * destroy_conntrack() MUST NOT be called with a write lock
	 * to nf_conntrack_lock!!! -HW */
	rcu_read_lock();
	l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num,
				       ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
	if (l4proto && l4proto->destroy)
		l4proto->destroy(ct);

	destroyed = rcu_dereference(nf_conntrack_destroyed);
	if (destroyed)
		destroyed(ct);

	rcu_read_unlock();

	write_lock_bh(&nf_conntrack_lock);
	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before connection is in the list, so we need to clean here,
	 * too. */
	nf_ct_remove_expectations(ct);

#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE)
	if (ct->layer7.app_proto)
		kfree(ct->layer7.app_proto);
	if (ct->layer7.app_data)
		kfree(ct->layer7.app_data);
#endif

	/* We overload first tuple to link into unconfirmed list. */
	if (!nf_ct_is_confirmed(ct)) {
		BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
	}

	NF_CT_STAT_INC(delete);
	write_unlock_bh(&nf_conntrack_lock);

	if (ct->master)
		nf_ct_put(ct->master);

	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
	nf_conntrack_free(ct);
}
static void death_by_timeout(unsigned long ul_conntrack)
{
	struct nf_conn *ct = (void *)ul_conntrack;
	struct nf_conn_help *help = nfct_help(ct);
	struct nf_conntrack_helper *helper;

	/* If negative error is returned it means the entry hasn't
	 * been removed from its list yet and must be kept alive, so
	 * don't destroy it here. */
	if (ip_conntrack_ipct_delete(ct, jiffies >= ct->timeout.expires ? 1 : 0) != 0)
		return;

	if (help) {
		helper = rcu_dereference(help->helper);
		if (helper && helper->destroy)
			helper->destroy(ct);
	}

	write_lock_bh(&nf_conntrack_lock);
	/* Inside lock so preempt is disabled on module removal path.
	 * Otherwise we can get spurious warnings. */
	NF_CT_STAT_INC(delete_list);
	clean_from_lists(ct);
	write_unlock_bh(&nf_conntrack_lock);
	nf_ct_put(ct);
}
struct nf_conntrack_tuple_hash *
__nf_conntrack_find(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_tuple_hash *h;
	unsigned int hash = hash_conntrack(tuple);

	list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
		if (nf_ct_tuple_equal(tuple, &h->tuple)) {
			NF_CT_STAT_INC(found);
			return h;
		}
		NF_CT_STAT_INC(searched);
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_find);
/* Find a connection corresponding to a tuple. */
struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
		      const struct nf_conn *ignored_conntrack)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	read_lock_bh(&nf_conntrack_lock);
	h = __nf_conntrack_find(tuple);
	if (h) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		if (unlikely(nf_ct_is_dying(ct) ||
			     !atomic_inc_not_zero(&ct->ct_general.use)))
			h = NULL;
	}
	read_unlock_bh(&nf_conntrack_lock);

	return h;
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
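/*
 * Illustrative note: a successful nf_conntrack_find_get() returns with
 * the reference count already raised, so the caller must drop it:
 *
 *	h = nf_conntrack_find_get(&tuple, NULL);
 *	if (h) {
 *		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
 *		...
 *		nf_ct_put(ct);
 *	}
 */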
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
				       unsigned int hash,
				       unsigned int repl_hash)
{
	ct->id = ++nf_conntrack_next_id;
	list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
		 &nf_conntrack_hash[hash]);
	list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
		 &nf_conntrack_hash[repl_hash]);
}

void nf_conntrack_hash_insert(struct nf_conn *ct)
{
	unsigned int hash, repl_hash;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	write_lock_bh(&nf_conntrack_lock);
	__nf_conntrack_hash_insert(ct, hash, repl_hash);
	write_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert);
/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
	unsigned int hash, repl_hash;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct nf_conn_help *help;
	enum ip_conntrack_info ctinfo;

	ct = nf_ct_get(skb, &ctinfo);

	/* ipt_REJECT uses nf_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction.  Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	/* We're not in hash table, and we refuse to set up related
	   connections for unconfirmed conns.  But packet copies and
	   REJECT will give spurious warnings here. */
	/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means no one else could have
	   confirmed us. */
	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
	DEBUGP("Confirming conntrack %p\n", ct);

	write_lock_bh(&nf_conntrack_lock);

	/* We have to check the DYING flag inside the lock to prevent
	   a race against nf_ct_get_next_corpse() possibly called from
	   user context, else we insert an already 'dead' hash, blocking
	   further use of that particular connection -JM */

	if (unlikely(nf_ct_is_dying(ct))) {
		write_unlock_bh(&nf_conntrack_lock);
		return NF_ACCEPT;
	}

	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	list_for_each_entry(h, &nf_conntrack_hash[hash], list)
		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				      &h->tuple))
			goto out;
	list_for_each_entry(h, &nf_conntrack_hash[repl_hash], list)
		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				      &h->tuple))
			goto out;

	/* Remove from unconfirmed list */
	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);

	__nf_conntrack_hash_insert(ct, hash, repl_hash);
	/* Timer relative to confirmation time, not original
	   setting time, otherwise we'd get timer wrap in
	   weird delay cases. */
	ct->timeout.expires += jiffies;
	add_timer(&ct->timeout);
	atomic_inc(&ct->ct_general.use);
	set_bit(IPS_CONFIRMED_BIT, &ct->status);
	NF_CT_STAT_INC(insert);
	write_unlock_bh(&nf_conntrack_lock);
	help = nfct_help(ct);
	if (help && help->helper)
		nf_conntrack_event_cache(IPCT_HELPER, skb);
#ifdef CONFIG_NF_NAT_NEEDED
	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
		nf_conntrack_event_cache(IPCT_NATINFO, skb);
#endif
	nf_conntrack_event_cache(master_ct(ct) ?
				 IPCT_RELATED : IPCT_NEW, skb);
	return NF_ACCEPT;

out:
	NF_CT_STAT_INC(insert_failed);
	write_unlock_bh(&nf_conntrack_lock);
	return NF_DROP;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
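/*
 * Summary of the confirm path: only ORIGINAL-direction packets confirm a
 * conntrack; both hash chains are searched so that a racing NAT insertion
 * loses cleanly; on success the entry leaves the unconfirmed list, the
 * timeout starts ticking relative to confirmation time, and an extra
 * reference is held by the hash table itself.
 */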
/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
			 const struct nf_conn *ignored_conntrack)
{
	struct nf_conntrack_tuple_hash *h;
	unsigned int hash = hash_conntrack(tuple);

	read_lock_bh(&nf_conntrack_lock);
	list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
		if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
		    nf_ct_tuple_equal(tuple, &h->tuple)) {
			NF_CT_STAT_INC(found);
			read_unlock_bh(&nf_conntrack_lock);
			return 1;
		}
		NF_CT_STAT_INC(searched);
	}
	read_unlock_bh(&nf_conntrack_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static noinline
int early_drop(struct list_head *chain)
{
	/* Traverse backwards: gives us oldest, which is roughly LRU */
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct = NULL, *tmp;
	int dropped = 0;

	read_lock_bh(&nf_conntrack_lock);
	list_for_each_entry_reverse(h, chain, list) {
		tmp = nf_ct_tuplehash_to_ctrack(h);
		if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
			ct = tmp;
			atomic_inc(&ct->ct_general.use);
			break;
		}
	}
	read_unlock_bh(&nf_conntrack_lock);

	if (!ct)
		return dropped;

	ip_conntrack_ipct_delete(ct, 0);

	if (del_timer(&ct->timeout)) {
		death_by_timeout((unsigned long)ct);
		dropped = 1;
		NF_CT_STAT_INC_ATOMIC(early_drop);
	}
	nf_ct_put(ct);
	return dropped;
}
static struct nf_conn *
__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
		     const struct nf_conntrack_tuple *repl,
		     const struct nf_conntrack_l3proto *l3proto,
		     u_int32_t features)
{
	struct nf_conn *conntrack = NULL;
	struct nf_conntrack_helper *helper;

	if (unlikely(!nf_conntrack_hash_rnd)) {
		unsigned int rand;

		/*
		 * Why not initialize nf_conntrack_hash_rnd in a "init()" function ?
		 * Because there isn't enough entropy when system initializing,
		 * and we initialize it as late as possible.
		 */
		do {
			get_random_bytes(&rand, sizeof(rand));
		} while (!rand);
		cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
	}

	/* We don't want any race condition at early drop stage */
	atomic_inc(&nf_conntrack_count);

	if (nf_conntrack_max &&
	    unlikely(atomic_read(&nf_conntrack_count) > nf_conntrack_max)) {
		unsigned int hash = hash_conntrack(orig);
		/* Try dropping from this hash chain. */
		if (!early_drop(&nf_conntrack_hash[hash])) {
			atomic_dec(&nf_conntrack_count);
			if (net_ratelimit())
				printk(KERN_WARNING
				       "nf_conntrack: table full, dropping"
				       " packet.\n");
			return ERR_PTR(-ENOMEM);
		}
	}

	/* find features needed by this conntrack. */
	features |= l3proto->get_features(orig);

	/* FIXME: protect helper list per RCU */
	read_lock_bh(&nf_conntrack_lock);
	helper = __nf_ct_helper_find(repl);
	/* NAT might want to assign a helper later */
	if (helper || features & NF_CT_F_NAT)
		features |= NF_CT_F_HELP;
	read_unlock_bh(&nf_conntrack_lock);

	DEBUGP("nf_conntrack_alloc: features=0x%x\n", features);

	read_lock_bh(&nf_ct_cache_lock);

	if (unlikely(!nf_ct_cache[features].use)) {
		DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n",
		       features);
		goto out;
	}

	conntrack = kmem_cache_zalloc(nf_ct_cache[features].cachep, GFP_ATOMIC);
	if (conntrack == NULL) {
		DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n");
		goto out;
	}

	conntrack->features = features;
	atomic_set(&conntrack->ct_general.use, 1);
	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
	/* Don't set timer yet: wait for confirmation */
	setup_timer(&conntrack->timeout, death_by_timeout,
		    (unsigned long)conntrack);
	read_unlock_bh(&nf_ct_cache_lock);

	return conntrack;
out:
	read_unlock_bh(&nf_ct_cache_lock);
	atomic_dec(&nf_conntrack_count);
	return conntrack;
}
struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
				   const struct nf_conntrack_tuple *repl)
{
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conn *ct;

	rcu_read_lock();
	l3proto = __nf_ct_l3proto_find(orig->src.l3num);
	ct = __nf_conntrack_alloc(orig, repl, l3proto, 0);
	rcu_read_unlock();

	return ct;
}
EXPORT_SYMBOL_GPL(nf_conntrack_alloc);

void nf_conntrack_free(struct nf_conn *conntrack)
{
	u_int32_t features = conntrack->features;

	NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM);
	DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features,
	       conntrack);
	kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
	atomic_dec(&nf_conntrack_count);
}
EXPORT_SYMBOL_GPL(nf_conntrack_free);
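/*
 * Illustrative lifecycle sketch (hypothetical caller): allocation and
 * free are symmetric around the feature-sized slab caches:
 *
 *	ct = nf_conntrack_alloc(&orig, &repl);
 *	if (ct == NULL || IS_ERR(ct))
 *		return NULL;
 *	// ct starts with ct_general.use == 1 and its timer not armed
 *	...
 *	nf_conntrack_free(ct);	// returns it to nf_ct_cache[features]
 */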
/* Allocate a new conntrack: we return -ENOMEM if classification
   failed due to stress.  Otherwise it really is unclassifiable. */
static struct nf_conntrack_tuple_hash *
init_conntrack(const struct nf_conntrack_tuple *tuple,
	       struct nf_conntrack_l3proto *l3proto,
	       struct nf_conntrack_l4proto *l4proto,
	       struct sk_buff *skb,
	       unsigned int dataoff)
{
	struct nf_conn *conntrack;
	struct nf_conn_help *help;
	struct nf_conntrack_tuple repl_tuple;
	struct nf_conntrack_expect *exp;
	u_int32_t features = 0;

	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
		DEBUGP("Can't invert tuple.\n");
		return NULL;
	}

	read_lock_bh(&nf_conntrack_lock);
	exp = __nf_conntrack_expect_find(tuple);
	if (exp && exp->helper)
		features = NF_CT_F_HELP;
	read_unlock_bh(&nf_conntrack_lock);

	conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto, features);
	if (conntrack == NULL || IS_ERR(conntrack)) {
		DEBUGP("Can't allocate conntrack.\n");
		return (struct nf_conntrack_tuple_hash *)conntrack;
	}

	if (!l4proto->new(conntrack, skb, dataoff)) {
		nf_conntrack_free(conntrack);
		DEBUGP("init conntrack: can't track with proto module\n");
		return NULL;
	}

	write_lock_bh(&nf_conntrack_lock);

	exp = find_expectation(tuple);

	help = nfct_help(conntrack);
	if (exp) {
		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
		       conntrack, exp);
		/* Welcome, Mr. Bond.  We've been expecting you... */
		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
		conntrack->master = exp->master;
		if (exp->helper)
			rcu_assign_pointer(help->helper, exp->helper);
#ifdef CONFIG_NF_CONNTRACK_MARK
		conntrack->mark = exp->master->mark;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
		conntrack->secmark = exp->master->secmark;
#endif
		nf_conntrack_get(&conntrack->master->ct_general);
		NF_CT_STAT_INC(expect_new);
	} else {
		if (help) {
			/* not in hash table yet, so not strictly necessary */
			rcu_assign_pointer(help->helper,
					   __nf_ct_helper_find(&repl_tuple));
		}
		NF_CT_STAT_INC(new);
	}

	/* Overload tuple linked list to put us in unconfirmed list. */
	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);

	write_unlock_bh(&nf_conntrack_lock);

	if (exp) {
		if (exp->expectfn)
			exp->expectfn(conntrack, exp);
		nf_conntrack_expect_put(exp);
	}

	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}
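/*
 * Note: a conntrack born in init_conntrack() always starts life on the
 * unconfirmed list.  When an expectation matched, the master's mark and
 * secmark are inherited and the expectfn (e.g. NAT setup installed by a
 * helper) runs only after nf_conntrack_lock has been dropped.
 */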
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct nf_conn *
resolve_normal_ct(struct sk_buff *skb,
		  unsigned int dataoff,
		  u_int16_t l3num,
		  u_int8_t protonum,
		  struct nf_conntrack_l3proto *l3proto,
		  struct nf_conntrack_l4proto *l4proto,
		  int *set_reply,
		  enum ip_conntrack_info *ctinfo)
{
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
			     dataoff, l3num, protonum, &tuple, l3proto,
			     l4proto)) {
		DEBUGP("resolve_normal_ct: Can't get tuple\n");
		return NULL;
	}

	/* look for tuple match */
	h = nf_conntrack_find_get(&tuple, NULL);
	if (!h) {
		h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff);
		if (!h)
			return NULL;
		if (IS_ERR(h))
			return (void *)h;
	}
	ct = nf_ct_tuplehash_to_ctrack(h);

	/* It exists; we have (non-exclusive) reference. */
	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
		/* Please set reply bit if this packet OK */
		*set_reply = 1;
	} else {
		/* Once we've had two way comms, always ESTABLISHED. */
		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
			DEBUGP("nf_conntrack_in: normal packet for %p\n", ct);
			*ctinfo = IP_CT_ESTABLISHED;
		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
			DEBUGP("nf_conntrack_in: related packet for %p\n", ct);
			*ctinfo = IP_CT_RELATED;
		} else {
			DEBUGP("nf_conntrack_in: new packet for %p\n", ct);
			*ctinfo = IP_CT_NEW;
		}
		*set_reply = 0;
	}
	skb->nfct = &ct->ct_general;
	skb->nfctinfo = *ctinfo;
	return ct;
}
#if defined(CONFIG_BCM_NAT) || defined(CONFIG_BCM_NAT_MODULE)
extern int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user);
#endif
unsigned int
nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_l4proto *l4proto;
	unsigned int dataoff;
	u_int8_t protonum;
	int set_reply = 0;
	int ret;
#if defined(CONFIG_BCM_NAT) || defined(CONFIG_BCM_NAT_MODULE)
	struct nf_conn_nat *nat = NULL;
#endif

	/* Previously seen (loopback or untracked)?  Ignore. */
	if (skb->nfct) {
		NF_CT_STAT_INC_ATOMIC(ignore);
		return NF_ACCEPT;
	}

	/* rcu_read_lock()ed by nf_hook_slow */
	l3proto = __nf_ct_l3proto_find((u_int16_t)pf);

	if ((ret = l3proto->prepare(skb, hooknum, &dataoff, &protonum)) <= 0) {
		DEBUGP("not prepared to track yet or error occurred\n");
		return -ret;
	}

#if defined(CONFIG_BCM_NAT) || defined(CONFIG_BCM_NAT_MODULE)
	if (pf == PF_INET && ipv4_conntrack_fastnat) {
		/* Gather fragments. */
		if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
			if (nf_ct_ipv4_gather_frags(skb,
						    hooknum == NF_IP_PRE_ROUTING ?
						    IP_DEFRAG_CONNTRACK_IN :
						    IP_DEFRAG_CONNTRACK_OUT))
				return NF_STOLEN;
		}
	}
#endif

	l4proto = __nf_ct_l4proto_find((u_int16_t)pf, protonum);

	/* It may be an special packet, error, unclean...
	 * inverse of the return code tells to the netfilter
	 * core what to do with the packet. */
	if (l4proto->error != NULL &&
	    (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
		NF_CT_STAT_INC_ATOMIC(error);
		NF_CT_STAT_INC_ATOMIC(invalid);
		return -ret;
	}

	ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto,
			       &set_reply, &ctinfo);
	if (!ct) {
		/* Not valid part of a connection */
		NF_CT_STAT_INC_ATOMIC(invalid);
		return NF_ACCEPT;
	}

	if (IS_ERR(ct)) {
		/* Too stressed to deal. */
		NF_CT_STAT_INC_ATOMIC(drop);
		return NF_DROP;
	}

	NF_CT_ASSERT(skb->nfct);

	ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
	if (ret < 0) {
		/* Invalid: inverse of the return code tells
		 * the netfilter core what to do */
		DEBUGP("nf_conntrack_in: Can't track with proto module\n");
		nf_conntrack_put(skb->nfct);
		skb->nfct = NULL;
		NF_CT_STAT_INC_ATOMIC(invalid);
		if (ret == -NF_DROP)
			NF_CT_STAT_INC_ATOMIC(drop);
		return -ret;
	}

#if defined(CONFIG_BCM_NAT) || defined(CONFIG_BCM_NAT_MODULE)
	nat = nfct_nat(ct);
	if (nat && hooknum == NF_IP_PRE_ROUTING &&
	    ipv4_conntrack_fastnat && bcm_nat_bind_hook) {
		struct nf_conn_help *help = nfct_help(ct);

		if (!(nat->info.nat_type & BCM_FASTNAT_DENY) &&
		    (!help || !help->helper) &&
		    (ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_IS_REPLY) &&
		    (protonum == IPPROTO_TCP || protonum == IPPROTO_UDP)) {
			struct nf_conntrack_tuple *t1, *t2;

			t1 = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
			t2 = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
			if (!(t1->dst.u3.ip == t2->src.u3.ip &&
			      t1->src.u3.ip == t2->dst.u3.ip &&
			      t1->dst.u.all == t2->src.u.all &&
			      t1->src.u.all == t2->dst.u.all)) {
				ret = bcm_nat_bind_hook(ct, ctinfo, skb,
							l3proto, l4proto);
			}
		}
	}
#endif

	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
#if defined(CONFIG_BCM_NAT) || defined(CONFIG_BCM_NAT_MODULE)
		if (nat && hooknum == NF_IP_LOCAL_OUT)
			nat->info.nat_type |= BCM_FASTNAT_DENY;
#endif
		nf_conntrack_event_cache(IPCT_STATUS, skb);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_in);
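/*
 * Per-packet flow of nf_conntrack_in(), in order: l3proto->prepare()
 * locates the l4 header, l4proto->error() filters malformed packets,
 * resolve_normal_ct() finds or creates the conntrack and sets skb->nfct,
 * l4proto->packet() runs the protocol state machine, and (under
 * CONFIG_BCM_NAT) eligible established TCP/UDP flows are offered to the
 * fastnat bind hook before the reply-seen event is cached.
 */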
int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
			 const struct nf_conntrack_tuple *orig)
{
	int ret;

	rcu_read_lock();
	ret = nf_ct_invert_tuple(inverse, orig,
				 __nf_ct_l3proto_find(orig->src.l3num),
				 __nf_ct_l4proto_find(orig->src.l3num,
						      orig->dst.protonum));
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
static const u8 expecting_none[NF_CT_MAX_EXPECT_CLASSES] = { 0 };

static inline int nfct_help_expecting(struct nf_conn_help *help)
{
	return (memcmp(&(help->expecting), &expecting_none,
		       sizeof(help->expecting)) != 0);
}
/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
   implicitly racy: see __nf_conntrack_confirm */
void nf_conntrack_alter_reply(struct nf_conn *ct,
			      const struct nf_conntrack_tuple *newreply)
{
	struct nf_conn_help *help = nfct_help(ct);

	write_lock_bh(&nf_conntrack_lock);
	/* Should be unconfirmed, so not in hash table yet */
	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));

	DEBUGP("Altering reply tuple of %p to ", ct);
	NF_CT_DUMP_TUPLE(newreply);

	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
	if (!ct->master && help && nfct_help_expecting(help) == 0) {
		struct nf_conntrack_helper *helper;
		helper = __nf_ct_helper_find(newreply);
		if (helper)
			memset(&help->help, 0, sizeof(help->help));
		/* not in hash table yet, so not strictly necessary */
		rcu_assign_pointer(help->helper, helper);
	}
	write_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
void __nf_ct_refresh_acct(struct nf_conn *ct,
			  enum ip_conntrack_info ctinfo,
			  const struct sk_buff *skb,
			  unsigned long extra_jiffies,
			  int do_acct)
{
	int event = 0;

	NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
	NF_CT_ASSERT(skb);

	write_lock_bh(&nf_conntrack_lock);

	/* Only update if this is not a fixed timeout */
	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
		goto acct;

	/* If not in hash table, timer will not be active yet */
	if (!nf_ct_is_confirmed(ct)) {
#if defined(CONFIG_BCM_NAT) || defined(CONFIG_BCM_NAT_MODULE)
		ct->expire_jiffies = extra_jiffies;
#endif
		ct->timeout.expires = extra_jiffies;
		event = IPCT_REFRESH;
	} else {
		unsigned long newtime = jiffies + extra_jiffies;

		/* Only update the timeout if the new timeout is at least
		   HZ jiffies from the old timeout. Need del_timer for race
		   avoidance (may already be dying). */
		if (newtime - ct->timeout.expires >= HZ
		    && del_timer(&ct->timeout)) {
#if defined(CONFIG_BCM_NAT) || defined(CONFIG_BCM_NAT_MODULE)
			ct->expire_jiffies = extra_jiffies;
#endif
			ct->timeout.expires = newtime;
			add_timer(&ct->timeout);
			event = IPCT_REFRESH;
		}
	}

acct:
#ifdef CONFIG_NF_CT_ACCT
	if (do_acct) {
		ct->counters[CTINFO2DIR(ctinfo)].packets++;
		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
			skb->len - skb_network_offset(skb);

		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
			event |= IPCT_COUNTER_FILLING;
	}
#endif

	write_unlock_bh(&nf_conntrack_lock);

	/* must be unlocked when calling event cache */
	if (event)
		nf_conntrack_event_cache(event, skb);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
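/*
 * Illustrative usage (hypothetical caller): protocol trackers normally
 * go through the nf_ct_refresh_acct() wrapper, which passes do_acct = 1,
 * to extend the timeout and account the packet in one step:
 *
 *	nf_ct_refresh_acct(ct, ctinfo, skb, timeout_jiffies);
 */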
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/mutex.h>

/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
 * in ip_conntrack_core, since we don't want the protocols to autoload
 * or depend on ctnetlink */
int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
			       const struct nf_conntrack_tuple *tuple)
{
	NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
		&tuple->src.u.tcp.port);
	NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
		&tuple->dst.u.tcp.port);
	return 0;

nfattr_failure:
	return -1;
}
EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nfattr);

static const size_t cta_min_proto[CTA_PROTO_MAX] = {
	[CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
	[CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t)
};
int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[],
			       struct nf_conntrack_tuple *t)
{
	if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
		return -EINVAL;

	if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
		return -EINVAL;

	t->src.u.tcp.port = *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
	t->dst.u.tcp.port = *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_port_nfattr_to_tuple);
#endif
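/*
 * Note: the two functions above are the ctnetlink (de)serializers for
 * all port-based protocols.  A dump emits CTA_PROTO_SRC_PORT/DST_PORT
 * attributes; parsing requires both attributes to be present with
 * exactly sizeof(u_int16_t) of data, as enforced via cta_min_proto[].
 */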
/* Used by ipt_REJECT and ip6t_REJECT. */
void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;

	/* This ICMP is in reverse direction to the packet which caused it */
	ct = nf_ct_get(skb, &ctinfo);
	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
	else
		ctinfo = IP_CT_RELATED;

	/* Attach to new skbuff, and increment count */
	nskb->nfct = &ct->ct_general;
	nskb->nfctinfo = ctinfo;
	nf_conntrack_get(nskb->nfct);
}
EXPORT_SYMBOL_GPL(__nf_conntrack_attach);
static inline int
do_iter(const struct nf_conntrack_tuple_hash *i,
	int (*iter)(struct nf_conn *i, void *data),
	void *data)
{
	return iter(nf_ct_tuplehash_to_ctrack(i), data);
}
/* Bring out ya dead! */
static struct nf_conn *
get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
		void *data, unsigned int *bucket)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	write_lock_bh(&nf_conntrack_lock);
	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
		list_for_each_entry(h, &nf_conntrack_hash[*bucket], list) {
			ct = nf_ct_tuplehash_to_ctrack(h);
			if (iter(ct, data))
				goto found;
		}
	}
	list_for_each_entry(h, &unconfirmed, list) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		if (iter(ct, data))
			set_bit(IPS_DYING_BIT, &ct->status);
	}
	write_unlock_bh(&nf_conntrack_lock);
	return NULL;
found:
	atomic_inc(&ct->ct_general.use);
	write_unlock_bh(&nf_conntrack_lock);
	return ct;
}
void
nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
{
	struct nf_conn *ct;
	unsigned int bucket = 0;

	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
		ip_conntrack_ipct_delete(ct, 0);
		/* Time to push up daises... */
		if (del_timer(&ct->timeout))
			death_by_timeout((unsigned long)ct);
		/* ... else the timer will get him soon. */

		nf_ct_put(ct);
	}
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
static int kill_all(struct nf_conn *i, void *data)
{
	return 1;
}

static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
{
	if (vmalloced)
		vfree(hash);
	else
		free_pages((unsigned long)hash,
			   get_order(sizeof(struct list_head) * size));
}

void nf_conntrack_flush(void)
{
	nf_ct_iterate_cleanup(kill_all, NULL);
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush);
/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void nf_conntrack_cleanup(void)
{
	int i;

	rcu_assign_pointer(ip_ct_attach, NULL);

	/* This makes sure all current packets have passed through
	   netfilter framework.  Roll on, two-stage module
	   delete... */
	synchronize_net();

	nf_ct_event_cache_flush();
 i_see_dead_people:
	nf_conntrack_flush();
	if (atomic_read(&nf_conntrack_count) != 0) {
		schedule();
		goto i_see_dead_people;
	}
	/* wait until all references to nf_conntrack_untracked are dropped */
	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
		schedule();

	rcu_assign_pointer(nf_ct_destroy, NULL);

	for (i = 0; i < NF_CT_F_NUM; i++) {
		if (nf_ct_cache[i].use == 0)
			continue;

		NF_CT_ASSERT(nf_ct_cache[i].use == 1);
		nf_ct_cache[i].use = 1;
		nf_conntrack_unregister_cache(i);
	}
	kmem_cache_destroy(nf_conntrack_expect_cachep);
	free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
			    nf_conntrack_htable_size);

	nf_conntrack_proto_fini();
}
static struct list_head *alloc_hashtable(int *sizep, int *vmalloced)
{
	struct list_head *hash;
	unsigned int size, i;

	*vmalloced = 0;

	size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct list_head));
	hash = (void*)__get_free_pages(GFP_KERNEL|__GFP_NOWARN,
				       get_order(sizeof(struct list_head)
						 * size));
	if (!hash) {
		*vmalloced = 1;
		printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
		hash = vmalloc(sizeof(struct list_head) * size);
	}

	if (hash)
		for (i = 0; i < size; i++)
			INIT_LIST_HEAD(&hash[i]);

	return hash;
}
int set_hashsize(const char *val, struct kernel_param *kp)
{
	int i, bucket, hashsize, vmalloced;
	int old_vmalloced, old_size;
	int rnd;
	struct list_head *hash, *old_hash;
	struct nf_conntrack_tuple_hash *h;

	/* On boot, we can set this without any fancy locking. */
	if (!nf_conntrack_htable_size)
		return param_set_uint(val, kp);

	hashsize = simple_strtol(val, NULL, 0);
	if (!hashsize)
		return -EINVAL;

	hash = alloc_hashtable(&hashsize, &vmalloced);
	if (!hash)
		return -ENOMEM;

	/* We have to rehash for the new table anyway, so we also can
	 * use a new random seed */
	get_random_bytes(&rnd, 4);

	write_lock_bh(&nf_conntrack_lock);
	for (i = 0; i < nf_conntrack_htable_size; i++) {
		while (!list_empty(&nf_conntrack_hash[i])) {
			h = list_entry(nf_conntrack_hash[i].next,
				       struct nf_conntrack_tuple_hash, list);
			list_del(&h->list);
			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
			list_add_tail(&h->list, &hash[bucket]);
		}
	}
	old_size = nf_conntrack_htable_size;
	old_vmalloced = nf_conntrack_vmalloc;
	old_hash = nf_conntrack_hash;

	nf_conntrack_htable_size = hashsize;
	nf_conntrack_vmalloc = vmalloced;
	nf_conntrack_hash = hash;
	nf_conntrack_hash_rnd = rnd;
	write_unlock_bh(&nf_conntrack_lock);

	free_conntrack_hash(old_hash, old_vmalloced, old_size);
	return 0;
}

module_param_call(hashsize, set_hashsize, param_get_uint,
		  &nf_conntrack_htable_size, 0600);
s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
			enum ip_conntrack_dir dir,
			u32 seq);
EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
int __init nf_conntrack_init(void)
{
	int ret;

	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
	 * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
	if (!nf_conntrack_htable_size) {
		nf_conntrack_htable_size
			= (((num_physpages << PAGE_SHIFT) / 16384)
			   / sizeof(struct list_head));
		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
			nf_conntrack_htable_size = 8192;
		if (nf_conntrack_htable_size < 16)
			nf_conntrack_htable_size = 16;
	}

	nf_conntrack_hash = alloc_hashtable(&nf_conntrack_htable_size,
					    &nf_conntrack_vmalloc);
	if (!nf_conntrack_hash) {
		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
		goto err_out;
	}

	nf_conntrack_max = 8 * nf_conntrack_htable_size;

	printk("nf_conntrack version %s (%u buckets, %d max)\n",
	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
	       nf_conntrack_max);

	ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic",
					  sizeof(struct nf_conn));
	if (ret < 0) {
		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
		goto err_free_hash;
	}

	nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect",
					sizeof(struct nf_conntrack_expect),
					0, 0, NULL, NULL);
	if (!nf_conntrack_expect_cachep) {
		printk(KERN_ERR "Unable to create nf_expect slab cache\n");
		goto err_free_conntrack_slab;
	}

	ret = nf_conntrack_proto_init();
	if (ret < 0)
		goto out_free_expect_slab;

	/* For use by REJECT target */
	rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach);
	rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);

	/* Howto get NAT offsets */
	rcu_assign_pointer(nf_ct_nat_offset, NULL);

	/* Set up fake conntrack:
	    - to never be deleted, not in any hashes */
	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
	/*  - and make it look like a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);

	return ret;

out_free_expect_slab:
	kmem_cache_destroy(nf_conntrack_expect_cachep);
err_free_conntrack_slab:
	nf_conntrack_unregister_cache(NF_CT_F_BASIC);
err_free_hash:
	free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
			    nf_conntrack_htable_size);
err_out:
	return -ENOMEM;
}