1 /* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables
3 extension. */
5 /* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
13 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
14 * - new API and handling of conntrack/nat helpers
15 * - now capable of multiple expectations for one master
16 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
17 * - add usage/reference counts to ip_conntrack_expect
18 * - export ip_conntrack[_expect]_{find_get,put} functions
19 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
20 * - generalize L3 protocol dependent part.
21 * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
22 * - add support for various sizes of conntrack structures.
24 * Derived from net/ipv4/netfilter/ip_conntrack_core.c
27 #include <linux/config.h>
28 #include <linux/types.h>
29 #include <linux/netfilter.h>
30 #include <linux/module.h>
31 #include <linux/skbuff.h>
32 #include <linux/proc_fs.h>
33 #include <linux/vmalloc.h>
34 #include <linux/stddef.h>
35 #include <linux/slab.h>
36 #include <linux/random.h>
37 #include <linux/jhash.h>
38 #include <linux/err.h>
39 #include <linux/percpu.h>
40 #include <linux/moduleparam.h>
41 #include <linux/notifier.h>
42 #include <linux/kernel.h>
43 #include <linux/netdevice.h>
44 #include <linux/socket.h>
46 /* This rwlock protects the main hash table, protocol/helper/expected
47 registrations and conntrack timers. */
48 #define ASSERT_READ_LOCK(x)
49 #define ASSERT_WRITE_LOCK(x)
51 #include <net/netfilter/nf_conntrack.h>
52 #include <net/netfilter/nf_conntrack_l3proto.h>
53 #include <net/netfilter/nf_conntrack_protocol.h>
54 #include <net/netfilter/nf_conntrack_helper.h>
55 #include <net/netfilter/nf_conntrack_core.h>
56 #include <linux/netfilter_ipv4/listhelp.h>
58 #define NF_CONNTRACK_VERSION "0.4.1"
60 #if 0
61 #define DEBUGP printk
62 #else
63 #define DEBUGP(format, args...)
64 #endif
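/* Note: flipping the "#if 0" above to "#if 1" routes DEBUGP() through printk
 * and enables the verbose debugging output used throughout this file. */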
66 DEFINE_RWLOCK(nf_conntrack_lock);
68 /* nf_conntrack_standalone needs this */
69 atomic_t nf_conntrack_count = ATOMIC_INIT(0);
71 void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
72 LIST_HEAD(nf_conntrack_expect_list);
73 struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
74 struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX];
75 static LIST_HEAD(helpers);
76 unsigned int nf_conntrack_htable_size = 0;
77 int nf_conntrack_max;
78 struct list_head *nf_conntrack_hash;
79 static kmem_cache_t *nf_conntrack_expect_cachep;
80 struct nf_conn nf_conntrack_untracked;
81 unsigned int nf_ct_log_invalid;
82 static LIST_HEAD(unconfirmed);
83 static int nf_conntrack_vmalloc;
85 #ifdef CONFIG_NF_CONNTRACK_EVENTS
86 struct notifier_block *nf_conntrack_chain;
87 struct notifier_block *nf_conntrack_expect_chain;
89 DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
91 /* deliver cached events and clear cache entry - must be called with locally
92 * disabled softirqs */
93 static inline void
94 __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
96 DEBUGP("ecache: delivering events for %p\n", ecache->ct);
97 if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
98 && ecache->events)
99 notifier_call_chain(&nf_conntrack_chain, ecache->events,
100 ecache->ct);
102 ecache->events = 0;
103 nf_ct_put(ecache->ct);
104 ecache->ct = NULL;
107 /* Deliver all cached events for a particular conntrack. This is called
108 * by code prior to async packet handling for freeing the skb */
109 void nf_ct_deliver_cached_events(const struct nf_conn *ct)
111 struct nf_conntrack_ecache *ecache;
113 local_bh_disable();
114 ecache = &__get_cpu_var(nf_conntrack_ecache);
115 if (ecache->ct == ct)
116 __nf_ct_deliver_cached_events(ecache);
117 local_bh_enable();
120 /* Deliver cached events for old pending events, if current conntrack != old */
121 void __nf_ct_event_cache_init(struct nf_conn *ct)
123 struct nf_conntrack_ecache *ecache;
125 /* take care of delivering potentially old events */
126 ecache = &__get_cpu_var(nf_conntrack_ecache);
127 BUG_ON(ecache->ct == ct);
128 if (ecache->ct)
129 __nf_ct_deliver_cached_events(ecache);
130 /* initialize for this conntrack/packet */
131 ecache->ct = ct;
132 nf_conntrack_get(&ct->ct_general);
135 /* flush the event cache - touches other CPU's data and must not be called
136 * while packets are still passing through the code */
137 static void nf_ct_event_cache_flush(void)
139 struct nf_conntrack_ecache *ecache;
140 int cpu;
142 for_each_cpu(cpu) {
143 ecache = &per_cpu(nf_conntrack_ecache, cpu);
144 if (ecache->ct)
145 nf_ct_put(ecache->ct);
148 #else
149 static inline void nf_ct_event_cache_flush(void) {}
150 #endif /* CONFIG_NF_CONNTRACK_EVENTS */
152 DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
153 EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
156 * This scheme offers various sizes of "struct nf_conn" depending on the
157 * features in use (helper, nat, ...)
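/* Illustrative sketch only: a connection that needs helper state is
 * allocated from the NF_CT_F_HELP cache, which nf_conntrack_helper_register()
 * below sets up roughly as
 *
 *	nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
 *				    sizeof(struct nf_conn)
 *				    + sizeof(union nf_conntrack_help)
 *				    + __alignof__(union nf_conntrack_help),
 *				    init_conntrack_for_helper);
 *
 * while plain connections come from the NF_CT_F_BASIC cache registered in
 * nf_conntrack_init() with just sizeof(struct nf_conn). */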
160 #define NF_CT_FEATURES_NAMELEN 256
161 static struct {
162 /* name of slab cache. printed in /proc/slabinfo */
163 char *name;
165 /* size of slab cache */
166 size_t size;
168 /* slab cache pointer */
169 kmem_cache_t *cachep;
172 /* allocated slab cache + modules which use this slab cache */
172 int use;
174 /* Initialization */
175 int (*init_conntrack)(struct nf_conn *, u_int32_t);
177 } nf_ct_cache[NF_CT_F_NUM];
179 /* protect members of nf_ct_cache except "use" */
180 DEFINE_RWLOCK(nf_ct_cache_lock);
182 /* This avoids calling kmem_cache_create() with the same name simultaneously */
183 DECLARE_MUTEX(nf_ct_cache_mutex);
185 extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
186 struct nf_conntrack_protocol *
187 nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol)
189 if (unlikely(nf_ct_protos[l3proto] == NULL))
190 return &nf_conntrack_generic_protocol;
192 return nf_ct_protos[l3proto][protocol];
195 static int nf_conntrack_hash_rnd_initted;
196 static unsigned int nf_conntrack_hash_rnd;
198 static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
199 unsigned int size, unsigned int rnd)
201 unsigned int a, b;
202 a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all),
203 ((tuple->src.l3num) << 16) | tuple->dst.protonum);
204 b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all),
205 (tuple->src.u.all << 16) | tuple->dst.u.all);
207 return jhash_2words(a, b, rnd) % size;
210 static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
212 return __hash_conntrack(tuple, nf_conntrack_htable_size,
213 nf_conntrack_hash_rnd);
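/* Hashing sketch: the source and destination addresses are jhash'ed
 * separately (mixed with l3num/protonum and the ports), the two words are
 * combined with the per-boot random seed, and the result is reduced modulo
 * the table size:
 *
 *	bucket = jhash_2words(a, b, nf_conntrack_hash_rnd)
 *		 % nf_conntrack_htable_size;
 *
 * The random seed keeps the chain layout unpredictable to remote senders. */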
216 /* Initialize "struct nf_conn" which has spaces for helper */
217 static int
218 init_conntrack_for_helper(struct nf_conn *conntrack, u_int32_t features)
221 conntrack->help = (union nf_conntrack_help *)
222 (((unsigned long)conntrack->data
223 + (__alignof__(union nf_conntrack_help) - 1))
224 & (~((unsigned long)(__alignof__(union nf_conntrack_help) -1))));
225 return 0;
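/* The pointer arithmetic above is the usual (addr + align - 1) & ~(align - 1)
 * round-up: it aligns the start of conntrack->data to
 * __alignof__(union nf_conntrack_help) so the helper area appended to
 * struct nf_conn is properly aligned. */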
228 int nf_conntrack_register_cache(u_int32_t features, const char *name,
229 size_t size,
230 int (*init)(struct nf_conn *, u_int32_t))
232 int ret = 0;
233 char *cache_name;
234 kmem_cache_t *cachep;
236 DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n",
237 features, name, size);
239 if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) {
240 DEBUGP("nf_conntrack_register_cache: invalid features: 0x%x\n",
241 features);
242 return -EINVAL;
245 down(&nf_ct_cache_mutex);
247 write_lock_bh(&nf_ct_cache_lock);
248 /* e.g.: multiple helpers are loaded */
249 if (nf_ct_cache[features].use > 0) {
250 DEBUGP("nf_conntrack_register_cache: already registered.\n");
251 if ((!strncmp(nf_ct_cache[features].name, name,
252 NF_CT_FEATURES_NAMELEN))
253 && nf_ct_cache[features].size == size
254 && nf_ct_cache[features].init_conntrack == init) {
255 DEBUGP("nf_conntrack_register_cache: reusing.\n");
256 nf_ct_cache[features].use++;
257 ret = 0;
258 } else
259 ret = -EBUSY;
261 write_unlock_bh(&nf_ct_cache_lock);
262 up(&nf_ct_cache_mutex);
263 return ret;
265 write_unlock_bh(&nf_ct_cache_lock);
268 * The memory holding the slab cache name must stay valid until the
269 * cache is destroyed.
271 cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC);
272 if (cache_name == NULL) {
273 DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n");
274 ret = -ENOMEM;
275 goto out_up_mutex;
278 if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN)
279 >= NF_CT_FEATURES_NAMELEN) {
280 printk("nf_conntrack_register_cache: name too long\n");
281 ret = -EINVAL;
282 goto out_free_name;
285 cachep = kmem_cache_create(cache_name, size, 0, 0,
286 NULL, NULL);
287 if (!cachep) {
288 printk("nf_conntrack_register_cache: Can't create slab cache "
289 "for the features = 0x%x\n", features);
290 ret = -ENOMEM;
291 goto out_free_name;
294 write_lock_bh(&nf_ct_cache_lock);
295 nf_ct_cache[features].use = 1;
296 nf_ct_cache[features].size = size;
297 nf_ct_cache[features].init_conntrack = init;
298 nf_ct_cache[features].cachep = cachep;
299 nf_ct_cache[features].name = cache_name;
300 write_unlock_bh(&nf_ct_cache_lock);
302 goto out_up_mutex;
304 out_free_name:
305 kfree(cache_name);
306 out_up_mutex:
307 up(&nf_ct_cache_mutex);
308 return ret;
311 /* FIXME: Currently, only nf_conntrack_cleanup() can call this function. */
312 void nf_conntrack_unregister_cache(u_int32_t features)
314 kmem_cache_t *cachep;
315 char *name;
318 * This ensures that kmem_cache_create() isn't called before the slab
319 * cache has been destroyed.
321 DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features);
322 down(&nf_ct_cache_mutex);
324 write_lock_bh(&nf_ct_cache_lock);
325 if (--nf_ct_cache[features].use > 0) {
326 write_unlock_bh(&nf_ct_cache_lock);
327 up(&nf_ct_cache_mutex);
328 return;
330 cachep = nf_ct_cache[features].cachep;
331 name = nf_ct_cache[features].name;
332 nf_ct_cache[features].cachep = NULL;
333 nf_ct_cache[features].name = NULL;
334 nf_ct_cache[features].init_conntrack = NULL;
335 nf_ct_cache[features].size = 0;
336 write_unlock_bh(&nf_ct_cache_lock);
338 synchronize_net();
340 kmem_cache_destroy(cachep);
341 kfree(name);
343 up(&nf_ct_cache_mutex);
347 nf_ct_get_tuple(const struct sk_buff *skb,
348 unsigned int nhoff,
349 unsigned int dataoff,
350 u_int16_t l3num,
351 u_int8_t protonum,
352 struct nf_conntrack_tuple *tuple,
353 const struct nf_conntrack_l3proto *l3proto,
354 const struct nf_conntrack_protocol *protocol)
356 NF_CT_TUPLE_U_BLANK(tuple);
358 tuple->src.l3num = l3num;
359 if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
360 return 0;
362 tuple->dst.protonum = protonum;
363 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
365 return protocol->pkt_to_tuple(skb, dataoff, tuple);
369 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
370 const struct nf_conntrack_tuple *orig,
371 const struct nf_conntrack_l3proto *l3proto,
372 const struct nf_conntrack_protocol *protocol)
374 NF_CT_TUPLE_U_BLANK(inverse);
376 inverse->src.l3num = orig->src.l3num;
377 if (l3proto->invert_tuple(inverse, orig) == 0)
378 return 0;
380 inverse->dst.dir = !orig->dst.dir;
382 inverse->dst.protonum = orig->dst.protonum;
383 return protocol->invert_tuple(inverse, orig);
386 /* nf_conntrack_expect helper functions */
387 static void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
389 ASSERT_WRITE_LOCK(&nf_conntrack_lock);
390 NF_CT_ASSERT(!timer_pending(&exp->timeout));
391 list_del(&exp->list);
392 NF_CT_STAT_INC(expect_delete);
393 exp->master->expecting--;
394 nf_conntrack_expect_put(exp);
397 static void expectation_timed_out(unsigned long ul_expect)
399 struct nf_conntrack_expect *exp = (void *)ul_expect;
401 write_lock_bh(&nf_conntrack_lock);
402 nf_ct_unlink_expect(exp);
403 write_unlock_bh(&nf_conntrack_lock);
404 nf_conntrack_expect_put(exp);
407 /* If an expectation for this connection is found, it is deleted from the
408 * global list and then returned. */
409 static struct nf_conntrack_expect *
410 find_expectation(const struct nf_conntrack_tuple *tuple)
412 struct nf_conntrack_expect *i;
414 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
415 /* If master is not in hash table yet (ie. packet hasn't left
416 this machine yet), how can other end know about expected?
417 Hence these are not the droids you are looking for (if
418 master ct never got confirmed, we'd hold a reference to it
419 and weird things would happen to future packets). */
420 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
421 && nf_ct_is_confirmed(i->master)) {
422 if (i->flags & NF_CT_EXPECT_PERMANENT) {
423 atomic_inc(&i->use);
424 return i;
425 } else if (del_timer(&i->timeout)) {
426 nf_ct_unlink_expect(i);
427 return i;
431 return NULL;
434 /* delete all expectations for this conntrack */
435 static void remove_expectations(struct nf_conn *ct)
437 struct nf_conntrack_expect *i, *tmp;
439 /* Optimization: most connections never expect any others. */
440 if (ct->expecting == 0)
441 return;
443 list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
444 if (i->master == ct && del_timer(&i->timeout)) {
445 nf_ct_unlink_expect(i);
446 nf_conntrack_expect_put(i);
451 static void
452 clean_from_lists(struct nf_conn *ct)
454 unsigned int ho, hr;
456 DEBUGP("clean_from_lists(%p)\n", ct);
457 ASSERT_WRITE_LOCK(&nf_conntrack_lock);
459 ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
460 hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
461 LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
462 LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
464 /* Destroy all pending expectations */
465 remove_expectations(ct);
468 static void
469 destroy_conntrack(struct nf_conntrack *nfct)
471 struct nf_conn *ct = (struct nf_conn *)nfct;
472 struct nf_conntrack_l3proto *l3proto;
473 struct nf_conntrack_protocol *proto;
475 DEBUGP("destroy_conntrack(%p)\n", ct);
476 NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
477 NF_CT_ASSERT(!timer_pending(&ct->timeout));
479 nf_conntrack_event(IPCT_DESTROY, ct);
480 set_bit(IPS_DYING_BIT, &ct->status);
482 /* To make sure we don't get any weird locking issues here:
483 * destroy_conntrack() MUST NOT be called with a write lock
484 * to nf_conntrack_lock!!! -HW */
485 l3proto = nf_ct_find_l3proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
486 if (l3proto && l3proto->destroy)
487 l3proto->destroy(ct);
489 proto = nf_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num,
490 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
491 if (proto && proto->destroy)
492 proto->destroy(ct);
494 if (nf_conntrack_destroyed)
495 nf_conntrack_destroyed(ct);
497 write_lock_bh(&nf_conntrack_lock);
498 /* Expectations will have been removed in clean_from_lists,
499 * except TFTP can create an expectation on the first packet,
500 * before connection is in the list, so we need to clean here,
501 * too. */
502 remove_expectations(ct);
504 /* We overload first tuple to link into unconfirmed list. */
505 if (!nf_ct_is_confirmed(ct)) {
506 BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
507 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
510 NF_CT_STAT_INC(delete);
511 write_unlock_bh(&nf_conntrack_lock);
513 if (ct->master)
514 nf_ct_put(ct->master);
516 DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
517 nf_conntrack_free(ct);
520 static void death_by_timeout(unsigned long ul_conntrack)
522 struct nf_conn *ct = (void *)ul_conntrack;
524 write_lock_bh(&nf_conntrack_lock);
525 /* Inside lock so preempt is disabled on module removal path.
526 * Otherwise we can get spurious warnings. */
527 NF_CT_STAT_INC(delete_list);
528 clean_from_lists(ct);
529 write_unlock_bh(&nf_conntrack_lock);
530 nf_ct_put(ct);
533 static inline int
534 conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
535 const struct nf_conntrack_tuple *tuple,
536 const struct nf_conn *ignored_conntrack)
538 ASSERT_READ_LOCK(&nf_conntrack_lock);
539 return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack
540 && nf_ct_tuple_equal(tuple, &i->tuple);
543 static struct nf_conntrack_tuple_hash *
544 __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
545 const struct nf_conn *ignored_conntrack)
547 struct nf_conntrack_tuple_hash *h;
548 unsigned int hash = hash_conntrack(tuple);
550 ASSERT_READ_LOCK(&nf_conntrack_lock);
551 list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
552 if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
553 NF_CT_STAT_INC(found);
554 return h;
556 NF_CT_STAT_INC(searched);
559 return NULL;
562 /* Find a connection corresponding to a tuple. */
563 struct nf_conntrack_tuple_hash *
564 nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
565 const struct nf_conn *ignored_conntrack)
567 struct nf_conntrack_tuple_hash *h;
569 read_lock_bh(&nf_conntrack_lock);
570 h = __nf_conntrack_find(tuple, ignored_conntrack);
571 if (h)
572 atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
573 read_unlock_bh(&nf_conntrack_lock);
575 return h;
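/* Note for callers: on success the returned entry's conntrack has had its
 * refcount raised, so it must eventually be released with
 * nf_ct_put(nf_ct_tuplehash_to_ctrack(h)). */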
578 /* Confirm a connection given skb; places it in hash table */
580 __nf_conntrack_confirm(struct sk_buff **pskb)
582 unsigned int hash, repl_hash;
583 struct nf_conn *ct;
584 enum ip_conntrack_info ctinfo;
586 ct = nf_ct_get(*pskb, &ctinfo);
588 /* ipt_REJECT uses nf_conntrack_attach to attach related
589 ICMP/TCP RST packets in other direction. Actual packet
590 which created connection will be IP_CT_NEW or for an
591 expected connection, IP_CT_RELATED. */
592 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
593 return NF_ACCEPT;
595 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
596 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
598 /* We're not in hash table, and we refuse to set up related
599 connections for unconfirmed conns. But packet copies and
600 REJECT will give spurious warnings here. */
601 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
603 /* No external references means no one else could have
604 confirmed us. */
605 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
606 DEBUGP("Confirming conntrack %p\n", ct);
608 write_lock_bh(&nf_conntrack_lock);
610 /* See if there's one in the list already, including reverse:
611 NAT could have grabbed it without realizing, since we're
612 not in the hash. If there is, we lost the race. */
613 if (!LIST_FIND(&nf_conntrack_hash[hash],
614 conntrack_tuple_cmp,
615 struct nf_conntrack_tuple_hash *,
616 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
617 && !LIST_FIND(&nf_conntrack_hash[repl_hash],
618 conntrack_tuple_cmp,
619 struct nf_conntrack_tuple_hash *,
620 &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
621 /* Remove from unconfirmed list */
622 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
624 list_prepend(&nf_conntrack_hash[hash],
625 &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
626 list_prepend(&nf_conntrack_hash[repl_hash],
627 &ct->tuplehash[IP_CT_DIR_REPLY]);
628 /* Timer relative to confirmation time, not original
629 setting time, otherwise we'd get timer wrap in
630 weird delay cases. */
631 ct->timeout.expires += jiffies;
632 add_timer(&ct->timeout);
633 atomic_inc(&ct->ct_general.use);
634 set_bit(IPS_CONFIRMED_BIT, &ct->status);
635 NF_CT_STAT_INC(insert);
636 write_unlock_bh(&nf_conntrack_lock);
637 if (ct->helper)
638 nf_conntrack_event_cache(IPCT_HELPER, *pskb);
639 #ifdef CONFIG_NF_NAT_NEEDED
640 if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
641 test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
642 nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
643 #endif
644 nf_conntrack_event_cache(master_ct(ct) ?
645 IPCT_RELATED : IPCT_NEW, *pskb);
646 return NF_ACCEPT;
649 NF_CT_STAT_INC(insert_failed);
650 write_unlock_bh(&nf_conntrack_lock);
651 return NF_DROP;
654 /* Returns true if a connection corresponds to the tuple (required
655 for NAT). */
657 nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
658 const struct nf_conn *ignored_conntrack)
660 struct nf_conntrack_tuple_hash *h;
662 read_lock_bh(&nf_conntrack_lock);
663 h = __nf_conntrack_find(tuple, ignored_conntrack);
664 read_unlock_bh(&nf_conntrack_lock);
666 return h != NULL;
669 /* There's a small race here where we may free a just-assured
670 connection. Too bad: we're in trouble anyway. */
671 static inline int unreplied(const struct nf_conntrack_tuple_hash *i)
673 return !(test_bit(IPS_ASSURED_BIT,
674 &nf_ct_tuplehash_to_ctrack(i)->status));
677 static int early_drop(struct list_head *chain)
679 /* Traverse backwards: gives us oldest, which is roughly LRU */
680 struct nf_conntrack_tuple_hash *h;
681 struct nf_conn *ct = NULL;
682 int dropped = 0;
684 read_lock_bh(&nf_conntrack_lock);
685 h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *);
686 if (h) {
687 ct = nf_ct_tuplehash_to_ctrack(h);
688 atomic_inc(&ct->ct_general.use);
690 read_unlock_bh(&nf_conntrack_lock);
692 if (!ct)
693 return dropped;
695 if (del_timer(&ct->timeout)) {
696 death_by_timeout((unsigned long)ct);
697 dropped = 1;
698 NF_CT_STAT_INC(early_drop);
700 nf_ct_put(ct);
701 return dropped;
704 static inline int helper_cmp(const struct nf_conntrack_helper *i,
705 const struct nf_conntrack_tuple *rtuple)
707 return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
710 static struct nf_conntrack_helper *
711 nf_ct_find_helper(const struct nf_conntrack_tuple *tuple)
713 return LIST_FIND(&helpers, helper_cmp,
714 struct nf_conntrack_helper *,
715 tuple);
718 static struct nf_conn *
719 __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
720 const struct nf_conntrack_tuple *repl,
721 const struct nf_conntrack_l3proto *l3proto)
723 struct nf_conn *conntrack = NULL;
724 u_int32_t features = 0;
726 if (!nf_conntrack_hash_rnd_initted) {
727 get_random_bytes(&nf_conntrack_hash_rnd, 4);
728 nf_conntrack_hash_rnd_initted = 1;
731 if (nf_conntrack_max
732 && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) {
733 unsigned int hash = hash_conntrack(orig);
734 /* Try dropping from this hash chain. */
735 if (!early_drop(&nf_conntrack_hash[hash])) {
736 if (net_ratelimit())
737 printk(KERN_WARNING
738 "nf_conntrack: table full, dropping"
739 " packet.\n");
740 return ERR_PTR(-ENOMEM);
744 /* find features needed by this conntrack. */
745 features = l3proto->get_features(orig);
746 read_lock_bh(&nf_conntrack_lock);
747 if (nf_ct_find_helper(repl) != NULL)
748 features |= NF_CT_F_HELP;
749 read_unlock_bh(&nf_conntrack_lock);
751 DEBUGP("nf_conntrack_alloc: features=0x%x\n", features);
753 read_lock_bh(&nf_ct_cache_lock);
755 if (!nf_ct_cache[features].use) {
756 DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n",
757 features);
758 goto out;
761 conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC);
762 if (conntrack == NULL) {
763 DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n");
764 goto out;
767 memset(conntrack, 0, nf_ct_cache[features].size);
768 conntrack->features = features;
769 if (nf_ct_cache[features].init_conntrack &&
770 nf_ct_cache[features].init_conntrack(conntrack, features) < 0) {
771 DEBUGP("nf_conntrack_alloc: failed to init\n");
772 kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
773 conntrack = NULL;
774 goto out;
777 atomic_set(&conntrack->ct_general.use, 1);
778 conntrack->ct_general.destroy = destroy_conntrack;
779 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
780 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
781 /* Don't set timer yet: wait for confirmation */
782 init_timer(&conntrack->timeout);
783 conntrack->timeout.data = (unsigned long)conntrack;
784 conntrack->timeout.function = death_by_timeout;
786 atomic_inc(&nf_conntrack_count);
787 out:
788 read_unlock_bh(&nf_ct_cache_lock);
789 return conntrack;
792 struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
793 const struct nf_conntrack_tuple *repl)
795 struct nf_conntrack_l3proto *l3proto;
797 l3proto = nf_ct_find_l3proto(orig->src.l3num);
798 return __nf_conntrack_alloc(orig, repl, l3proto);
801 void nf_conntrack_free(struct nf_conn *conntrack)
803 u_int32_t features = conntrack->features;
804 NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM);
805 DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features,
806 conntrack);
807 kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
808 atomic_dec(&nf_conntrack_count);
811 /* Allocate a new conntrack: we return -ENOMEM if classification
812 failed due to stress. Otherwise it really is unclassifiable. */
813 static struct nf_conntrack_tuple_hash *
814 init_conntrack(const struct nf_conntrack_tuple *tuple,
815 struct nf_conntrack_l3proto *l3proto,
816 struct nf_conntrack_protocol *protocol,
817 struct sk_buff *skb,
818 unsigned int dataoff)
820 struct nf_conn *conntrack;
821 struct nf_conntrack_tuple repl_tuple;
822 struct nf_conntrack_expect *exp;
824 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) {
825 DEBUGP("Can't invert tuple.\n");
826 return NULL;
829 conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto);
830 if (conntrack == NULL || IS_ERR(conntrack)) {
831 DEBUGP("Can't allocate conntrack.\n");
832 return (struct nf_conntrack_tuple_hash *)conntrack;
835 if (!protocol->new(conntrack, skb, dataoff)) {
836 nf_conntrack_free(conntrack);
837 DEBUGP("init conntrack: can't track with proto module\n");
838 return NULL;
841 write_lock_bh(&nf_conntrack_lock);
842 exp = find_expectation(tuple);
844 if (exp) {
845 DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
846 conntrack, exp);
847 /* Welcome, Mr. Bond. We've been expecting you... */
848 __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
849 conntrack->master = exp->master;
850 #ifdef CONFIG_NF_CONNTRACK_MARK
851 conntrack->mark = exp->master->mark;
852 #endif
853 nf_conntrack_get(&conntrack->master->ct_general);
854 NF_CT_STAT_INC(expect_new);
855 } else {
856 conntrack->helper = nf_ct_find_helper(&repl_tuple);
858 NF_CT_STAT_INC(new);
861 /* Overload tuple linked list to put us in unconfirmed list. */
862 list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
864 write_unlock_bh(&nf_conntrack_lock);
866 if (exp) {
867 if (exp->expectfn)
868 exp->expectfn(conntrack, exp);
869 nf_conntrack_expect_put(exp);
872 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
875 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
876 static inline struct nf_conn *
877 resolve_normal_ct(struct sk_buff *skb,
878 unsigned int dataoff,
879 u_int16_t l3num,
880 u_int8_t protonum,
881 struct nf_conntrack_l3proto *l3proto,
882 struct nf_conntrack_protocol *proto,
883 int *set_reply,
884 enum ip_conntrack_info *ctinfo)
886 struct nf_conntrack_tuple tuple;
887 struct nf_conntrack_tuple_hash *h;
888 struct nf_conn *ct;
890 if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
891 dataoff, l3num, protonum, &tuple, l3proto,
892 proto)) {
893 DEBUGP("resolve_normal_ct: Can't get tuple\n");
894 return NULL;
897 /* look for tuple match */
898 h = nf_conntrack_find_get(&tuple, NULL);
899 if (!h) {
900 h = init_conntrack(&tuple, l3proto, proto, skb, dataoff);
901 if (!h)
902 return NULL;
903 if (IS_ERR(h))
904 return (void *)h;
906 ct = nf_ct_tuplehash_to_ctrack(h);
908 /* It exists; we have (non-exclusive) reference. */
909 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
910 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
911 /* Please set reply bit if this packet OK */
912 *set_reply = 1;
913 } else {
914 /* Once we've had two way comms, always ESTABLISHED. */
915 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
916 DEBUGP("nf_conntrack_in: normal packet for %p\n", ct);
917 *ctinfo = IP_CT_ESTABLISHED;
918 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
919 DEBUGP("nf_conntrack_in: related packet for %p\n", ct);
920 *ctinfo = IP_CT_RELATED;
921 } else {
922 DEBUGP("nf_conntrack_in: new packet for %p\n", ct);
923 *ctinfo = IP_CT_NEW;
925 *set_reply = 0;
927 skb->nfct = &ct->ct_general;
928 skb->nfctinfo = *ctinfo;
929 return ct;
932 unsigned int
933 nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
935 struct nf_conn *ct;
936 enum ip_conntrack_info ctinfo;
937 struct nf_conntrack_l3proto *l3proto;
938 struct nf_conntrack_protocol *proto;
939 unsigned int dataoff;
940 u_int8_t protonum;
941 int set_reply = 0;
942 int ret;
944 /* Previously seen (loopback or untracked)? Ignore. */
945 if ((*pskb)->nfct) {
946 NF_CT_STAT_INC(ignore);
947 return NF_ACCEPT;
950 l3proto = nf_ct_find_l3proto((u_int16_t)pf);
951 if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
952 DEBUGP("not prepared to track yet or error occurred\n");
953 return -ret;
956 proto = nf_ct_find_proto((u_int16_t)pf, protonum);
958 /* It may be a special packet, error, unclean...
959 * the inverse of the return code tells the netfilter
960 * core what to do with the packet. */
961 if (proto->error != NULL &&
962 (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
963 NF_CT_STAT_INC(error);
964 NF_CT_STAT_INC(invalid);
965 return -ret;
968 ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto,
969 &set_reply, &ctinfo);
970 if (!ct) {
971 /* Not valid part of a connection */
972 NF_CT_STAT_INC(invalid);
973 return NF_ACCEPT;
976 if (IS_ERR(ct)) {
977 /* Too stressed to deal. */
978 NF_CT_STAT_INC(drop);
979 return NF_DROP;
982 NF_CT_ASSERT((*pskb)->nfct);
984 ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
985 if (ret < 0) {
986 /* Invalid: inverse of the return code tells
987 * the netfilter core what to do */
988 DEBUGP("nf_conntrack_in: Can't track with proto module\n");
989 nf_conntrack_put((*pskb)->nfct);
990 (*pskb)->nfct = NULL;
991 NF_CT_STAT_INC(invalid);
992 return -ret;
995 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
996 nf_conntrack_event_cache(IPCT_STATUS, *pskb);
998 return ret;
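/* Illustrative sketch: nf_conntrack_in() is not a netfilter hook by itself;
 * an L3 glue module (e.g. the IPv4/IPv6 conntrack modules) calls it from its
 * own hook functions, roughly
 *
 *	static unsigned int my_conntrack_hook(unsigned int hooknum,
 *					      struct sk_buff **pskb, ...)
 *	{
 *		return nf_conntrack_in(PF_INET, hooknum, pskb);
 *	}
 *
 * typically at the PRE_ROUTING and LOCAL_OUT hooks. */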
1001 int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
1002 const struct nf_conntrack_tuple *orig)
1004 return nf_ct_invert_tuple(inverse, orig,
1005 nf_ct_find_l3proto(orig->src.l3num),
1006 nf_ct_find_proto(orig->src.l3num,
1007 orig->dst.protonum));
1010 /* Would two expected things clash? */
1011 static inline int expect_clash(const struct nf_conntrack_expect *a,
1012 const struct nf_conntrack_expect *b)
1014 /* Part covered by intersection of masks must be unequal,
1015 otherwise they clash */
1016 struct nf_conntrack_tuple intersect_mask;
1017 int count;
1019 intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
1020 intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
1021 intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
1022 intersect_mask.dst.protonum = a->mask.dst.protonum
1023 & b->mask.dst.protonum;
1025 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
1026 intersect_mask.src.u3.all[count] =
1027 a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
1030 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
1031 intersect_mask.dst.u3.all[count] =
1032 a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
1035 return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
1038 static inline int expect_matches(const struct nf_conntrack_expect *a,
1039 const struct nf_conntrack_expect *b)
1041 return a->master == b->master
1042 && nf_ct_tuple_equal(&a->tuple, &b->tuple)
1043 && nf_ct_tuple_equal(&a->mask, &b->mask);
1046 /* Generally a bad idea to call this: could have matched already. */
1047 void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
1049 struct nf_conntrack_expect *i;
1051 write_lock_bh(&nf_conntrack_lock);
1052 /* choose the oldest expectation to evict */
1053 list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
1054 if (expect_matches(i, exp) && del_timer(&i->timeout)) {
1055 nf_ct_unlink_expect(i);
1056 write_unlock_bh(&nf_conntrack_lock);
1057 nf_conntrack_expect_put(i);
1058 return;
1061 write_unlock_bh(&nf_conntrack_lock);
1064 /* We don't increase the master conntrack refcount for non-fulfilled
1065 * conntracks. During the conntrack destruction, the expectations are
1066 * always killed before the conntrack itself */
1067 struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
1069 struct nf_conntrack_expect *new;
1071 new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
1072 if (!new) {
1073 DEBUGP("expect_related: OOM allocating expect\n");
1074 return NULL;
1076 new->master = me;
1077 atomic_set(&new->use, 1);
1078 return new;
1081 void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
1083 if (atomic_dec_and_test(&exp->use))
1084 kmem_cache_free(nf_conntrack_expect_cachep, exp);
1087 static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
1089 atomic_inc(&exp->use);
1090 exp->master->expecting++;
1091 list_add(&exp->list, &nf_conntrack_expect_list);
1093 init_timer(&exp->timeout);
1094 exp->timeout.data = (unsigned long)exp;
1095 exp->timeout.function = expectation_timed_out;
1096 exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
1097 add_timer(&exp->timeout);
1099 atomic_inc(&exp->use);
1100 NF_CT_STAT_INC(expect_create);
1103 /* Race with expectations being used means we could have none to find; OK. */
1104 static void evict_oldest_expect(struct nf_conn *master)
1106 struct nf_conntrack_expect *i;
1108 list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
1109 if (i->master == master) {
1110 if (del_timer(&i->timeout)) {
1111 nf_ct_unlink_expect(i);
1112 nf_conntrack_expect_put(i);
1114 break;
1119 static inline int refresh_timer(struct nf_conntrack_expect *i)
1121 if (!del_timer(&i->timeout))
1122 return 0;
1124 i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
1125 add_timer(&i->timeout);
1126 return 1;
1129 int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
1131 struct nf_conntrack_expect *i;
1132 int ret;
1134 DEBUGP("nf_conntrack_expect_related %p\n", expect);
1135 DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple);
1136 DEBUGP("mask: "); NF_CT_DUMP_TUPLE(&expect->mask);
1138 write_lock_bh(&nf_conntrack_lock);
1139 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
1140 if (expect_matches(i, expect)) {
1141 /* Refresh timer: if it's dying, ignore. */
1142 if (refresh_timer(i)) {
1143 ret = 0;
1144 goto out;
1146 } else if (expect_clash(i, expect)) {
1147 ret = -EBUSY;
1148 goto out;
1151 /* Will be over limit? */
1152 if (expect->master->helper->max_expected &&
1153 expect->master->expecting >= expect->master->helper->max_expected)
1154 evict_oldest_expect(expect->master);
1156 nf_conntrack_expect_insert(expect);
1157 nf_conntrack_expect_event(IPEXP_NEW, expect);
1158 ret = 0;
1159 out:
1160 write_unlock_bh(&nf_conntrack_lock);
1161 return ret;
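/* Typical expectation setup by a helper (a sketch; field values are
 * placeholders and error handling is omitted):
 *
 *	exp = nf_conntrack_expect_alloc(ct);
 *	if (exp == NULL)
 *		return NF_DROP;
 *	exp->tuple = ...;	tuple the expected connection must match
 *	exp->mask = ...;	which parts of the tuple are significant
 *	exp->expectfn = NULL;
 *	exp->flags = 0;
 *	nf_conntrack_expect_related(exp);
 *	nf_conntrack_expect_put(exp);
 */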
1164 /* Alter reply tuple (maybe alter helper). This is for NAT, and is
1165 implicitly racy: see __nf_conntrack_confirm */
1166 void nf_conntrack_alter_reply(struct nf_conn *conntrack,
1167 const struct nf_conntrack_tuple *newreply)
1169 write_lock_bh(&nf_conntrack_lock);
1170 /* Should be unconfirmed, so not in hash table yet */
1171 NF_CT_ASSERT(!nf_ct_is_confirmed(conntrack));
1173 DEBUGP("Altering reply tuple of %p to ", conntrack);
1174 NF_CT_DUMP_TUPLE(newreply);
1176 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
1177 if (!conntrack->master && conntrack->expecting == 0)
1178 conntrack->helper = nf_ct_find_helper(newreply);
1179 write_unlock_bh(&nf_conntrack_lock);
1182 int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
1184 int ret;
1185 BUG_ON(me->timeout == 0);
1187 ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
1188 sizeof(struct nf_conn)
1189 + sizeof(union nf_conntrack_help)
1190 + __alignof__(union nf_conntrack_help),
1191 init_conntrack_for_helper);
1192 if (ret < 0) {
1193 printk(KERN_ERR "nf_conntrack_helper_register: Unable to create slab cache for conntracks\n");
1194 return ret;
1196 write_lock_bh(&nf_conntrack_lock);
1197 list_prepend(&helpers, me);
1198 write_unlock_bh(&nf_conntrack_lock);
1200 return 0;
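/* A helper module is expected to fill in at least the fields this file relies
 * on before registering, e.g. (sketch, values are placeholders):
 *
 *	static struct nf_conntrack_helper my_helper = {
 *		.tuple		= { ... },	matched against the reply tuple
 *		.mask		= { ... },
 *		.max_expected	= 1,
 *		.timeout	= 5 * 60,	expectation timeout, in seconds
 *	};
 *
 *	nf_conntrack_helper_register(&my_helper);
 */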
1203 static inline int unhelp(struct nf_conntrack_tuple_hash *i,
1204 const struct nf_conntrack_helper *me)
1206 if (nf_ct_tuplehash_to_ctrack(i)->helper == me) {
1207 nf_conntrack_event(IPCT_HELPER, nf_ct_tuplehash_to_ctrack(i));
1208 nf_ct_tuplehash_to_ctrack(i)->helper = NULL;
1210 return 0;
1213 void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
1215 unsigned int i;
1216 struct nf_conntrack_expect *exp, *tmp;
1218 /* Need write lock here, to delete helper. */
1219 write_lock_bh(&nf_conntrack_lock);
1220 LIST_DELETE(&helpers, me);
1222 /* Get rid of expectations */
1223 list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
1224 if (exp->master->helper == me && del_timer(&exp->timeout)) {
1225 nf_ct_unlink_expect(exp);
1226 nf_conntrack_expect_put(exp);
1230 /* Get rid of expecteds, set helpers to NULL. */
1231 LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me);
1232 for (i = 0; i < nf_conntrack_htable_size; i++)
1233 LIST_FIND_W(&nf_conntrack_hash[i], unhelp,
1234 struct nf_conntrack_tuple_hash *, me);
1235 write_unlock_bh(&nf_conntrack_lock);
1237 /* Someone could be still looking at the helper in a bh. */
1238 synchronize_net();
1241 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
1242 void __nf_ct_refresh_acct(struct nf_conn *ct,
1243 enum ip_conntrack_info ctinfo,
1244 const struct sk_buff *skb,
1245 unsigned long extra_jiffies,
1246 int do_acct)
1248 int event = 0;
1250 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
1251 NF_CT_ASSERT(skb);
1253 write_lock_bh(&nf_conntrack_lock);
1255 /* If not in hash table, timer will not be active yet */
1256 if (!nf_ct_is_confirmed(ct)) {
1257 ct->timeout.expires = extra_jiffies;
1258 event = IPCT_REFRESH;
1259 } else {
1260 /* Need del_timer for race avoidance (may already be dying). */
1261 if (del_timer(&ct->timeout)) {
1262 ct->timeout.expires = jiffies + extra_jiffies;
1263 add_timer(&ct->timeout);
1264 event = IPCT_REFRESH;
1268 #ifdef CONFIG_NF_CT_ACCT
1269 if (do_acct) {
1270 ct->counters[CTINFO2DIR(ctinfo)].packets++;
1271 ct->counters[CTINFO2DIR(ctinfo)].bytes +=
1272 skb->len - (unsigned int)(skb->nh.raw - skb->data);
1273 if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
1274 || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
1275 event |= IPCT_COUNTER_FILLING;
1277 #endif
1279 write_unlock_bh(&nf_conntrack_lock);
1281 /* must be unlocked when calling event cache */
1282 if (event)
1283 nf_conntrack_event_cache(event, skb);
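/* Protocol trackers call this on every packet to push the connection's
 * timeout forward by a per-state number of jiffies and, with
 * CONFIG_NF_CT_ACCT, to account packets and bytes; the nf_ct_refresh*()
 * wrappers declared next to this function's prototype are the usual entry
 * points, and passing do_acct=0 skips the accounting. */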
1286 /* Used by ipt_REJECT and ip6t_REJECT. */
1287 void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
1289 struct nf_conn *ct;
1290 enum ip_conntrack_info ctinfo;
1292 /* This ICMP is in reverse direction to the packet which caused it */
1293 ct = nf_ct_get(skb, &ctinfo);
1294 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1295 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
1296 else
1297 ctinfo = IP_CT_RELATED;
1299 /* Attach to new skbuff, and increment count */
1300 nskb->nfct = &ct->ct_general;
1301 nskb->nfctinfo = ctinfo;
1302 nf_conntrack_get(nskb->nfct);
1305 static inline int
1306 do_iter(const struct nf_conntrack_tuple_hash *i,
1307 int (*iter)(struct nf_conn *i, void *data),
1308 void *data)
1310 return iter(nf_ct_tuplehash_to_ctrack(i), data);
1313 /* Bring out ya dead! */
1314 static struct nf_conntrack_tuple_hash *
1315 get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
1316 void *data, unsigned int *bucket)
1318 struct nf_conntrack_tuple_hash *h = NULL;
1320 write_lock_bh(&nf_conntrack_lock);
1321 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
1322 h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter,
1323 struct nf_conntrack_tuple_hash *, iter, data);
1324 if (h)
1325 break;
1327 if (!h)
1328 h = LIST_FIND_W(&unconfirmed, do_iter,
1329 struct nf_conntrack_tuple_hash *, iter, data);
1330 if (h)
1331 atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
1332 write_unlock_bh(&nf_conntrack_lock);
1334 return h;
1337 void
1338 nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
1340 struct nf_conntrack_tuple_hash *h;
1341 unsigned int bucket = 0;
1343 while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
1344 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
1345 /* Time to push up daisies... */
1346 if (del_timer(&ct->timeout))
1347 death_by_timeout((unsigned long)ct);
1348 /* ... else the timer will get him soon. */
1350 nf_ct_put(ct);
1354 static int kill_all(struct nf_conn *i, void *data)
1356 return 1;
1359 static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
1361 if (vmalloced)
1362 vfree(hash);
1363 else
1364 free_pages((unsigned long)hash,
1365 get_order(sizeof(struct list_head) * size));
1368 /* Mishearing the voices in his head, our hero wonders how he's
1369 supposed to kill the mall. */
1370 void nf_conntrack_cleanup(void)
1372 int i;
1374 /* This makes sure all current packets have passed through
1375 netfilter framework. Roll on, two-stage module
1376 delete... */
1377 synchronize_net();
1379 nf_ct_event_cache_flush();
1380 i_see_dead_people:
1381 nf_ct_iterate_cleanup(kill_all, NULL);
1382 if (atomic_read(&nf_conntrack_count) != 0) {
1383 schedule();
1384 goto i_see_dead_people;
1386 /* wait until all references to nf_conntrack_untracked are dropped */
1387 while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
1388 schedule();
1390 for (i = 0; i < NF_CT_F_NUM; i++) {
1391 if (nf_ct_cache[i].use == 0)
1392 continue;
1394 NF_CT_ASSERT(nf_ct_cache[i].use == 1);
1395 nf_ct_cache[i].use = 1;
1396 nf_conntrack_unregister_cache(i);
1398 kmem_cache_destroy(nf_conntrack_expect_cachep);
1399 free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
1400 nf_conntrack_htable_size);
1402 /* free l3proto protocol tables */
1403 for (i = 0; i < PF_MAX; i++)
1404 if (nf_ct_protos[i]) {
1405 kfree(nf_ct_protos[i]);
1406 nf_ct_protos[i] = NULL;
1410 static struct list_head *alloc_hashtable(int size, int *vmalloced)
1412 struct list_head *hash;
1413 unsigned int i;
1415 *vmalloced = 0;
1416 hash = (void*)__get_free_pages(GFP_KERNEL,
1417 get_order(sizeof(struct list_head)
1418 * size));
1419 if (!hash) {
1420 *vmalloced = 1;
1421 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
1422 hash = vmalloc(sizeof(struct list_head) * size);
1425 if (hash)
1426 for (i = 0; i < size; i++)
1427 INIT_LIST_HEAD(&hash[i]);
1429 return hash;
1432 int set_hashsize(const char *val, struct kernel_param *kp)
1434 int i, bucket, hashsize, vmalloced;
1435 int old_vmalloced, old_size;
1436 int rnd;
1437 struct list_head *hash, *old_hash;
1438 struct nf_conntrack_tuple_hash *h;
1440 /* On boot, we can set this without any fancy locking. */
1441 if (!nf_conntrack_htable_size)
1442 return param_set_uint(val, kp);
1444 hashsize = simple_strtol(val, NULL, 0);
1445 if (!hashsize)
1446 return -EINVAL;
1448 hash = alloc_hashtable(hashsize, &vmalloced);
1449 if (!hash)
1450 return -ENOMEM;
1452 /* We have to rehash for the new table anyway, so we can also
1453 * use a new random seed */
1454 get_random_bytes(&rnd, 4);
1456 write_lock_bh(&nf_conntrack_lock);
1457 for (i = 0; i < nf_conntrack_htable_size; i++) {
1458 while (!list_empty(&nf_conntrack_hash[i])) {
1459 h = list_entry(nf_conntrack_hash[i].next,
1460 struct nf_conntrack_tuple_hash, list);
1461 list_del(&h->list);
1462 bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
1463 list_add_tail(&h->list, &hash[bucket]);
1466 old_size = nf_conntrack_htable_size;
1467 old_vmalloced = nf_conntrack_vmalloc;
1468 old_hash = nf_conntrack_hash;
1470 nf_conntrack_htable_size = hashsize;
1471 nf_conntrack_vmalloc = vmalloced;
1472 nf_conntrack_hash = hash;
1473 nf_conntrack_hash_rnd = rnd;
1474 write_unlock_bh(&nf_conntrack_lock);
1476 free_conntrack_hash(old_hash, old_vmalloced, old_size);
1477 return 0;
1480 module_param_call(hashsize, set_hashsize, param_get_uint,
1481 &nf_conntrack_htable_size, 0600);
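/* With the parameter exposed read/write (mode 0600), the hash table can be
 * resized at runtime, e.g. (assuming sysfs module parameters are available):
 *
 *	echo 16384 > /sys/module/nf_conntrack/parameters/hashsize
 *
 * set_hashsize() then rehashes every entry into the new table while holding
 * nf_conntrack_lock. */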
1483 int __init nf_conntrack_init(void)
1485 unsigned int i;
1486 int ret;
1488 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1489 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
1490 if (!nf_conntrack_htable_size) {
1491 nf_conntrack_htable_size
1492 = (((num_physpages << PAGE_SHIFT) / 16384)
1493 / sizeof(struct list_head));
1494 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
1495 nf_conntrack_htable_size = 8192;
1496 if (nf_conntrack_htable_size < 16)
1497 nf_conntrack_htable_size = 16;
1499 nf_conntrack_max = 8 * nf_conntrack_htable_size;
1501 printk("nf_conntrack version %s (%u buckets, %d max)\n",
1502 NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
1503 nf_conntrack_max);
1505 nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size,
1506 &nf_conntrack_vmalloc);
1507 if (!nf_conntrack_hash) {
1508 printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
1509 goto err_out;
1512 ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic",
1513 sizeof(struct nf_conn), NULL);
1514 if (ret < 0) {
1515 printk(KERN_ERR "Unable to create nf_conn slab cache\n");
1516 goto err_free_hash;
1519 nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect",
1520 sizeof(struct nf_conntrack_expect),
1521 0, 0, NULL, NULL);
1522 if (!nf_conntrack_expect_cachep) {
1523 printk(KERN_ERR "Unable to create nf_expect slab cache\n");
1524 goto err_free_conntrack_slab;
1527 /* Don't NEED lock here, but good form anyway. */
1528 write_lock_bh(&nf_conntrack_lock);
1529 for (i = 0; i < PF_MAX; i++)
1530 nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto;
1531 write_unlock_bh(&nf_conntrack_lock);
1533 /* Set up fake conntrack:
1534 - to never be deleted, not in any hashes */
1535 atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
1536 /* - and make it look like a confirmed connection */
1537 set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
1539 return ret;
1541 err_free_conntrack_slab:
1542 nf_conntrack_unregister_cache(NF_CT_F_BASIC);
1543 err_free_hash:
1544 free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
1545 nf_conntrack_htable_size);
1546 err_out:
1547 return -ENOMEM;