net/netfilter/nf_conntrack_core.c
1 /* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables
3 extension. */
5 /* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
13 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
14 * - new API and handling of conntrack/nat helpers
15 * - now capable of multiple expectations for one master
16 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
17 * - add usage/reference counts to ip_conntrack_expect
18 * - export ip_conntrack[_expect]_{find_get,put} functions
19 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
20 * - generalize L3 protocol dependent part.
21 * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
22 * - add support for various sizes of conntrack structures.
24 * Derived from net/ipv4/netfilter/ip_conntrack_core.c
27 #include <linux/config.h>
28 #include <linux/types.h>
29 #include <linux/netfilter.h>
30 #include <linux/module.h>
31 #include <linux/skbuff.h>
32 #include <linux/proc_fs.h>
33 #include <linux/vmalloc.h>
34 #include <linux/stddef.h>
35 #include <linux/slab.h>
36 #include <linux/random.h>
37 #include <linux/jhash.h>
38 #include <linux/err.h>
39 #include <linux/percpu.h>
40 #include <linux/moduleparam.h>
41 #include <linux/notifier.h>
42 #include <linux/kernel.h>
43 #include <linux/netdevice.h>
44 #include <linux/socket.h>
46 /* This rwlock protects the main hash table, protocol/helper/expected
47 registrations, conntrack timers */
48 #define ASSERT_READ_LOCK(x)
49 #define ASSERT_WRITE_LOCK(x)
51 #include <net/netfilter/nf_conntrack.h>
52 #include <net/netfilter/nf_conntrack_l3proto.h>
53 #include <net/netfilter/nf_conntrack_protocol.h>
54 #include <net/netfilter/nf_conntrack_helper.h>
55 #include <net/netfilter/nf_conntrack_core.h>
56 #include <linux/netfilter_ipv4/listhelp.h>
58 #define NF_CONNTRACK_VERSION "0.4.1"
60 #if 0
61 #define DEBUGP printk
62 #else
63 #define DEBUGP(format, args...)
64 #endif
66 DEFINE_RWLOCK(nf_conntrack_lock);
68 /* nf_conntrack_standalone needs this */
69 atomic_t nf_conntrack_count = ATOMIC_INIT(0);
71 void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
72 LIST_HEAD(nf_conntrack_expect_list);
73 struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
74 struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX];
75 static LIST_HEAD(helpers);
76 unsigned int nf_conntrack_htable_size = 0;
77 int nf_conntrack_max;
78 struct list_head *nf_conntrack_hash;
79 static kmem_cache_t *nf_conntrack_expect_cachep;
80 struct nf_conn nf_conntrack_untracked;
81 unsigned int nf_ct_log_invalid;
82 static LIST_HEAD(unconfirmed);
83 static int nf_conntrack_vmalloc;
85 static unsigned int nf_conntrack_next_id = 1;
86 static unsigned int nf_conntrack_expect_next_id = 1;
87 #ifdef CONFIG_NF_CONNTRACK_EVENTS
88 struct notifier_block *nf_conntrack_chain;
89 struct notifier_block *nf_conntrack_expect_chain;
91 DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
93 /* deliver cached events and clear cache entry - must be called with locally
94 * disabled softirqs */
95 static inline void
96 __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
98 DEBUGP("ecache: delivering events for %p\n", ecache->ct);
99 if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
100 && ecache->events)
101 notifier_call_chain(&nf_conntrack_chain, ecache->events,
102 ecache->ct);
104 ecache->events = 0;
105 nf_ct_put(ecache->ct);
106 ecache->ct = NULL;
109 /* Deliver all cached events for a particular conntrack. This is called
110 * by code prior to async packet handling for freeing the skb */
111 void nf_ct_deliver_cached_events(const struct nf_conn *ct)
113 struct nf_conntrack_ecache *ecache;
115 local_bh_disable();
116 ecache = &__get_cpu_var(nf_conntrack_ecache);
117 if (ecache->ct == ct)
118 __nf_ct_deliver_cached_events(ecache);
119 local_bh_enable();
122 /* Deliver cached events for old pending events, if current conntrack != old */
123 void __nf_ct_event_cache_init(struct nf_conn *ct)
125 struct nf_conntrack_ecache *ecache;
127 /* take care of delivering potentially old events */
128 ecache = &__get_cpu_var(nf_conntrack_ecache);
129 BUG_ON(ecache->ct == ct);
130 if (ecache->ct)
131 __nf_ct_deliver_cached_events(ecache);
132 /* initialize for this conntrack/packet */
133 ecache->ct = ct;
134 nf_conntrack_get(&ct->ct_general);
137 /* flush the event cache - touches other CPU's data and must not be called
138 * while packets are still passing through the code */
139 static void nf_ct_event_cache_flush(void)
141 struct nf_conntrack_ecache *ecache;
142 int cpu;
144 for_each_cpu(cpu) {
145 ecache = &per_cpu(nf_conntrack_ecache, cpu);
146 if (ecache->ct)
147 nf_ct_put(ecache->ct);
150 #else
151 static inline void nf_ct_event_cache_flush(void) {}
152 #endif /* CONFIG_NF_CONNTRACK_EVENTS */
154 DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
155 EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
158 * This scheme offers various sizes of "struct nf_conn" depending on
159 * the features (helper, nat, ...)
162 #define NF_CT_FEATURES_NAMELEN 256
163 static struct {
164 /* name of slab cache. printed in /proc/slabinfo */
165 char *name;
167 /* size of slab cache */
168 size_t size;
170 /* slab cache pointer */
171 kmem_cache_t *cachep;
173 /* allocated slab cache + modules which use this slab cache */
174 int use;
176 /* Initialization */
177 int (*init_conntrack)(struct nf_conn *, u_int32_t);
179 } nf_ct_cache[NF_CT_F_NUM];
181 /* protect members of nf_ct_cache except "use" */
182 DEFINE_RWLOCK(nf_ct_cache_lock);
184 /* This avoids calling kmem_cache_create() with the same name simultaneously */
185 DECLARE_MUTEX(nf_ct_cache_mutex);
187 extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
188 struct nf_conntrack_protocol *
189 __nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol)
191 if (unlikely(nf_ct_protos[l3proto] == NULL))
192 return &nf_conntrack_generic_protocol;
194 return nf_ct_protos[l3proto][protocol];
197 /* this is guaranteed to always return a valid protocol helper, since
198 * it falls back to generic_protocol */
199 struct nf_conntrack_protocol *
200 nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol)
202 struct nf_conntrack_protocol *p;
204 preempt_disable();
205 p = __nf_ct_proto_find(l3proto, protocol);
206 if (p) {
207 if (!try_module_get(p->me))
208 p = &nf_conntrack_generic_protocol;
210 preempt_enable();
212 return p;
215 void nf_ct_proto_put(struct nf_conntrack_protocol *p)
217 module_put(p->me);
220 struct nf_conntrack_l3proto *
221 nf_ct_l3proto_find_get(u_int16_t l3proto)
223 struct nf_conntrack_l3proto *p;
225 preempt_disable();
226 p = __nf_ct_l3proto_find(l3proto);
227 if (p) {
228 if (!try_module_get(p->me))
229 p = &nf_conntrack_generic_l3proto;
231 preempt_enable();
233 return p;
236 void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
238 module_put(p->me);
241 static int nf_conntrack_hash_rnd_initted;
242 static unsigned int nf_conntrack_hash_rnd;
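/* Hash a tuple into a bucket: jhash the source and destination L3 addresses,
 * folding in l3num, protonum and the L4 ids, mix the result with the
 * boot-time random seed and reduce modulo the table size. */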
244 static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
245 unsigned int size, unsigned int rnd)
247 unsigned int a, b;
248 a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all),
249 ((tuple->src.l3num) << 16) | tuple->dst.protonum);
250 b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all),
251 (tuple->src.u.all << 16) | tuple->dst.u.all);
253 return jhash_2words(a, b, rnd) % size;
256 static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
258 return __hash_conntrack(tuple, nf_conntrack_htable_size,
259 nf_conntrack_hash_rnd);
262 /* Initialize a "struct nf_conn" which has space for a helper */
263 static int
264 init_conntrack_for_helper(struct nf_conn *conntrack, u_int32_t features)
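	/* The helper area lives in the variable-length data[] tail of struct
	 * nf_conn; round its start up to the alignment required by
	 * union nf_conntrack_help. */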
267 conntrack->help = (union nf_conntrack_help *)
268 (((unsigned long)conntrack->data
269 + (__alignof__(union nf_conntrack_help) - 1))
270 & (~((unsigned long)(__alignof__(union nf_conntrack_help) -1))));
271 return 0;
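/* Register (or reuse) a slab cache for conntracks that carry the given
 * feature bits.  A typical caller is a helper module, for example:
 *	nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
 *				    sizeof(struct nf_conn)
 *				    + sizeof(union nf_conntrack_help)
 *				    + __alignof__(union nf_conntrack_help),
 *				    init_conntrack_for_helper);
 * Multiple users of the same feature set share one cache via a use count. */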
274 int nf_conntrack_register_cache(u_int32_t features, const char *name,
275 size_t size,
276 int (*init)(struct nf_conn *, u_int32_t))
278 int ret = 0;
279 char *cache_name;
280 kmem_cache_t *cachep;
282 DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n",
283 features, name, size);
285 if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) {
286 DEBUGP("nf_conntrack_register_cache: invalid features.: 0x%x\n",
287 features);
288 return -EINVAL;
291 down(&nf_ct_cache_mutex);
293 write_lock_bh(&nf_ct_cache_lock);
294 /* e.g: multiple helpers are loaded */
295 if (nf_ct_cache[features].use > 0) {
296 DEBUGP("nf_conntrack_register_cache: already registered.\n");
297 if ((!strncmp(nf_ct_cache[features].name, name,
298 NF_CT_FEATURES_NAMELEN))
299 && nf_ct_cache[features].size == size
300 && nf_ct_cache[features].init_conntrack == init) {
301 DEBUGP("nf_conntrack_register_cache: reusing.\n");
302 nf_ct_cache[features].use++;
303 ret = 0;
304 } else
305 ret = -EBUSY;
307 write_unlock_bh(&nf_ct_cache_lock);
308 up(&nf_ct_cache_mutex);
309 return ret;
311 write_unlock_bh(&nf_ct_cache_lock);
314 * The memory space for the name of the slab cache must stay alive until
315 * the cache is destroyed.
317 cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC);
318 if (cache_name == NULL) {
319 DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n");
320 ret = -ENOMEM;
321 goto out_up_mutex;
324 if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN)
325 >= NF_CT_FEATURES_NAMELEN) {
326 printk("nf_conntrack_register_cache: name too long\n");
327 ret = -EINVAL;
328 goto out_free_name;
331 cachep = kmem_cache_create(cache_name, size, 0, 0,
332 NULL, NULL);
333 if (!cachep) {
334 printk("nf_conntrack_register_cache: Can't create slab cache "
335 "for the features = 0x%x\n", features);
336 ret = -ENOMEM;
337 goto out_free_name;
340 write_lock_bh(&nf_ct_cache_lock);
341 nf_ct_cache[features].use = 1;
342 nf_ct_cache[features].size = size;
343 nf_ct_cache[features].init_conntrack = init;
344 nf_ct_cache[features].cachep = cachep;
345 nf_ct_cache[features].name = cache_name;
346 write_unlock_bh(&nf_ct_cache_lock);
348 goto out_up_mutex;
350 out_free_name:
351 kfree(cache_name);
352 out_up_mutex:
353 up(&nf_ct_cache_mutex);
354 return ret;
357 /* FIXME: Currently, only nf_conntrack_cleanup() can call this function. */
358 void nf_conntrack_unregister_cache(u_int32_t features)
360 kmem_cache_t *cachep;
361 char *name;
364 * This ensures that kmem_cache_create() isn't called before the
365 * slab cache is destroyed.
367 DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features);
368 down(&nf_ct_cache_mutex);
370 write_lock_bh(&nf_ct_cache_lock);
371 if (--nf_ct_cache[features].use > 0) {
372 write_unlock_bh(&nf_ct_cache_lock);
373 up(&nf_ct_cache_mutex);
374 return;
376 cachep = nf_ct_cache[features].cachep;
377 name = nf_ct_cache[features].name;
378 nf_ct_cache[features].cachep = NULL;
379 nf_ct_cache[features].name = NULL;
380 nf_ct_cache[features].init_conntrack = NULL;
381 nf_ct_cache[features].size = 0;
382 write_unlock_bh(&nf_ct_cache_lock);
384 synchronize_net();
386 kmem_cache_destroy(cachep);
387 kfree(name);
389 up(&nf_ct_cache_mutex);
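/* Fill in a tuple from a packet: the L3 protocol extracts the addresses at
 * nhoff, the L4 protocol extracts ports/ids at dataoff.  Returns false if
 * the packet cannot be tracked. */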
393 nf_ct_get_tuple(const struct sk_buff *skb,
394 unsigned int nhoff,
395 unsigned int dataoff,
396 u_int16_t l3num,
397 u_int8_t protonum,
398 struct nf_conntrack_tuple *tuple,
399 const struct nf_conntrack_l3proto *l3proto,
400 const struct nf_conntrack_protocol *protocol)
402 NF_CT_TUPLE_U_BLANK(tuple);
404 tuple->src.l3num = l3num;
405 if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
406 return 0;
408 tuple->dst.protonum = protonum;
409 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
411 return protocol->pkt_to_tuple(skb, dataoff, tuple);
415 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
416 const struct nf_conntrack_tuple *orig,
417 const struct nf_conntrack_l3proto *l3proto,
418 const struct nf_conntrack_protocol *protocol)
420 NF_CT_TUPLE_U_BLANK(inverse);
422 inverse->src.l3num = orig->src.l3num;
423 if (l3proto->invert_tuple(inverse, orig) == 0)
424 return 0;
426 inverse->dst.dir = !orig->dst.dir;
428 inverse->dst.protonum = orig->dst.protonum;
429 return protocol->invert_tuple(inverse, orig);
432 /* nf_conntrack_expect helper functions */
433 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
435 ASSERT_WRITE_LOCK(&nf_conntrack_lock);
436 NF_CT_ASSERT(!timer_pending(&exp->timeout));
437 list_del(&exp->list);
438 NF_CT_STAT_INC(expect_delete);
439 exp->master->expecting--;
440 nf_conntrack_expect_put(exp);
443 static void expectation_timed_out(unsigned long ul_expect)
445 struct nf_conntrack_expect *exp = (void *)ul_expect;
447 write_lock_bh(&nf_conntrack_lock);
448 nf_ct_unlink_expect(exp);
449 write_unlock_bh(&nf_conntrack_lock);
450 nf_conntrack_expect_put(exp);
453 struct nf_conntrack_expect *
454 __nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
456 struct nf_conntrack_expect *i;
458 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
459 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
460 atomic_inc(&i->use);
461 return i;
464 return NULL;
467 /* Just find an expectation corresponding to a tuple. */
468 struct nf_conntrack_expect *
469 nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
471 struct nf_conntrack_expect *i;
473 read_lock_bh(&nf_conntrack_lock);
474 i = __nf_conntrack_expect_find(tuple);
475 read_unlock_bh(&nf_conntrack_lock);
477 return i;
480 /* If an expectation for this connection is found, it is deleted from
481 * the global list and returned. */
482 static struct nf_conntrack_expect *
483 find_expectation(const struct nf_conntrack_tuple *tuple)
485 struct nf_conntrack_expect *i;
487 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
488 /* If master is not in hash table yet (ie. packet hasn't left
489 this machine yet), how can other end know about expected?
490 Hence these are not the droids you are looking for (if
491 master ct never got confirmed, we'd hold a reference to it
492 and weird things would happen to future packets). */
493 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
494 && nf_ct_is_confirmed(i->master)) {
495 if (i->flags & NF_CT_EXPECT_PERMANENT) {
496 atomic_inc(&i->use);
497 return i;
498 } else if (del_timer(&i->timeout)) {
499 nf_ct_unlink_expect(i);
500 return i;
504 return NULL;
507 /* delete all expectations for this conntrack */
508 void nf_ct_remove_expectations(struct nf_conn *ct)
510 struct nf_conntrack_expect *i, *tmp;
512 /* Optimization: most connections never expect any others. */
513 if (ct->expecting == 0)
514 return;
516 list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
517 if (i->master == ct && del_timer(&i->timeout)) {
518 nf_ct_unlink_expect(i);
519 nf_conntrack_expect_put(i);
524 static void
525 clean_from_lists(struct nf_conn *ct)
527 unsigned int ho, hr;
529 DEBUGP("clean_from_lists(%p)\n", ct);
530 ASSERT_WRITE_LOCK(&nf_conntrack_lock);
532 ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
533 hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
534 LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
535 LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
537 /* Destroy all pending expectations */
538 nf_ct_remove_expectations(ct);
541 static void
542 destroy_conntrack(struct nf_conntrack *nfct)
544 struct nf_conn *ct = (struct nf_conn *)nfct;
545 struct nf_conntrack_l3proto *l3proto;
546 struct nf_conntrack_protocol *proto;
548 DEBUGP("destroy_conntrack(%p)\n", ct);
549 NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
550 NF_CT_ASSERT(!timer_pending(&ct->timeout));
552 nf_conntrack_event(IPCT_DESTROY, ct);
553 set_bit(IPS_DYING_BIT, &ct->status);
555 /* To make sure we don't get any weird locking issues here:
556 * destroy_conntrack() MUST NOT be called with a write lock
557 * to nf_conntrack_lock!!! -HW */
558 l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
559 if (l3proto && l3proto->destroy)
560 l3proto->destroy(ct);
562 proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
563 if (proto && proto->destroy)
564 proto->destroy(ct);
566 if (nf_conntrack_destroyed)
567 nf_conntrack_destroyed(ct);
569 write_lock_bh(&nf_conntrack_lock);
570 /* Expectations will have been removed in clean_from_lists,
571 * except TFTP can create an expectation on the first packet,
572 * before connection is in the list, so we need to clean here,
573 * too. */
574 nf_ct_remove_expectations(ct);
576 /* We overload first tuple to link into unconfirmed list. */
577 if (!nf_ct_is_confirmed(ct)) {
578 BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
579 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
582 NF_CT_STAT_INC(delete);
583 write_unlock_bh(&nf_conntrack_lock);
585 if (ct->master)
586 nf_ct_put(ct->master);
588 DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
589 nf_conntrack_free(ct);
592 static void death_by_timeout(unsigned long ul_conntrack)
594 struct nf_conn *ct = (void *)ul_conntrack;
596 write_lock_bh(&nf_conntrack_lock);
597 /* Inside lock so preempt is disabled on module removal path.
598 * Otherwise we can get spurious warnings. */
599 NF_CT_STAT_INC(delete_list);
600 clean_from_lists(ct);
601 write_unlock_bh(&nf_conntrack_lock);
602 nf_ct_put(ct);
605 static inline int
606 conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
607 const struct nf_conntrack_tuple *tuple,
608 const struct nf_conn *ignored_conntrack)
610 ASSERT_READ_LOCK(&nf_conntrack_lock);
611 return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack
612 && nf_ct_tuple_equal(tuple, &i->tuple);
615 struct nf_conntrack_tuple_hash *
616 __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
617 const struct nf_conn *ignored_conntrack)
619 struct nf_conntrack_tuple_hash *h;
620 unsigned int hash = hash_conntrack(tuple);
622 ASSERT_READ_LOCK(&nf_conntrack_lock);
623 list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
624 if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
625 NF_CT_STAT_INC(found);
626 return h;
628 NF_CT_STAT_INC(searched);
631 return NULL;
634 /* Find a connection corresponding to a tuple. */
635 struct nf_conntrack_tuple_hash *
636 nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
637 const struct nf_conn *ignored_conntrack)
639 struct nf_conntrack_tuple_hash *h;
641 read_lock_bh(&nf_conntrack_lock);
642 h = __nf_conntrack_find(tuple, ignored_conntrack);
643 if (h)
644 atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
645 read_unlock_bh(&nf_conntrack_lock);
647 return h;
650 static void __nf_conntrack_hash_insert(struct nf_conn *ct,
651 unsigned int hash,
652 unsigned int repl_hash)
654 ct->id = ++nf_conntrack_next_id;
655 list_prepend(&nf_conntrack_hash[hash],
656 &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
657 list_prepend(&nf_conntrack_hash[repl_hash],
658 &ct->tuplehash[IP_CT_DIR_REPLY].list);
661 void nf_conntrack_hash_insert(struct nf_conn *ct)
663 unsigned int hash, repl_hash;
665 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
666 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
668 write_lock_bh(&nf_conntrack_lock);
669 __nf_conntrack_hash_insert(ct, hash, repl_hash);
670 write_unlock_bh(&nf_conntrack_lock);
673 /* Confirm a connection given skb; places it in hash table */
675 __nf_conntrack_confirm(struct sk_buff **pskb)
677 unsigned int hash, repl_hash;
678 struct nf_conn *ct;
679 enum ip_conntrack_info ctinfo;
681 ct = nf_ct_get(*pskb, &ctinfo);
683 /* ipt_REJECT uses nf_conntrack_attach to attach related
684 ICMP/TCP RST packets in other direction. Actual packet
685 which created connection will be IP_CT_NEW or for an
686 expected connection, IP_CT_RELATED. */
687 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
688 return NF_ACCEPT;
690 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
691 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
693 /* We're not in hash table, and we refuse to set up related
694 connections for unconfirmed conns. But packet copies and
695 REJECT will give spurious warnings here. */
696 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
698 /* No external references means no one else could have
699 confirmed us. */
700 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
701 DEBUGP("Confirming conntrack %p\n", ct);
703 write_lock_bh(&nf_conntrack_lock);
705 /* See if there's one in the list already, including reverse:
706 NAT could have grabbed it without realizing, since we're
707 not in the hash. If there is, we lost race. */
708 if (!LIST_FIND(&nf_conntrack_hash[hash],
709 conntrack_tuple_cmp,
710 struct nf_conntrack_tuple_hash *,
711 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
712 && !LIST_FIND(&nf_conntrack_hash[repl_hash],
713 conntrack_tuple_cmp,
714 struct nf_conntrack_tuple_hash *,
715 &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
716 /* Remove from unconfirmed list */
717 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
719 __nf_conntrack_hash_insert(ct, hash, repl_hash);
720 /* Timer relative to confirmation time, not original
721 setting time, otherwise we'd get timer wrap in
722 weird delay cases. */
723 ct->timeout.expires += jiffies;
724 add_timer(&ct->timeout);
725 atomic_inc(&ct->ct_general.use);
726 set_bit(IPS_CONFIRMED_BIT, &ct->status);
727 NF_CT_STAT_INC(insert);
728 write_unlock_bh(&nf_conntrack_lock);
729 if (ct->helper)
730 nf_conntrack_event_cache(IPCT_HELPER, *pskb);
731 #ifdef CONFIG_NF_NAT_NEEDED
732 if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
733 test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
734 nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
735 #endif
736 nf_conntrack_event_cache(master_ct(ct) ?
737 IPCT_RELATED : IPCT_NEW, *pskb);
738 return NF_ACCEPT;
741 NF_CT_STAT_INC(insert_failed);
742 write_unlock_bh(&nf_conntrack_lock);
743 return NF_DROP;
746 /* Returns true if a connection corresponds to the tuple (required
747 for NAT). */
749 nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
750 const struct nf_conn *ignored_conntrack)
752 struct nf_conntrack_tuple_hash *h;
754 read_lock_bh(&nf_conntrack_lock);
755 h = __nf_conntrack_find(tuple, ignored_conntrack);
756 read_unlock_bh(&nf_conntrack_lock);
758 return h != NULL;
761 /* There's a small race here where we may free a just-assured
762 connection. Too bad: we're in trouble anyway. */
763 static inline int unreplied(const struct nf_conntrack_tuple_hash *i)
765 return !(test_bit(IPS_ASSURED_BIT,
766 &nf_ct_tuplehash_to_ctrack(i)->status));
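/* Evict one unassured ("unreplied") entry from the given hash chain to make
 * room when the table is full; returns 1 if something was dropped. */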
769 static int early_drop(struct list_head *chain)
771 /* Traverse backwards: gives us oldest, which is roughly LRU */
772 struct nf_conntrack_tuple_hash *h;
773 struct nf_conn *ct = NULL;
774 int dropped = 0;
776 read_lock_bh(&nf_conntrack_lock);
777 h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *);
778 if (h) {
779 ct = nf_ct_tuplehash_to_ctrack(h);
780 atomic_inc(&ct->ct_general.use);
782 read_unlock_bh(&nf_conntrack_lock);
784 if (!ct)
785 return dropped;
787 if (del_timer(&ct->timeout)) {
788 death_by_timeout((unsigned long)ct);
789 dropped = 1;
790 NF_CT_STAT_INC(early_drop);
792 nf_ct_put(ct);
793 return dropped;
796 static inline int helper_cmp(const struct nf_conntrack_helper *i,
797 const struct nf_conntrack_tuple *rtuple)
799 return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
802 static struct nf_conntrack_helper *
803 __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
805 return LIST_FIND(&helpers, helper_cmp,
806 struct nf_conntrack_helper *,
807 tuple);
810 struct nf_conntrack_helper *
811 nf_ct_helper_find_get(const struct nf_conntrack_tuple *tuple)
813 struct nf_conntrack_helper *helper;
815 /* need nf_conntrack_lock to assure that helper exists until
816 * try_module_get() is called */
817 read_lock_bh(&nf_conntrack_lock);
819 helper = __nf_ct_helper_find(tuple);
820 if (helper) {
821 /* need to increase module usage count to assure helper will
822 * not go away while the caller is e.g. busy putting a
823 * conntrack in the hash that uses the helper */
824 if (!try_module_get(helper->me))
825 helper = NULL;
828 read_unlock_bh(&nf_conntrack_lock);
830 return helper;
833 void nf_ct_helper_put(struct nf_conntrack_helper *helper)
835 module_put(helper->me);
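/* Allocate a conntrack from the slab cache matching the features (helper,
 * NAT, ...) this connection needs.  May early-drop an old unassured entry
 * when the table is full; returns ERR_PTR(-ENOMEM) under pressure and NULL
 * on allocation failure. */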
838 static struct nf_conn *
839 __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
840 const struct nf_conntrack_tuple *repl,
841 const struct nf_conntrack_l3proto *l3proto)
843 struct nf_conn *conntrack = NULL;
844 u_int32_t features = 0;
846 if (!nf_conntrack_hash_rnd_initted) {
847 get_random_bytes(&nf_conntrack_hash_rnd, 4);
848 nf_conntrack_hash_rnd_initted = 1;
851 if (nf_conntrack_max
852 && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) {
853 unsigned int hash = hash_conntrack(orig);
854 /* Try dropping from this hash chain. */
855 if (!early_drop(&nf_conntrack_hash[hash])) {
856 if (net_ratelimit())
857 printk(KERN_WARNING
858 "nf_conntrack: table full, dropping"
859 " packet.\n");
860 return ERR_PTR(-ENOMEM);
864 /* find features needed by this conntrack. */
865 features = l3proto->get_features(orig);
866 read_lock_bh(&nf_conntrack_lock);
867 if (__nf_ct_helper_find(repl) != NULL)
868 features |= NF_CT_F_HELP;
869 read_unlock_bh(&nf_conntrack_lock);
871 DEBUGP("nf_conntrack_alloc: features=0x%x\n", features);
873 read_lock_bh(&nf_ct_cache_lock);
875 if (!nf_ct_cache[features].use) {
876 DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n",
877 features);
878 goto out;
881 conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC);
882 if (conntrack == NULL) {
883 DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n");
884 goto out;
887 memset(conntrack, 0, nf_ct_cache[features].size);
888 conntrack->features = features;
889 if (nf_ct_cache[features].init_conntrack &&
890 nf_ct_cache[features].init_conntrack(conntrack, features) < 0) {
891 DEBUGP("nf_conntrack_alloc: failed to init\n");
892 kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
893 conntrack = NULL;
894 goto out;
897 atomic_set(&conntrack->ct_general.use, 1);
898 conntrack->ct_general.destroy = destroy_conntrack;
899 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
900 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
901 /* Don't set timer yet: wait for confirmation */
902 init_timer(&conntrack->timeout);
903 conntrack->timeout.data = (unsigned long)conntrack;
904 conntrack->timeout.function = death_by_timeout;
906 atomic_inc(&nf_conntrack_count);
907 out:
908 read_unlock_bh(&nf_ct_cache_lock);
909 return conntrack;
912 struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
913 const struct nf_conntrack_tuple *repl)
915 struct nf_conntrack_l3proto *l3proto;
917 l3proto = __nf_ct_l3proto_find(orig->src.l3num);
918 return __nf_conntrack_alloc(orig, repl, l3proto);
921 void nf_conntrack_free(struct nf_conn *conntrack)
923 u_int32_t features = conntrack->features;
924 NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM);
925 DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features,
926 conntrack);
927 kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
928 atomic_dec(&nf_conntrack_count);
931 /* Allocate a new conntrack: we return -ENOMEM if classification
932 failed due to stress. Otherwise it really is unclassifiable. */
933 static struct nf_conntrack_tuple_hash *
934 init_conntrack(const struct nf_conntrack_tuple *tuple,
935 struct nf_conntrack_l3proto *l3proto,
936 struct nf_conntrack_protocol *protocol,
937 struct sk_buff *skb,
938 unsigned int dataoff)
940 struct nf_conn *conntrack;
941 struct nf_conntrack_tuple repl_tuple;
942 struct nf_conntrack_expect *exp;
944 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) {
945 DEBUGP("Can't invert tuple.\n");
946 return NULL;
949 conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto);
950 if (conntrack == NULL || IS_ERR(conntrack)) {
951 DEBUGP("Can't allocate conntrack.\n");
952 return (struct nf_conntrack_tuple_hash *)conntrack;
955 if (!protocol->new(conntrack, skb, dataoff)) {
956 nf_conntrack_free(conntrack);
957 DEBUGP("init conntrack: can't track with proto module\n");
958 return NULL;
961 write_lock_bh(&nf_conntrack_lock);
962 exp = find_expectation(tuple);
964 if (exp) {
965 DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
966 conntrack, exp);
967 /* Welcome, Mr. Bond. We've been expecting you... */
968 __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
969 conntrack->master = exp->master;
970 #ifdef CONFIG_NF_CONNTRACK_MARK
971 conntrack->mark = exp->master->mark;
972 #endif
973 nf_conntrack_get(&conntrack->master->ct_general);
974 NF_CT_STAT_INC(expect_new);
975 } else {
976 conntrack->helper = __nf_ct_helper_find(&repl_tuple);
978 NF_CT_STAT_INC(new);
981 /* Overload tuple linked list to put us in unconfirmed list. */
982 list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
984 write_unlock_bh(&nf_conntrack_lock);
986 if (exp) {
987 if (exp->expectfn)
988 exp->expectfn(conntrack, exp);
989 nf_conntrack_expect_put(exp);
992 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
995 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
996 static inline struct nf_conn *
997 resolve_normal_ct(struct sk_buff *skb,
998 unsigned int dataoff,
999 u_int16_t l3num,
1000 u_int8_t protonum,
1001 struct nf_conntrack_l3proto *l3proto,
1002 struct nf_conntrack_protocol *proto,
1003 int *set_reply,
1004 enum ip_conntrack_info *ctinfo)
1006 struct nf_conntrack_tuple tuple;
1007 struct nf_conntrack_tuple_hash *h;
1008 struct nf_conn *ct;
1010 if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
1011 dataoff, l3num, protonum, &tuple, l3proto,
1012 proto)) {
1013 DEBUGP("resolve_normal_ct: Can't get tuple\n");
1014 return NULL;
1017 /* look for tuple match */
1018 h = nf_conntrack_find_get(&tuple, NULL);
1019 if (!h) {
1020 h = init_conntrack(&tuple, l3proto, proto, skb, dataoff);
1021 if (!h)
1022 return NULL;
1023 if (IS_ERR(h))
1024 return (void *)h;
1026 ct = nf_ct_tuplehash_to_ctrack(h);
1028 /* It exists; we have (non-exclusive) reference. */
1029 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
1030 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
1031 /* Please set reply bit if this packet OK */
1032 *set_reply = 1;
1033 } else {
1034 /* Once we've had two way comms, always ESTABLISHED. */
1035 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1036 DEBUGP("nf_conntrack_in: normal packet for %p\n", ct);
1037 *ctinfo = IP_CT_ESTABLISHED;
1038 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
1039 DEBUGP("nf_conntrack_in: related packet for %p\n", ct);
1040 *ctinfo = IP_CT_RELATED;
1041 } else {
1042 DEBUGP("nf_conntrack_in: new packet for %p\n", ct);
1043 *ctinfo = IP_CT_NEW;
1045 *set_reply = 0;
1047 skb->nfct = &ct->ct_general;
1048 skb->nfctinfo = *ctinfo;
1049 return ct;
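/* Main netfilter hook entry point: resolve (or create) the conntrack for
 * this packet, run the L4 protocol handler on it and cache any generated
 * events for later delivery. */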
1052 unsigned int
1053 nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
1055 struct nf_conn *ct;
1056 enum ip_conntrack_info ctinfo;
1057 struct nf_conntrack_l3proto *l3proto;
1058 struct nf_conntrack_protocol *proto;
1059 unsigned int dataoff;
1060 u_int8_t protonum;
1061 int set_reply = 0;
1062 int ret;
1064 /* Previously seen (loopback or untracked)? Ignore. */
1065 if ((*pskb)->nfct) {
1066 NF_CT_STAT_INC(ignore);
1067 return NF_ACCEPT;
1070 l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
1071 if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
1072 DEBUGP("not prepared to track yet or error occured\n");
1073 return -ret;
1076 proto = __nf_ct_proto_find((u_int16_t)pf, protonum);
1078 /* It may be a special packet, error, unclean...
1079 * the inverse of the return code tells the netfilter
1080 * core what to do with the packet. */
1081 if (proto->error != NULL &&
1082 (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
1083 NF_CT_STAT_INC(error);
1084 NF_CT_STAT_INC(invalid);
1085 return -ret;
1088 ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto,
1089 &set_reply, &ctinfo);
1090 if (!ct) {
1091 /* Not valid part of a connection */
1092 NF_CT_STAT_INC(invalid);
1093 return NF_ACCEPT;
1096 if (IS_ERR(ct)) {
1097 /* Too stressed to deal. */
1098 NF_CT_STAT_INC(drop);
1099 return NF_DROP;
1102 NF_CT_ASSERT((*pskb)->nfct);
1104 ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
1105 if (ret < 0) {
1106 /* Invalid: inverse of the return code tells
1107 * the netfilter core what to do */
1108 DEBUGP("nf_conntrack_in: Can't track with proto module\n");
1109 nf_conntrack_put((*pskb)->nfct);
1110 (*pskb)->nfct = NULL;
1111 NF_CT_STAT_INC(invalid);
1112 return -ret;
1115 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
1116 nf_conntrack_event_cache(IPCT_STATUS, *pskb);
1118 return ret;
1121 int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
1122 const struct nf_conntrack_tuple *orig)
1124 return nf_ct_invert_tuple(inverse, orig,
1125 __nf_ct_l3proto_find(orig->src.l3num),
1126 __nf_ct_proto_find(orig->src.l3num,
1127 orig->dst.protonum));
1130 /* Would two expected things clash? */
1131 static inline int expect_clash(const struct nf_conntrack_expect *a,
1132 const struct nf_conntrack_expect *b)
1134 /* Part covered by intersection of masks must be unequal,
1135 otherwise they clash */
1136 struct nf_conntrack_tuple intersect_mask;
1137 int count;
1139 intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
1140 intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
1141 intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
1142 intersect_mask.dst.protonum = a->mask.dst.protonum
1143 & b->mask.dst.protonum;
1145 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
1146 intersect_mask.src.u3.all[count] =
1147 a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
1150 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
1151 intersect_mask.dst.u3.all[count] =
1152 a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
1155 return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
1158 static inline int expect_matches(const struct nf_conntrack_expect *a,
1159 const struct nf_conntrack_expect *b)
1161 return a->master == b->master
1162 && nf_ct_tuple_equal(&a->tuple, &b->tuple)
1163 && nf_ct_tuple_equal(&a->mask, &b->mask);
1166 /* Generally a bad idea to call this: could have matched already. */
1167 void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
1169 struct nf_conntrack_expect *i;
1171 write_lock_bh(&nf_conntrack_lock);
1172 /* choose the oldest expectation to evict */
1173 list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
1174 if (expect_matches(i, exp) && del_timer(&i->timeout)) {
1175 nf_ct_unlink_expect(i);
1176 write_unlock_bh(&nf_conntrack_lock);
1177 nf_conntrack_expect_put(i);
1178 return;
1181 write_unlock_bh(&nf_conntrack_lock);
1184 /* We don't increase the master conntrack refcount for non-fulfilled
1185 * conntracks. During the conntrack destruction, the expectations are
1186 * always killed before the conntrack itself */
1187 struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
1189 struct nf_conntrack_expect *new;
1191 new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
1192 if (!new) {
1193 DEBUGP("expect_related: OOM allocating expect\n");
1194 return NULL;
1196 new->master = me;
1197 atomic_set(&new->use, 1);
1198 return new;
1201 void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
1203 if (atomic_dec_and_test(&exp->use))
1204 kmem_cache_free(nf_conntrack_expect_cachep, exp);
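/* Link a new expectation into the global list and start its timeout, taken
 * from the master's helper; called with nf_conntrack_lock held for writing. */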
1207 static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
1209 atomic_inc(&exp->use);
1210 exp->master->expecting++;
1211 list_add(&exp->list, &nf_conntrack_expect_list);
1213 init_timer(&exp->timeout);
1214 exp->timeout.data = (unsigned long)exp;
1215 exp->timeout.function = expectation_timed_out;
1216 exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
1217 add_timer(&exp->timeout);
1219 exp->id = ++nf_conntrack_expect_next_id;
1220 atomic_inc(&exp->use);
1221 NF_CT_STAT_INC(expect_create);
1224 /* Race with expectations being used means we could have none to find; OK. */
1225 static void evict_oldest_expect(struct nf_conn *master)
1227 struct nf_conntrack_expect *i;
1229 list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
1230 if (i->master == master) {
1231 if (del_timer(&i->timeout)) {
1232 nf_ct_unlink_expect(i);
1233 nf_conntrack_expect_put(i);
1235 break;
1240 static inline int refresh_timer(struct nf_conntrack_expect *i)
1242 if (!del_timer(&i->timeout))
1243 return 0;
1245 i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
1246 add_timer(&i->timeout);
1247 return 1;
1250 int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
1252 struct nf_conntrack_expect *i;
1253 struct nf_conn *master = expect->master;
1254 int ret;
1256 DEBUGP("nf_conntrack_expect_related %p\n", expect);
1257 DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple);
1258 DEBUGP("mask: "); NF_CT_DUMP_TUPLE(&expect->mask);
1260 write_lock_bh(&nf_conntrack_lock);
1261 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
1262 if (expect_matches(i, expect)) {
1263 /* Refresh timer: if it's dying, ignore.. */
1264 if (refresh_timer(i)) {
1265 ret = 0;
1266 goto out;
1268 } else if (expect_clash(i, expect)) {
1269 ret = -EBUSY;
1270 goto out;
1273 /* Will be over limit? */
1274 if (master->helper->max_expected &&
1275 master->expecting >= master->helper->max_expected)
1276 evict_oldest_expect(master);
1278 nf_conntrack_expect_insert(expect);
1279 nf_conntrack_expect_event(IPEXP_NEW, expect);
1280 ret = 0;
1281 out:
1282 write_unlock_bh(&nf_conntrack_lock);
1283 return ret;
1286 /* Alter reply tuple (maybe alter helper). This is for NAT, and is
1287 implicitly racy: see __nf_conntrack_confirm */
1288 void nf_conntrack_alter_reply(struct nf_conn *conntrack,
1289 const struct nf_conntrack_tuple *newreply)
1291 write_lock_bh(&nf_conntrack_lock);
1292 /* Should be unconfirmed, so not in hash table yet */
1293 NF_CT_ASSERT(!nf_ct_is_confirmed(conntrack));
1295 DEBUGP("Altering reply tuple of %p to ", conntrack);
1296 NF_CT_DUMP_TUPLE(newreply);
1298 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
1299 if (!conntrack->master && conntrack->expecting == 0)
1300 conntrack->helper = __nf_ct_helper_find(newreply);
1301 write_unlock_bh(&nf_conntrack_lock);
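/* Register a connection tracking helper: make sure the helper-capable slab
 * cache exists, then add the helper to the global list under the lock. */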
1304 int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
1306 int ret;
1307 BUG_ON(me->timeout == 0);
1309 ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
1310 sizeof(struct nf_conn)
1311 + sizeof(union nf_conntrack_help)
1312 + __alignof__(union nf_conntrack_help),
1313 init_conntrack_for_helper);
1314 if (ret < 0) {
1315 printk(KERN_ERR "nf_conntrack_helper_register: Unable to create slab cache for conntracks\n");
1316 return ret;
1318 write_lock_bh(&nf_conntrack_lock);
1319 list_prepend(&helpers, me);
1320 write_unlock_bh(&nf_conntrack_lock);
1322 return 0;
1325 struct nf_conntrack_helper *
1326 __nf_conntrack_helper_find_byname(const char *name)
1328 struct nf_conntrack_helper *h;
1330 list_for_each_entry(h, &helpers, list) {
1331 if (!strcmp(h->name, name))
1332 return h;
1335 return NULL;
1338 static inline int unhelp(struct nf_conntrack_tuple_hash *i,
1339 const struct nf_conntrack_helper *me)
1341 if (nf_ct_tuplehash_to_ctrack(i)->helper == me) {
1342 nf_conntrack_event(IPCT_HELPER, nf_ct_tuplehash_to_ctrack(i));
1343 nf_ct_tuplehash_to_ctrack(i)->helper = NULL;
1345 return 0;
1348 void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
1350 unsigned int i;
1351 struct nf_conntrack_expect *exp, *tmp;
1353 /* Need write lock here, to delete helper. */
1354 write_lock_bh(&nf_conntrack_lock);
1355 LIST_DELETE(&helpers, me);
1357 /* Get rid of expectations */
1358 list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
1359 if (exp->master->helper == me && del_timer(&exp->timeout)) {
1360 nf_ct_unlink_expect(exp);
1361 nf_conntrack_expect_put(exp);
1365 /* Get rid of expecteds, set helpers to NULL. */
1366 LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me);
1367 for (i = 0; i < nf_conntrack_htable_size; i++)
1368 LIST_FIND_W(&nf_conntrack_hash[i], unhelp,
1369 struct nf_conntrack_tuple_hash *, me);
1370 write_unlock_bh(&nf_conntrack_lock);
1372 /* Someone could be still looking at the helper in a bh. */
1373 synchronize_net();
1376 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
1377 void __nf_ct_refresh_acct(struct nf_conn *ct,
1378 enum ip_conntrack_info ctinfo,
1379 const struct sk_buff *skb,
1380 unsigned long extra_jiffies,
1381 int do_acct)
1383 int event = 0;
1385 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
1386 NF_CT_ASSERT(skb);
1388 write_lock_bh(&nf_conntrack_lock);
1390 /* If not in hash table, timer will not be active yet */
1391 if (!nf_ct_is_confirmed(ct)) {
1392 ct->timeout.expires = extra_jiffies;
1393 event = IPCT_REFRESH;
1394 } else {
1395 /* Need del_timer for race avoidance (may already be dying). */
1396 if (del_timer(&ct->timeout)) {
1397 ct->timeout.expires = jiffies + extra_jiffies;
1398 add_timer(&ct->timeout);
1399 event = IPCT_REFRESH;
1403 #ifdef CONFIG_NF_CT_ACCT
1404 if (do_acct) {
1405 ct->counters[CTINFO2DIR(ctinfo)].packets++;
1406 ct->counters[CTINFO2DIR(ctinfo)].bytes +=
1407 skb->len - (unsigned int)(skb->nh.raw - skb->data);
1408 if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
1409 || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
1410 event |= IPCT_COUNTER_FILLING;
1412 #endif
1414 write_unlock_bh(&nf_conntrack_lock);
1416 /* must be unlocked when calling event cache */
1417 if (event)
1418 nf_conntrack_event_cache(event, skb);
1421 #if defined(CONFIG_NF_CT_NETLINK) || \
1422 defined(CONFIG_NF_CT_NETLINK_MODULE)
1424 #include <linux/netfilter/nfnetlink.h>
1425 #include <linux/netfilter/nfnetlink_conntrack.h>
1427 /* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
1428 * in ip_conntrack_core, since we don't want the protocols to autoload
1429 * or depend on ctnetlink */
1430 int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
1431 const struct nf_conntrack_tuple *tuple)
1433 NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
1434 &tuple->src.u.tcp.port);
1435 NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
1436 &tuple->dst.u.tcp.port);
1437 return 0;
1439 nfattr_failure:
1440 return -1;
1443 static const size_t cta_min_proto[CTA_PROTO_MAX] = {
1444 [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
1445 [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t)
1448 int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[],
1449 struct nf_conntrack_tuple *t)
1451 if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
1452 return -EINVAL;
1454 if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
1455 return -EINVAL;
1457 t->src.u.tcp.port =
1458 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
1459 t->dst.u.tcp.port =
1460 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
1462 return 0;
1464 #endif
1466 /* Used by ipt_REJECT and ip6t_REJECT. */
1467 void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
1469 struct nf_conn *ct;
1470 enum ip_conntrack_info ctinfo;
1472 /* This ICMP is in reverse direction to the packet which caused it */
1473 ct = nf_ct_get(skb, &ctinfo);
1474 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1475 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
1476 else
1477 ctinfo = IP_CT_RELATED;
1479 /* Attach to new skbuff, and increment count */
1480 nskb->nfct = &ct->ct_general;
1481 nskb->nfctinfo = ctinfo;
1482 nf_conntrack_get(nskb->nfct);
1485 static inline int
1486 do_iter(const struct nf_conntrack_tuple_hash *i,
1487 int (*iter)(struct nf_conn *i, void *data),
1488 void *data)
1490 return iter(nf_ct_tuplehash_to_ctrack(i), data);
1493 /* Bring out ya dead! */
1494 static struct nf_conntrack_tuple_hash *
1495 get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
1496 void *data, unsigned int *bucket)
1498 struct nf_conntrack_tuple_hash *h = NULL;
1500 write_lock_bh(&nf_conntrack_lock);
1501 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
1502 h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter,
1503 struct nf_conntrack_tuple_hash *, iter, data);
1504 if (h)
1505 break;
1507 if (!h)
1508 h = LIST_FIND_W(&unconfirmed, do_iter,
1509 struct nf_conntrack_tuple_hash *, iter, data);
1510 if (h)
1511 atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
1512 write_unlock_bh(&nf_conntrack_lock);
1514 return h;
1517 void
1518 nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
1520 struct nf_conntrack_tuple_hash *h;
1521 unsigned int bucket = 0;
1523 while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
1524 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
1525 /* Time to push up daisies... */
1526 if (del_timer(&ct->timeout))
1527 death_by_timeout((unsigned long)ct);
1528 /* ... else the timer will get him soon. */
1530 nf_ct_put(ct);
1534 static int kill_all(struct nf_conn *i, void *data)
1536 return 1;
1539 static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
1541 if (vmalloced)
1542 vfree(hash);
1543 else
1544 free_pages((unsigned long)hash,
1545 get_order(sizeof(struct list_head) * size));
1548 void nf_conntrack_flush(void)
1550 nf_ct_iterate_cleanup(kill_all, NULL);
1553 /* Mishearing the voices in his head, our hero wonders how he's
1554 supposed to kill the mall. */
1555 void nf_conntrack_cleanup(void)
1557 int i;
1559 /* This makes sure all current packets have passed through
1560 netfilter framework. Roll on, two-stage module
1561 delete... */
1562 synchronize_net();
1564 nf_ct_event_cache_flush();
1565 i_see_dead_people:
1566 nf_conntrack_flush();
1567 if (atomic_read(&nf_conntrack_count) != 0) {
1568 schedule();
1569 goto i_see_dead_people;
1571 /* wait until all references to nf_conntrack_untracked are dropped */
1572 while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
1573 schedule();
1575 for (i = 0; i < NF_CT_F_NUM; i++) {
1576 if (nf_ct_cache[i].use == 0)
1577 continue;
1579 NF_CT_ASSERT(nf_ct_cache[i].use == 1);
1580 nf_ct_cache[i].use = 1;
1581 nf_conntrack_unregister_cache(i);
1583 kmem_cache_destroy(nf_conntrack_expect_cachep);
1584 free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
1585 nf_conntrack_htable_size);
1587 /* free l3proto protocol tables */
1588 for (i = 0; i < PF_MAX; i++)
1589 if (nf_ct_protos[i]) {
1590 kfree(nf_ct_protos[i]);
1591 nf_ct_protos[i] = NULL;
1595 static struct list_head *alloc_hashtable(int size, int *vmalloced)
1597 struct list_head *hash;
1598 unsigned int i;
1600 *vmalloced = 0;
1601 hash = (void*)__get_free_pages(GFP_KERNEL,
1602 get_order(sizeof(struct list_head)
1603 * size));
1604 if (!hash) {
1605 *vmalloced = 1;
1606 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
1607 hash = vmalloc(sizeof(struct list_head) * size);
1610 if (hash)
1611 for (i = 0; i < size; i++)
1612 INIT_LIST_HEAD(&hash[i]);
1614 return hash;
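/* Resize the conntrack hash table at runtime (the "hashsize" module
 * parameter): allocate a new table, rehash every entry under the write lock
 * with a fresh random seed and free the old table. */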
1617 int set_hashsize(const char *val, struct kernel_param *kp)
1619 int i, bucket, hashsize, vmalloced;
1620 int old_vmalloced, old_size;
1621 int rnd;
1622 struct list_head *hash, *old_hash;
1623 struct nf_conntrack_tuple_hash *h;
1625 /* On boot, we can set this without any fancy locking. */
1626 if (!nf_conntrack_htable_size)
1627 return param_set_uint(val, kp);
1629 hashsize = simple_strtol(val, NULL, 0);
1630 if (!hashsize)
1631 return -EINVAL;
1633 hash = alloc_hashtable(hashsize, &vmalloced);
1634 if (!hash)
1635 return -ENOMEM;
1637 /* We have to rehash for the new table anyway, so we can also
1638 * use a new random seed */
1639 get_random_bytes(&rnd, 4);
1641 write_lock_bh(&nf_conntrack_lock);
1642 for (i = 0; i < nf_conntrack_htable_size; i++) {
1643 while (!list_empty(&nf_conntrack_hash[i])) {
1644 h = list_entry(nf_conntrack_hash[i].next,
1645 struct nf_conntrack_tuple_hash, list);
1646 list_del(&h->list);
1647 bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
1648 list_add_tail(&h->list, &hash[bucket]);
1651 old_size = nf_conntrack_htable_size;
1652 old_vmalloced = nf_conntrack_vmalloc;
1653 old_hash = nf_conntrack_hash;
1655 nf_conntrack_htable_size = hashsize;
1656 nf_conntrack_vmalloc = vmalloced;
1657 nf_conntrack_hash = hash;
1658 nf_conntrack_hash_rnd = rnd;
1659 write_unlock_bh(&nf_conntrack_lock);
1661 free_conntrack_hash(old_hash, old_vmalloced, old_size);
1662 return 0;
1665 module_param_call(hashsize, set_hashsize, param_get_uint,
1666 &nf_conntrack_htable_size, 0600);
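/* Module init: size the hash table from available memory, create the basic
 * slab caches and set up the fake "untracked" conntrack. */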
1668 int __init nf_conntrack_init(void)
1670 unsigned int i;
1671 int ret;
1673 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1674 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
1675 if (!nf_conntrack_htable_size) {
1676 nf_conntrack_htable_size
1677 = (((num_physpages << PAGE_SHIFT) / 16384)
1678 / sizeof(struct list_head));
1679 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
1680 nf_conntrack_htable_size = 8192;
1681 if (nf_conntrack_htable_size < 16)
1682 nf_conntrack_htable_size = 16;
1684 nf_conntrack_max = 8 * nf_conntrack_htable_size;
1686 printk("nf_conntrack version %s (%u buckets, %d max)\n",
1687 NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
1688 nf_conntrack_max);
1690 nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size,
1691 &nf_conntrack_vmalloc);
1692 if (!nf_conntrack_hash) {
1693 printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
1694 goto err_out;
1697 ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic",
1698 sizeof(struct nf_conn), NULL);
1699 if (ret < 0) {
1700 printk(KERN_ERR "Unable to create nf_conn slab cache\n");
1701 goto err_free_hash;
1704 nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect",
1705 sizeof(struct nf_conntrack_expect),
1706 0, 0, NULL, NULL);
1707 if (!nf_conntrack_expect_cachep) {
1708 printk(KERN_ERR "Unable to create nf_expect slab cache\n");
1709 goto err_free_conntrack_slab;
1712 /* Don't NEED lock here, but good form anyway. */
1713 write_lock_bh(&nf_conntrack_lock);
1714 for (i = 0; i < PF_MAX; i++)
1715 nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto;
1716 write_unlock_bh(&nf_conntrack_lock);
1718 /* Set up fake conntrack:
1719 - to never be deleted, not in any hashes */
1720 atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
1721 /* - and make it look like a confirmed connection */
1722 set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
1724 return ret;
1726 err_free_conntrack_slab:
1727 nf_conntrack_unregister_cache(NF_CT_F_BASIC);
1728 err_free_hash:
1729 free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
1730 nf_conntrack_htable_size);
1731 err_out:
1732 return -ENOMEM;