[NETFILTER]: connection tracking event notifiers
net/ipv4/netfilter/ip_conntrack_core.c
1 /* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables
3 extension. */
5 /* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
12 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
13 * - new API and handling of conntrack/nat helpers
14 * - now capable of multiple expectations for one master
15 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
16 * - add usage/reference counts to ip_conntrack_expect
17 * - export ip_conntrack[_expect]_{find_get,put} functions
18 */
20 #include <linux/config.h>
21 #include <linux/types.h>
22 #include <linux/icmp.h>
23 #include <linux/ip.h>
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/module.h>
27 #include <linux/skbuff.h>
28 #include <linux/proc_fs.h>
29 #include <linux/vmalloc.h>
30 #include <net/checksum.h>
31 #include <net/ip.h>
32 #include <linux/stddef.h>
33 #include <linux/sysctl.h>
34 #include <linux/slab.h>
35 #include <linux/random.h>
36 #include <linux/jhash.h>
37 #include <linux/err.h>
38 #include <linux/percpu.h>
39 #include <linux/moduleparam.h>
40 #include <linux/notifier.h>
42 /* ip_conntrack_lock protects the main hash table, protocol/helper/expected
43 registrations, conntrack timers. */
44 #define ASSERT_READ_LOCK(x)
45 #define ASSERT_WRITE_LOCK(x)
47 #include <linux/netfilter_ipv4/ip_conntrack.h>
48 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
49 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
50 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
51 #include <linux/netfilter_ipv4/listhelp.h>
53 #define IP_CONNTRACK_VERSION "2.2"
55 #if 0
56 #define DEBUGP printk
57 #else
58 #define DEBUGP(format, args...)
59 #endif
61 DEFINE_RWLOCK(ip_conntrack_lock);
63 /* ip_conntrack_standalone needs this */
64 atomic_t ip_conntrack_count = ATOMIC_INIT(0);
66 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
67 LIST_HEAD(ip_conntrack_expect_list);
68 struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
69 static LIST_HEAD(helpers);
70 unsigned int ip_conntrack_htable_size = 0;
71 int ip_conntrack_max;
72 struct list_head *ip_conntrack_hash;
73 static kmem_cache_t *ip_conntrack_cachep;
74 static kmem_cache_t *ip_conntrack_expect_cachep;
75 struct ip_conntrack ip_conntrack_untracked;
76 unsigned int ip_ct_log_invalid;
77 static LIST_HEAD(unconfirmed);
78 static int ip_conntrack_vmalloc;
80 #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
81 struct notifier_block *ip_conntrack_chain;
82 struct notifier_block *ip_conntrack_expect_chain;
84 DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
86 static inline void __deliver_cached_events(struct ip_conntrack_ecache *ecache)
88 if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
89 notifier_call_chain(&ip_conntrack_chain, ecache->events,
90 ecache->ct);
91 ecache->events = 0;
94 void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
96 __deliver_cached_events(ecache);
99 /* Deliver all cached events for a particular conntrack. This is called
100 * by code prior to async packet handling or freeing the skb */
101 void
102 ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct)
104 struct ip_conntrack_ecache *ecache =
105 &__get_cpu_var(ip_conntrack_ecache);
107 if (!ct)
108 return;
110 if (ecache->ct == ct) {
111 DEBUGP("ecache: delivering event for %p\n", ct);
112 __deliver_cached_events(ecache);
113 } else {
114 if (net_ratelimit())
115 printk(KERN_WARNING "ecache: want to deliver for %p, "
116 "but cache has %p\n", ct, ecache->ct);
119 /* signal that events have already been delivered */
120 ecache->ct = NULL;
123 /* Deliver any pending cached events if the current conntrack differs from the cached one */
124 void ip_conntrack_event_cache_init(const struct sk_buff *skb)
126 struct ip_conntrack *ct = (struct ip_conntrack *) skb->nfct;
127 struct ip_conntrack_ecache *ecache =
128 &__get_cpu_var(ip_conntrack_ecache);
130 /* take care of delivering potentially old events */
131 if (ecache->ct != ct) {
132 enum ip_conntrack_info ctinfo;
133 /* we have to check, since at startup the cache is NULL */
134 if (likely(ecache->ct)) {
135 DEBUGP("ecache: entered for different conntrack: "
136 "ecache->ct=%p, skb->nfct=%p. delivering "
137 "events\n", ecache->ct, ct);
138 __deliver_cached_events(ecache);
139 ip_conntrack_put(ecache->ct);
140 } else {
141 DEBUGP("ecache: entered for conntrack %p, "
142 "cache was clean before\n", ct);
145 /* initialize for this conntrack/packet */
146 ecache->ct = ip_conntrack_get(skb, &ctinfo);
147 /* ecache->events cleared by __deliver_cached_events() */
148 } else {
149 DEBUGP("ecache: re-entered for conntrack %p.\n", ct);
153 #endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
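/*
 * Editor's example (illustrative sketch, not part of the original file):
 * a module can subscribe to the conntrack event chain above roughly as
 * follows, assuming the ip_conntrack_register_notifier()/
 * ip_conntrack_unregister_notifier() helpers declared next to these chains
 * in <linux/netfilter_ipv4/ip_conntrack.h>:
 *
 *	static int my_conntrack_event(struct notifier_block *this,
 *				      unsigned long events, void *ptr)
 *	{
 *		struct ip_conntrack *ct = ptr;
 *
 *		if (events & IPCT_NEW)
 *			printk(KERN_DEBUG "new conntrack %p\n", ct);
 *		if (events & IPCT_DESTROY)
 *			printk(KERN_DEBUG "conntrack %p destroyed\n", ct);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_conntrack_nb = {
 *		.notifier_call	= my_conntrack_event,
 *	};
 *
 *	ip_conntrack_register_notifier(&my_conntrack_nb);      (module init)
 *	ip_conntrack_unregister_notifier(&my_conntrack_nb);    (module exit)
 */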
155 DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
157 void
158 ip_conntrack_put(struct ip_conntrack *ct)
160 IP_NF_ASSERT(ct);
161 nf_conntrack_put(&ct->ct_general);
164 static int ip_conntrack_hash_rnd_initted;
165 static unsigned int ip_conntrack_hash_rnd;
167 static u_int32_t
168 hash_conntrack(const struct ip_conntrack_tuple *tuple)
170 #if 0
171 dump_tuple(tuple);
172 #endif
173 return (jhash_3words(tuple->src.ip,
174 (tuple->dst.ip ^ tuple->dst.protonum),
175 (tuple->src.u.all | (tuple->dst.u.all << 16)),
176 ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
180 ip_ct_get_tuple(const struct iphdr *iph,
181 const struct sk_buff *skb,
182 unsigned int dataoff,
183 struct ip_conntrack_tuple *tuple,
184 const struct ip_conntrack_protocol *protocol)
186 /* Should never happen */
187 if (iph->frag_off & htons(IP_OFFSET)) {
188 printk("ip_conntrack_core: Frag of proto %u.\n",
189 iph->protocol);
190 return 0;
193 tuple->src.ip = iph->saddr;
194 tuple->dst.ip = iph->daddr;
195 tuple->dst.protonum = iph->protocol;
196 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
198 return protocol->pkt_to_tuple(skb, dataoff, tuple);
202 ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
203 const struct ip_conntrack_tuple *orig,
204 const struct ip_conntrack_protocol *protocol)
206 inverse->src.ip = orig->dst.ip;
207 inverse->dst.ip = orig->src.ip;
208 inverse->dst.protonum = orig->dst.protonum;
209 inverse->dst.dir = !orig->dst.dir;
211 return protocol->invert_tuple(inverse, orig);
215 /* ip_conntrack_expect helper functions */
216 static void unlink_expect(struct ip_conntrack_expect *exp)
218 ASSERT_WRITE_LOCK(&ip_conntrack_lock);
219 IP_NF_ASSERT(!timer_pending(&exp->timeout));
220 list_del(&exp->list);
221 CONNTRACK_STAT_INC(expect_delete);
222 exp->master->expecting--;
225 static void expectation_timed_out(unsigned long ul_expect)
227 struct ip_conntrack_expect *exp = (void *)ul_expect;
229 write_lock_bh(&ip_conntrack_lock);
230 unlink_expect(exp);
231 write_unlock_bh(&ip_conntrack_lock);
232 ip_conntrack_expect_put(exp);
235 /* If an expectation for this connection is found, it is deleted from the
236 * global list and returned. */
237 static struct ip_conntrack_expect *
238 find_expectation(const struct ip_conntrack_tuple *tuple)
240 struct ip_conntrack_expect *i;
242 list_for_each_entry(i, &ip_conntrack_expect_list, list) {
243 /* If the master is not in the hash table yet (i.e. the packet hasn't left
244 this machine yet), how can the other end know about the expectation?
245 Hence these are not the droids you are looking for (if
246 master ct never got confirmed, we'd hold a reference to it
247 and weird things would happen to future packets). */
248 if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
249 && is_confirmed(i->master)
250 && del_timer(&i->timeout)) {
251 unlink_expect(i);
252 return i;
255 return NULL;
258 /* delete all expectations for this conntrack */
259 static void remove_expectations(struct ip_conntrack *ct)
261 struct ip_conntrack_expect *i, *tmp;
263 /* Optimization: most connections never expect any others. */
264 if (ct->expecting == 0)
265 return;
267 list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
268 if (i->master == ct && del_timer(&i->timeout)) {
269 unlink_expect(i);
270 ip_conntrack_expect_put(i);
275 static void
276 clean_from_lists(struct ip_conntrack *ct)
278 unsigned int ho, hr;
280 DEBUGP("clean_from_lists(%p)\n", ct);
281 ASSERT_WRITE_LOCK(&ip_conntrack_lock);
283 ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
284 hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
285 LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
286 LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
288 /* Destroy all pending expectations */
289 remove_expectations(ct);
292 static void
293 destroy_conntrack(struct nf_conntrack *nfct)
295 struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
296 struct ip_conntrack_protocol *proto;
298 DEBUGP("destroy_conntrack(%p)\n", ct);
299 IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
300 IP_NF_ASSERT(!timer_pending(&ct->timeout));
302 set_bit(IPS_DYING_BIT, &ct->status);
304 /* To make sure we don't get any weird locking issues here:
305 * destroy_conntrack() MUST NOT be called with a write lock
306 * to ip_conntrack_lock!!! -HW */
307 proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
308 if (proto && proto->destroy)
309 proto->destroy(ct);
311 if (ip_conntrack_destroyed)
312 ip_conntrack_destroyed(ct);
314 write_lock_bh(&ip_conntrack_lock);
315 /* Expectations will have been removed in clean_from_lists,
316 * except TFTP can create an expectation on the first packet,
317 * before connection is in the list, so we need to clean here,
318 * too. */
319 remove_expectations(ct);
321 /* We overload first tuple to link into unconfirmed list. */
322 if (!is_confirmed(ct)) {
323 BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
324 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
327 CONNTRACK_STAT_INC(delete);
328 write_unlock_bh(&ip_conntrack_lock);
330 if (ct->master)
331 ip_conntrack_put(ct->master);
333 DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
334 kmem_cache_free(ip_conntrack_cachep, ct);
335 atomic_dec(&ip_conntrack_count);
338 static void death_by_timeout(unsigned long ul_conntrack)
340 struct ip_conntrack *ct = (void *)ul_conntrack;
342 ip_conntrack_event(IPCT_DESTROY, ct);
343 write_lock_bh(&ip_conntrack_lock);
344 /* Inside lock so preempt is disabled on module removal path.
345 * Otherwise we can get spurious warnings. */
346 CONNTRACK_STAT_INC(delete_list);
347 clean_from_lists(ct);
348 write_unlock_bh(&ip_conntrack_lock);
349 ip_conntrack_put(ct);
352 static inline int
353 conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
354 const struct ip_conntrack_tuple *tuple,
355 const struct ip_conntrack *ignored_conntrack)
357 ASSERT_READ_LOCK(&ip_conntrack_lock);
358 return tuplehash_to_ctrack(i) != ignored_conntrack
359 && ip_ct_tuple_equal(tuple, &i->tuple);
362 static struct ip_conntrack_tuple_hash *
363 __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
364 const struct ip_conntrack *ignored_conntrack)
366 struct ip_conntrack_tuple_hash *h;
367 unsigned int hash = hash_conntrack(tuple);
369 ASSERT_READ_LOCK(&ip_conntrack_lock);
370 list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
371 if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
372 CONNTRACK_STAT_INC(found);
373 return h;
375 CONNTRACK_STAT_INC(searched);
378 return NULL;
381 /* Find a connection corresponding to a tuple. */
382 struct ip_conntrack_tuple_hash *
383 ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
384 const struct ip_conntrack *ignored_conntrack)
386 struct ip_conntrack_tuple_hash *h;
388 read_lock_bh(&ip_conntrack_lock);
389 h = __ip_conntrack_find(tuple, ignored_conntrack);
390 if (h)
391 atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
392 read_unlock_bh(&ip_conntrack_lock);
394 return h;
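/*
 * Editor's usage sketch (illustrative, not from the original file): every
 * successful ip_conntrack_find_get() takes a reference that the caller must
 * drop again with ip_conntrack_put():
 *
 *	struct ip_conntrack_tuple_hash *h;
 *
 *	h = ip_conntrack_find_get(&tuple, NULL);
 *	if (h) {
 *		struct ip_conntrack *ct = tuplehash_to_ctrack(h);
 *		... use ct while the reference is held ...
 *		ip_conntrack_put(ct);
 *	}
 */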
397 /* Confirm a connection given skb; places it in hash table */
399 __ip_conntrack_confirm(struct sk_buff **pskb)
401 unsigned int hash, repl_hash;
402 struct ip_conntrack *ct;
403 enum ip_conntrack_info ctinfo;
405 ct = ip_conntrack_get(*pskb, &ctinfo);
407 /* ipt_REJECT uses ip_conntrack_attach to attach related
408 ICMP/TCP RST packets in the other direction. The packet which
409 actually created the connection will be IP_CT_NEW or, for an
410 expected connection, IP_CT_RELATED. */
411 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
412 return NF_ACCEPT;
414 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
415 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
417 /* We're not in hash table, and we refuse to set up related
418 connections for unconfirmed conns. But packet copies and
419 REJECT will give spurious warnings here. */
420 /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
422 /* No external references means no one else could have
423 confirmed us. */
424 IP_NF_ASSERT(!is_confirmed(ct));
425 DEBUGP("Confirming conntrack %p\n", ct);
427 write_lock_bh(&ip_conntrack_lock);
429 /* See if there's one in the list already, including reverse:
430 NAT could have grabbed it without realizing, since we're
431 not in the hash. If there is, we lost the race. */
432 if (!LIST_FIND(&ip_conntrack_hash[hash],
433 conntrack_tuple_cmp,
434 struct ip_conntrack_tuple_hash *,
435 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
436 && !LIST_FIND(&ip_conntrack_hash[repl_hash],
437 conntrack_tuple_cmp,
438 struct ip_conntrack_tuple_hash *,
439 &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
440 /* Remove from unconfirmed list */
441 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
443 list_prepend(&ip_conntrack_hash[hash],
444 &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
445 list_prepend(&ip_conntrack_hash[repl_hash],
446 &ct->tuplehash[IP_CT_DIR_REPLY]);
447 /* Timer relative to confirmation time, not original
448 setting time, otherwise we'd get timer wrap in
449 weird delay cases. */
450 ct->timeout.expires += jiffies;
451 add_timer(&ct->timeout);
452 atomic_inc(&ct->ct_general.use);
453 set_bit(IPS_CONFIRMED_BIT, &ct->status);
454 CONNTRACK_STAT_INC(insert);
455 write_unlock_bh(&ip_conntrack_lock);
456 if (ct->helper)
457 ip_conntrack_event_cache(IPCT_HELPER, *pskb);
458 #ifdef CONFIG_IP_NF_NAT_NEEDED
459 if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
460 test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
461 ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
462 #endif
463 ip_conntrack_event_cache(master_ct(ct) ?
464 IPCT_RELATED : IPCT_NEW, *pskb);
466 return NF_ACCEPT;
469 CONNTRACK_STAT_INC(insert_failed);
470 write_unlock_bh(&ip_conntrack_lock);
472 return NF_DROP;
475 /* Returns true if a connection corresponds to the tuple (required
476 for NAT). */
478 ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
479 const struct ip_conntrack *ignored_conntrack)
481 struct ip_conntrack_tuple_hash *h;
483 read_lock_bh(&ip_conntrack_lock);
484 h = __ip_conntrack_find(tuple, ignored_conntrack);
485 read_unlock_bh(&ip_conntrack_lock);
487 return h != NULL;
490 /* There's a small race here where we may free a just-assured
491 connection. Too bad: we're in trouble anyway. */
492 static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
494 return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
497 static int early_drop(struct list_head *chain)
499 /* Traverse backwards: gives us oldest, which is roughly LRU */
500 struct ip_conntrack_tuple_hash *h;
501 struct ip_conntrack *ct = NULL;
502 int dropped = 0;
504 read_lock_bh(&ip_conntrack_lock);
505 h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
506 if (h) {
507 ct = tuplehash_to_ctrack(h);
508 atomic_inc(&ct->ct_general.use);
510 read_unlock_bh(&ip_conntrack_lock);
512 if (!ct)
513 return dropped;
515 if (del_timer(&ct->timeout)) {
516 death_by_timeout((unsigned long)ct);
517 dropped = 1;
518 CONNTRACK_STAT_INC(early_drop);
520 ip_conntrack_put(ct);
521 return dropped;
524 static inline int helper_cmp(const struct ip_conntrack_helper *i,
525 const struct ip_conntrack_tuple *rtuple)
527 return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
530 static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
532 return LIST_FIND(&helpers, helper_cmp,
533 struct ip_conntrack_helper *,
534 tuple);
537 /* Allocate a new conntrack: we return -ENOMEM if classification
538 failed due to stress. Otherwise it really is unclassifiable. */
539 static struct ip_conntrack_tuple_hash *
540 init_conntrack(const struct ip_conntrack_tuple *tuple,
541 struct ip_conntrack_protocol *protocol,
542 struct sk_buff *skb)
544 struct ip_conntrack *conntrack;
545 struct ip_conntrack_tuple repl_tuple;
546 size_t hash;
547 struct ip_conntrack_expect *exp;
549 if (!ip_conntrack_hash_rnd_initted) {
550 get_random_bytes(&ip_conntrack_hash_rnd, 4);
551 ip_conntrack_hash_rnd_initted = 1;
554 hash = hash_conntrack(tuple);
556 if (ip_conntrack_max
557 && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
558 /* Try dropping from this hash chain. */
559 if (!early_drop(&ip_conntrack_hash[hash])) {
560 if (net_ratelimit())
561 printk(KERN_WARNING
562 "ip_conntrack: table full, dropping"
563 " packet.\n");
564 return ERR_PTR(-ENOMEM);
568 if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
569 DEBUGP("Can't invert tuple.\n");
570 return NULL;
573 conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
574 if (!conntrack) {
575 DEBUGP("Can't allocate conntrack.\n");
576 return ERR_PTR(-ENOMEM);
579 memset(conntrack, 0, sizeof(*conntrack));
580 atomic_set(&conntrack->ct_general.use, 1);
581 conntrack->ct_general.destroy = destroy_conntrack;
582 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
583 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
584 if (!protocol->new(conntrack, skb)) {
585 kmem_cache_free(ip_conntrack_cachep, conntrack);
586 return NULL;
588 /* Don't set timer yet: wait for confirmation */
589 init_timer(&conntrack->timeout);
590 conntrack->timeout.data = (unsigned long)conntrack;
591 conntrack->timeout.function = death_by_timeout;
593 write_lock_bh(&ip_conntrack_lock);
594 exp = find_expectation(tuple);
596 if (exp) {
597 DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
598 conntrack, exp);
599 /* Welcome, Mr. Bond. We've been expecting you... */
600 __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
601 conntrack->master = exp->master;
602 #ifdef CONFIG_IP_NF_CONNTRACK_MARK
603 conntrack->mark = exp->master->mark;
604 #endif
605 #if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
606 defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
607 /* this is ugly, but there is no other place to put it */
608 conntrack->nat.masq_index = exp->master->nat.masq_index;
609 #endif
610 nf_conntrack_get(&conntrack->master->ct_general);
611 CONNTRACK_STAT_INC(expect_new);
612 } else {
613 conntrack->helper = ip_ct_find_helper(&repl_tuple);
615 CONNTRACK_STAT_INC(new);
618 /* Overload tuple linked list to put us in unconfirmed list. */
619 list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
621 atomic_inc(&ip_conntrack_count);
622 write_unlock_bh(&ip_conntrack_lock);
624 if (exp) {
625 if (exp->expectfn)
626 exp->expectfn(conntrack, exp);
627 ip_conntrack_expect_put(exp);
630 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
633 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
634 static inline struct ip_conntrack *
635 resolve_normal_ct(struct sk_buff *skb,
636 struct ip_conntrack_protocol *proto,
637 int *set_reply,
638 unsigned int hooknum,
639 enum ip_conntrack_info *ctinfo)
641 struct ip_conntrack_tuple tuple;
642 struct ip_conntrack_tuple_hash *h;
643 struct ip_conntrack *ct;
645 IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
647 if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
648 &tuple,proto))
649 return NULL;
651 /* look for tuple match */
652 h = ip_conntrack_find_get(&tuple, NULL);
653 if (!h) {
654 h = init_conntrack(&tuple, proto, skb);
655 if (!h)
656 return NULL;
657 if (IS_ERR(h))
658 return (void *)h;
660 ct = tuplehash_to_ctrack(h);
662 /* It exists; we have (non-exclusive) reference. */
663 if (DIRECTION(h) == IP_CT_DIR_REPLY) {
664 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
665 /* Please set the reply bit if this packet is OK */
666 *set_reply = 1;
667 } else {
668 /* Once we've had two way comms, always ESTABLISHED. */
669 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
670 DEBUGP("ip_conntrack_in: normal packet for %p\n",
671 ct);
672 *ctinfo = IP_CT_ESTABLISHED;
673 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
674 DEBUGP("ip_conntrack_in: related packet for %p\n",
675 ct);
676 *ctinfo = IP_CT_RELATED;
677 } else {
678 DEBUGP("ip_conntrack_in: new packet for %p\n",
679 ct);
680 *ctinfo = IP_CT_NEW;
682 *set_reply = 0;
684 skb->nfct = &ct->ct_general;
685 skb->nfctinfo = *ctinfo;
686 return ct;
689 /* Netfilter hook itself. */
690 unsigned int ip_conntrack_in(unsigned int hooknum,
691 struct sk_buff **pskb,
692 const struct net_device *in,
693 const struct net_device *out,
694 int (*okfn)(struct sk_buff *))
696 struct ip_conntrack *ct;
697 enum ip_conntrack_info ctinfo;
698 struct ip_conntrack_protocol *proto;
699 int set_reply = 0;
700 int ret;
702 /* Previously seen (loopback or untracked)? Ignore. */
703 if ((*pskb)->nfct) {
704 CONNTRACK_STAT_INC(ignore);
705 return NF_ACCEPT;
708 /* Should never happen */
709 if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
710 if (net_ratelimit()) {
711 printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
712 (*pskb)->nh.iph->protocol, hooknum);
714 return NF_DROP;
717 /* Doesn't cover locally-generated broadcast, so not worth it. */
718 #if 0
719 /* Ignore broadcast: no `connection'. */
720 if ((*pskb)->pkt_type == PACKET_BROADCAST) {
721 printk("Broadcast packet!\n");
722 return NF_ACCEPT;
723 } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
724 == htonl(0x000000FF)) {
725 printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
726 NIPQUAD((*pskb)->nh.iph->saddr),
727 NIPQUAD((*pskb)->nh.iph->daddr),
728 (*pskb)->sk, (*pskb)->pkt_type);
730 #endif
732 proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
734 /* It may be a special packet, error, unclean...
735 * the inverse of the return code tells the netfilter
736 * core what to do with the packet. */
737 if (proto->error != NULL
738 && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
739 CONNTRACK_STAT_INC(error);
740 CONNTRACK_STAT_INC(invalid);
741 return -ret;
744 if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
745 /* Not valid part of a connection */
746 CONNTRACK_STAT_INC(invalid);
747 return NF_ACCEPT;
750 if (IS_ERR(ct)) {
751 /* Too stressed to deal. */
752 CONNTRACK_STAT_INC(drop);
753 return NF_DROP;
756 IP_NF_ASSERT((*pskb)->nfct);
758 ip_conntrack_event_cache_init(*pskb);
760 ret = proto->packet(ct, *pskb, ctinfo);
761 if (ret < 0) {
762 /* Invalid: the inverse of the return code tells
763 * the netfilter core what to do */
764 nf_conntrack_put((*pskb)->nfct);
765 (*pskb)->nfct = NULL;
766 CONNTRACK_STAT_INC(invalid);
767 return -ret;
770 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
771 ip_conntrack_event_cache(IPCT_STATUS, *pskb);
773 return ret;
776 int invert_tuplepr(struct ip_conntrack_tuple *inverse,
777 const struct ip_conntrack_tuple *orig)
779 return ip_ct_invert_tuple(inverse, orig,
780 ip_ct_find_proto(orig->dst.protonum));
783 /* Would two expected things clash? */
784 static inline int expect_clash(const struct ip_conntrack_expect *a,
785 const struct ip_conntrack_expect *b)
787 /* Part covered by intersection of masks must be unequal,
788 otherwise they clash */
789 struct ip_conntrack_tuple intersect_mask
790 = { { a->mask.src.ip & b->mask.src.ip,
791 { a->mask.src.u.all & b->mask.src.u.all } },
792 { a->mask.dst.ip & b->mask.dst.ip,
793 { a->mask.dst.u.all & b->mask.dst.u.all },
794 a->mask.dst.protonum & b->mask.dst.protonum } };
796 return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
799 static inline int expect_matches(const struct ip_conntrack_expect *a,
800 const struct ip_conntrack_expect *b)
802 return a->master == b->master
803 && ip_ct_tuple_equal(&a->tuple, &b->tuple)
804 && ip_ct_tuple_equal(&a->mask, &b->mask);
807 /* Generally a bad idea to call this: could have matched already. */
808 void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
810 struct ip_conntrack_expect *i;
812 write_lock_bh(&ip_conntrack_lock);
813 /* choose the oldest expectation to evict */
814 list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
815 if (expect_matches(i, exp) && del_timer(&i->timeout)) {
816 unlink_expect(i);
817 write_unlock_bh(&ip_conntrack_lock);
818 ip_conntrack_expect_put(i);
819 return;
822 write_unlock_bh(&ip_conntrack_lock);
825 struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
827 struct ip_conntrack_expect *new;
829 new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
830 if (!new) {
831 DEBUGP("expect_related: OOM allocating expect\n");
832 return NULL;
834 new->master = me;
835 atomic_inc(&new->master->ct_general.use);
836 atomic_set(&new->use, 1);
837 return new;
840 void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
842 if (atomic_dec_and_test(&exp->use)) {
843 ip_conntrack_put(exp->master);
844 kmem_cache_free(ip_conntrack_expect_cachep, exp);
848 static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
850 atomic_inc(&exp->use);
851 exp->master->expecting++;
852 list_add(&exp->list, &ip_conntrack_expect_list);
854 init_timer(&exp->timeout);
855 exp->timeout.data = (unsigned long)exp;
856 exp->timeout.function = expectation_timed_out;
857 exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
858 add_timer(&exp->timeout);
860 CONNTRACK_STAT_INC(expect_create);
863 /* Race with expectations being used means we could have none to find; OK. */
864 static void evict_oldest_expect(struct ip_conntrack *master)
866 struct ip_conntrack_expect *i;
868 list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
869 if (i->master == master) {
870 if (del_timer(&i->timeout)) {
871 unlink_expect(i);
872 ip_conntrack_expect_put(i);
874 break;
879 static inline int refresh_timer(struct ip_conntrack_expect *i)
881 if (!del_timer(&i->timeout))
882 return 0;
884 i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
885 add_timer(&i->timeout);
886 return 1;
889 int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
891 struct ip_conntrack_expect *i;
892 int ret;
894 DEBUGP("ip_conntrack_expect_related %p\n", related_to);
895 DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
896 DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
898 write_lock_bh(&ip_conntrack_lock);
899 list_for_each_entry(i, &ip_conntrack_expect_list, list) {
900 if (expect_matches(i, expect)) {
901 /* Refresh timer: if it's dying, ignore.. */
902 if (refresh_timer(i)) {
903 ret = 0;
904 goto out;
906 } else if (expect_clash(i, expect)) {
907 ret = -EBUSY;
908 goto out;
912 /* Will be over limit? */
913 if (expect->master->helper->max_expected &&
914 expect->master->expecting >= expect->master->helper->max_expected)
915 evict_oldest_expect(expect->master);
917 ip_conntrack_expect_insert(expect);
918 ip_conntrack_expect_event(IPEXP_NEW, expect);
919 ret = 0;
920 out:
921 write_unlock_bh(&ip_conntrack_lock);
922 return ret;
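/*
 * Editor's example (illustrative sketch, not part of the original file): a
 * protocol helper typically registers an expectation along these lines; the
 * tuple/mask values and the 'port' learned from the payload are placeholders:
 *
 *	struct ip_conntrack_expect *exp;
 *
 *	exp = ip_conntrack_expect_alloc(ct);            (ct = master conntrack)
 *	if (!exp)
 *		return NF_DROP;
 *	exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
 *	exp->tuple.src.u.tcp.port = 0;
 *	exp->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
 *	exp->tuple.dst.u.tcp.port = htons(port);
 *	exp->tuple.dst.protonum = IPPROTO_TCP;
 *	exp->mask.src.ip = 0xFFFFFFFF;
 *	exp->mask.src.u.tcp.port = 0;
 *	exp->mask.dst.ip = 0xFFFFFFFF;
 *	exp->mask.dst.u.tcp.port = 0xFFFF;
 *	exp->mask.dst.protonum = 0xFF;
 *	exp->expectfn = NULL;
 *	if (ip_conntrack_expect_related(exp) != 0)
 *		... expectation was not registered (e.g. -EBUSY on a clash) ...
 *	ip_conntrack_expect_put(exp);
 */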
925 /* Alter reply tuple (maybe alter helper). This is for NAT, and is
926 implicitly racy: see __ip_conntrack_confirm */
927 void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
928 const struct ip_conntrack_tuple *newreply)
930 write_lock_bh(&ip_conntrack_lock);
931 /* Should be unconfirmed, so not in hash table yet */
932 IP_NF_ASSERT(!is_confirmed(conntrack));
934 DEBUGP("Altering reply tuple of %p to ", conntrack);
935 DUMP_TUPLE(newreply);
937 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
938 if (!conntrack->master && conntrack->expecting == 0)
939 conntrack->helper = ip_ct_find_helper(newreply);
940 write_unlock_bh(&ip_conntrack_lock);
943 int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
945 BUG_ON(me->timeout == 0);
946 write_lock_bh(&ip_conntrack_lock);
947 list_prepend(&helpers, me);
948 write_unlock_bh(&ip_conntrack_lock);
950 return 0;
953 static inline int unhelp(struct ip_conntrack_tuple_hash *i,
954 const struct ip_conntrack_helper *me)
956 if (tuplehash_to_ctrack(i)->helper == me) {
957 ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
958 tuplehash_to_ctrack(i)->helper = NULL;
960 return 0;
963 void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
965 unsigned int i;
966 struct ip_conntrack_expect *exp, *tmp;
968 /* Need write lock here, to delete helper. */
969 write_lock_bh(&ip_conntrack_lock);
970 LIST_DELETE(&helpers, me);
972 /* Get rid of expectations */
973 list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
974 if (exp->master->helper == me && del_timer(&exp->timeout)) {
975 unlink_expect(exp);
976 ip_conntrack_expect_put(exp);
979 /* Get rid of expecteds, set helpers to NULL. */
980 LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
981 for (i = 0; i < ip_conntrack_htable_size; i++)
982 LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
983 struct ip_conntrack_tuple_hash *, me);
984 write_unlock_bh(&ip_conntrack_lock);
986 /* Someone could be still looking at the helper in a bh. */
987 synchronize_net();
990 static inline void ct_add_counters(struct ip_conntrack *ct,
991 enum ip_conntrack_info ctinfo,
992 const struct sk_buff *skb)
994 #ifdef CONFIG_IP_NF_CT_ACCT
995 if (skb) {
996 ct->counters[CTINFO2DIR(ctinfo)].packets++;
997 ct->counters[CTINFO2DIR(ctinfo)].bytes +=
998 ntohs(skb->nh.iph->tot_len);
1000 #endif
1003 /* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
1004 void ip_ct_refresh_acct(struct ip_conntrack *ct,
1005 enum ip_conntrack_info ctinfo,
1006 const struct sk_buff *skb,
1007 unsigned long extra_jiffies)
1009 IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
1011 /* If not in hash table, timer will not be active yet */
1012 if (!is_confirmed(ct)) {
1013 ct->timeout.expires = extra_jiffies;
1014 ct_add_counters(ct, ctinfo, skb);
1015 } else {
1016 write_lock_bh(&ip_conntrack_lock);
1017 /* Need del_timer for race avoidance (may already be dying). */
1018 if (del_timer(&ct->timeout)) {
1019 ct->timeout.expires = jiffies + extra_jiffies;
1020 add_timer(&ct->timeout);
1021 ip_conntrack_event_cache(IPCT_REFRESH, skb);
1023 ct_add_counters(ct, ctinfo, skb);
1024 write_unlock_bh(&ip_conntrack_lock);
1028 /* Returns new sk_buff, or NULL */
1029 struct sk_buff *
1030 ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
1032 skb_orphan(skb);
1034 local_bh_disable();
1035 skb = ip_defrag(skb, user);
1036 local_bh_enable();
1038 if (skb)
1039 ip_send_check(skb->nh.iph);
1040 return skb;
1043 /* Used by ipt_REJECT. */
1044 static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
1046 struct ip_conntrack *ct;
1047 enum ip_conntrack_info ctinfo;
1049 /* This ICMP is in reverse direction to the packet which caused it */
1050 ct = ip_conntrack_get(skb, &ctinfo);
1052 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1053 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
1054 else
1055 ctinfo = IP_CT_RELATED;
1057 /* Attach to new skbuff, and increment count */
1058 nskb->nfct = &ct->ct_general;
1059 nskb->nfctinfo = ctinfo;
1060 nf_conntrack_get(nskb->nfct);
1063 static inline int
1064 do_iter(const struct ip_conntrack_tuple_hash *i,
1065 int (*iter)(struct ip_conntrack *i, void *data),
1066 void *data)
1068 return iter(tuplehash_to_ctrack(i), data);
1071 /* Bring out ya dead! */
1072 static struct ip_conntrack_tuple_hash *
1073 get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
1074 void *data, unsigned int *bucket)
1076 struct ip_conntrack_tuple_hash *h = NULL;
1078 write_lock_bh(&ip_conntrack_lock);
1079 for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
1080 h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
1081 struct ip_conntrack_tuple_hash *, iter, data);
1082 if (h)
1083 break;
1085 if (!h)
1086 h = LIST_FIND_W(&unconfirmed, do_iter,
1087 struct ip_conntrack_tuple_hash *, iter, data);
1088 if (h)
1089 atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
1090 write_unlock_bh(&ip_conntrack_lock);
1092 return h;
1095 void
1096 ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
1098 struct ip_conntrack_tuple_hash *h;
1099 unsigned int bucket = 0;
1101 while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
1102 struct ip_conntrack *ct = tuplehash_to_ctrack(h);
1103 /* Time to push up daisies... */
1104 if (del_timer(&ct->timeout))
1105 death_by_timeout((unsigned long)ct);
1106 /* ... else the timer will get him soon. */
1108 ip_conntrack_put(ct);
1111 #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
1113 /* we need to deliver all cached events in order to drop
1114 * the reference counts */
1115 int cpu;
1116 for_each_cpu(cpu) {
1117 struct ip_conntrack_ecache *ecache =
1118 &per_cpu(ip_conntrack_ecache, cpu);
1119 if (ecache->ct) {
1120 __ip_ct_deliver_cached_events(ecache);
1121 ip_conntrack_put(ecache->ct);
1122 ecache->ct = NULL;
1126 #endif
1129 /* Fast function for those who don't want to parse /proc (and I don't
1130 blame them). */
1131 /* Reversing the socket's dst/src point of view gives us the reply
1132 mapping. */
1133 static int
1134 getorigdst(struct sock *sk, int optval, void __user *user, int *len)
1136 struct inet_sock *inet = inet_sk(sk);
1137 struct ip_conntrack_tuple_hash *h;
1138 struct ip_conntrack_tuple tuple;
1140 IP_CT_TUPLE_U_BLANK(&tuple);
1141 tuple.src.ip = inet->rcv_saddr;
1142 tuple.src.u.tcp.port = inet->sport;
1143 tuple.dst.ip = inet->daddr;
1144 tuple.dst.u.tcp.port = inet->dport;
1145 tuple.dst.protonum = IPPROTO_TCP;
1147 /* We only do TCP at the moment: is there a better way? */
1148 if (strcmp(sk->sk_prot->name, "TCP")) {
1149 DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
1150 return -ENOPROTOOPT;
1153 if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
1154 DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
1155 *len, sizeof(struct sockaddr_in));
1156 return -EINVAL;
1159 h = ip_conntrack_find_get(&tuple, NULL);
1160 if (h) {
1161 struct sockaddr_in sin;
1162 struct ip_conntrack *ct = tuplehash_to_ctrack(h);
1164 sin.sin_family = AF_INET;
1165 sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
1166 .tuple.dst.u.tcp.port;
1167 sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
1168 .tuple.dst.ip;
1170 DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
1171 NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
1172 ip_conntrack_put(ct);
1173 if (copy_to_user(user, &sin, sizeof(sin)) != 0)
1174 return -EFAULT;
1175 else
1176 return 0;
1178 DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
1179 NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
1180 NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
1181 return -ENOENT;
1184 static struct nf_sockopt_ops so_getorigdst = {
1185 .pf = PF_INET,
1186 .get_optmin = SO_ORIGINAL_DST,
1187 .get_optmax = SO_ORIGINAL_DST+1,
1188 .get = &getorigdst,
1191 static int kill_all(struct ip_conntrack *i, void *data)
1193 return 1;
1196 static void free_conntrack_hash(void)
1198 if (ip_conntrack_vmalloc)
1199 vfree(ip_conntrack_hash);
1200 else
1201 free_pages((unsigned long)ip_conntrack_hash,
1202 get_order(sizeof(struct list_head)
1203 * ip_conntrack_htable_size));
1206 /* Mishearing the voices in his head, our hero wonders how he's
1207 supposed to kill the mall. */
1208 void ip_conntrack_cleanup(void)
1210 ip_ct_attach = NULL;
1211 /* This makes sure all current packets have passed through
1212 netfilter framework. Roll on, two-stage module
1213 delete... */
1214 synchronize_net();
1216 i_see_dead_people:
1217 ip_ct_iterate_cleanup(kill_all, NULL);
1218 if (atomic_read(&ip_conntrack_count) != 0) {
1219 schedule();
1220 goto i_see_dead_people;
1222 /* wait until all references to ip_conntrack_untracked are dropped */
1223 while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
1224 schedule();
1226 kmem_cache_destroy(ip_conntrack_cachep);
1227 kmem_cache_destroy(ip_conntrack_expect_cachep);
1228 free_conntrack_hash();
1229 nf_unregister_sockopt(&so_getorigdst);
1232 static int hashsize;
1233 module_param(hashsize, int, 0400);
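/*
 * Editor's note: the 0400 mode makes the parameter read-only at runtime, so
 * the hash size can only be chosen at load time, e.g.:
 *
 *	modprobe ip_conntrack hashsize=16384
 */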
1235 int __init ip_conntrack_init(void)
1237 unsigned int i;
1238 int ret;
1240 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1241 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
1242 if (hashsize) {
1243 ip_conntrack_htable_size = hashsize;
1244 } else {
1245 ip_conntrack_htable_size
1246 = (((num_physpages << PAGE_SHIFT) / 16384)
1247 / sizeof(struct list_head));
1248 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
1249 ip_conntrack_htable_size = 8192;
1250 if (ip_conntrack_htable_size < 16)
1251 ip_conntrack_htable_size = 16;
1253 ip_conntrack_max = 8 * ip_conntrack_htable_size;
1255 printk("ip_conntrack version %s (%u buckets, %d max)"
1256 " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
1257 ip_conntrack_htable_size, ip_conntrack_max,
1258 sizeof(struct ip_conntrack));
1260 ret = nf_register_sockopt(&so_getorigdst);
1261 if (ret != 0) {
1262 printk(KERN_ERR "Unable to register netfilter socket option\n");
1263 return ret;
1266 /* AK: the hash table is twice as big as needed because it
1267 uses list_head. It would be much nicer for caches to use a
1268 single-pointer list head here. */
1269 ip_conntrack_vmalloc = 0;
1270 ip_conntrack_hash
1271 =(void*)__get_free_pages(GFP_KERNEL,
1272 get_order(sizeof(struct list_head)
1273 *ip_conntrack_htable_size));
1274 if (!ip_conntrack_hash) {
1275 ip_conntrack_vmalloc = 1;
1276 printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
1277 ip_conntrack_hash = vmalloc(sizeof(struct list_head)
1278 * ip_conntrack_htable_size);
1280 if (!ip_conntrack_hash) {
1281 printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
1282 goto err_unreg_sockopt;
1285 ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
1286 sizeof(struct ip_conntrack), 0,
1287 0, NULL, NULL);
1288 if (!ip_conntrack_cachep) {
1289 printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
1290 goto err_free_hash;
1293 ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
1294 sizeof(struct ip_conntrack_expect),
1295 0, 0, NULL, NULL);
1296 if (!ip_conntrack_expect_cachep) {
1297 printk(KERN_ERR "Unable to create ip_expect slab cache\n");
1298 goto err_free_conntrack_slab;
1301 /* Don't NEED lock here, but good form anyway. */
1302 write_lock_bh(&ip_conntrack_lock);
1303 for (i = 0; i < MAX_IP_CT_PROTO; i++)
1304 ip_ct_protos[i] = &ip_conntrack_generic_protocol;
1305 /* Sew in builtin protocols. */
1306 ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
1307 ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
1308 ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
1309 write_unlock_bh(&ip_conntrack_lock);
1311 for (i = 0; i < ip_conntrack_htable_size; i++)
1312 INIT_LIST_HEAD(&ip_conntrack_hash[i]);
1314 /* For use by ipt_REJECT */
1315 ip_ct_attach = ip_conntrack_attach;
1317 /* Set up fake conntrack:
1318 - to never be deleted, not in any hashes */
1319 atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
1320 /* - and make it look like a confirmed connection */
1321 set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
1323 return ret;
1325 err_free_conntrack_slab:
1326 kmem_cache_destroy(ip_conntrack_cachep);
1327 err_free_hash:
1328 free_conntrack_hash();
1329 err_unreg_sockopt:
1330 nf_unregister_sockopt(&so_getorigdst);
1332 return -ENOMEM;