/* flow.c: Generic flow cache.
 *
 * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
 * Copyright (C) 2003 David S. Miller (davem@redhat.com)
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/completion.h>
#include <linux/percpu.h>
#include <linux/bitops.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/mutex.h>
#include <net/flow.h>
#include <asm/atomic.h>
#include <asm/semaphore.h>
#include <linux/security.h>
struct flow_cache_entry {
	struct flow_cache_entry	*next;
	u16			family;
	u8			dir;
	struct flowi		key;
	u32			genid;
	void			*object;
	atomic_t		*object_ref;
};
atomic_t flow_cache_genid = ATOMIC_INIT(0);
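
/*
 * Cached entries record the value of flow_cache_genid at resolve time
 * (see flow_cache_lookup() below).  A cache user can therefore
 * invalidate every cached object at once by bumping the counter, for
 * example:
 *
 *	atomic_inc(&flow_cache_genid);
 *
 * Stale entries are then re-resolved lazily on their next lookup.
 */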

static u32 flow_hash_shift;
#define flow_hash_size	(1 << flow_hash_shift)
static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };

#define flow_table(cpu) (per_cpu(flow_tables, cpu))

static kmem_cache_t *flow_cachep __read_mostly;

static int flow_lwm, flow_hwm;

struct flow_percpu_info {
	int hash_rnd_recalc;
	u32 hash_rnd;
	int count;
} ____cacheline_aligned;
static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };

#define flow_hash_rnd_recalc(cpu) \
	(per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
#define flow_hash_rnd(cpu) \
	(per_cpu(flow_hash_info, cpu).hash_rnd)
#define flow_count(cpu) \
	(per_cpu(flow_hash_info, cpu).count)

static struct timer_list flow_hash_rnd_timer;

#define FLOW_HASH_RND_PERIOD	(10 * 60 * HZ)

struct flow_flush_info {
	atomic_t cpuleft;
	struct completion completion;
};
static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };

#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))

static void flow_cache_new_hashrnd(unsigned long arg)
{
	int i;

	for_each_possible_cpu(i)
		flow_hash_rnd_recalc(i) = 1;

	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
	add_timer(&flow_hash_rnd_timer);
}

static void flow_entry_kill(int cpu, struct flow_cache_entry *fle)
{
	if (fle->object)
		atomic_dec(fle->object_ref);
	kmem_cache_free(flow_cachep, fle);
	flow_count(cpu)--;
}

static void __flow_cache_shrink(int cpu, int shrink_to)
{
	struct flow_cache_entry *fle, **flp;
	int i;

	for (i = 0; i < flow_hash_size; i++) {
		int k = 0;

		flp = &flow_table(cpu)[i];
		while ((fle = *flp) != NULL && k < shrink_to) {
			k++;
			flp = &fle->next;
		}
		while ((fle = *flp) != NULL) {
			*flp = fle->next;
			flow_entry_kill(cpu, fle);
		}
	}
}

static void flow_cache_shrink(int cpu)
{
	int shrink_to = flow_lwm / flow_hash_size;

	__flow_cache_shrink(cpu, shrink_to);
}

static void flow_new_hash_rnd(int cpu)
{
	get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
	flow_hash_rnd_recalc(cpu) = 0;

	__flow_cache_shrink(cpu, 0);
}

static u32 flow_hash_code(struct flowi *key, int cpu)
{
	u32 *k = (u32 *) key;

	return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
		(flow_hash_size - 1));
}
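
/*
 * The hash mixes the raw 32-bit words of the flowi key with a per-CPU
 * random seed (flow_hash_rnd) and masks the result down to the table
 * size, which is always a power of two (1 << flow_hash_shift).  The
 * seed is regenerated periodically, which is why flow_new_hash_rnd()
 * above shrinks the table to zero: entries hashed under the old seed
 * would no longer be found in their buckets.
 */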

#if (BITS_PER_LONG == 64)
typedef u64 flow_compare_t;
#else
typedef u32 flow_compare_t;
#endif

extern void flowi_is_missized(void);

/* I hear what you're saying, use memcmp.  But memcmp cannot make
 * important assumptions that we can here, such as alignment and
 * constant size.
 */
static int flow_key_compare(struct flowi *key1, struct flowi *key2)
{
	flow_compare_t *k1, *k1_lim, *k2;
	const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);

	if (sizeof(struct flowi) % sizeof(flow_compare_t))
		flowi_is_missized();

	k1 = (flow_compare_t *) key1;
	k1_lim = k1 + n_elem;

	k2 = (flow_compare_t *) key2;

	do {
		if (*k1++ != *k2++)
			return 1;
	} while (k1 < k1_lim);

	return 0;
}
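
/*
 * Functionally the word loop above is just a sketch of
 *
 *	memcmp(key1, key2, sizeof(struct flowi)) != 0
 *
 * but the open-coded version lets the compiler rely on the alignment
 * and constant size of struct flowi, as the comment above notes.
 */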

void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
			flow_resolve_t resolver)
{
	struct flow_cache_entry *fle, **head;
	unsigned int hash;
	int cpu;

	local_bh_disable();
	cpu = smp_processor_id();

	fle = NULL;
	/* Packet really early in init?  Making flow_cache_init a
	 * pre-smp initcall would solve this.  --RR */
	if (!flow_table(cpu))
		goto nocache;

	if (flow_hash_rnd_recalc(cpu))
		flow_new_hash_rnd(cpu);
	hash = flow_hash_code(key, cpu);

	head = &flow_table(cpu)[hash];
	for (fle = *head; fle; fle = fle->next) {
		if (fle->family == family &&
		    fle->dir == dir &&
		    flow_key_compare(key, &fle->key) == 0) {
			if (fle->genid == atomic_read(&flow_cache_genid)) {
				void *ret = fle->object;

				if (ret)
					atomic_inc(fle->object_ref);
				local_bh_enable();

				return ret;
			}
			break;
		}
	}

	if (!fle) {
		if (flow_count(cpu) > flow_hwm)
			flow_cache_shrink(cpu);

		fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC);
		if (fle) {
			fle->next = *head;
			*head = fle;
			fle->family = family;
			fle->dir = dir;
			memcpy(&fle->key, key, sizeof(*key));
			fle->object = NULL;
			flow_count(cpu)++;
		}
	}

nocache:
	{
		int err;
		void *obj;
		atomic_t *obj_ref;

		err = resolver(key, family, dir, &obj, &obj_ref);

		if (fle) {
			if (err) {
				/* Force security policy check on next lookup */
				*head = fle->next;
				flow_entry_kill(cpu, fle);
			} else {
				fle->genid = atomic_read(&flow_cache_genid);

				if (fle->object)
					atomic_dec(fle->object_ref);

				fle->object = obj;
				fle->object_ref = obj_ref;
				if (obj)
					atomic_inc(fle->object_ref);
			}
		}
		local_bh_enable();

		if (err)
			obj = ERR_PTR(err);
		return obj;
	}
}
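
/*
 * Usage sketch (hypothetical caller, not part of this file).  A
 * resolver matching the call above has the shape
 *
 *	int (*resolver)(struct flowi *key, u16 family, u8 dir,
 *			void **objp, atomic_t **obj_refp);
 *
 * It fills in the resolved object and a pointer to that object's
 * reference counter, which the cache bumps for the copy it keeps:
 *
 *	obj = flow_cache_lookup(key, family, dir, my_resolver);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *
 * On a cache hit the stored object is returned after taking an extra
 * reference through object_ref; on resolver failure the error is
 * propagated as an ERR_PTR() value.
 */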

static void flow_cache_flush_tasklet(unsigned long data)
{
	struct flow_flush_info *info = (void *)data;
	int i;
	int cpu;

	cpu = smp_processor_id();
	for (i = 0; i < flow_hash_size; i++) {
		struct flow_cache_entry *fle;

		fle = flow_table(cpu)[i];
		for (; fle; fle = fle->next) {
			unsigned genid = atomic_read(&flow_cache_genid);

			if (!fle->object || fle->genid == genid)
				continue;

			fle->object = NULL;
			atomic_dec(fle->object_ref);
		}
	}

	if (atomic_dec_and_test(&info->cpuleft))
		complete(&info->completion);
}

static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
static void flow_cache_flush_per_cpu(void *data)
{
	struct flow_flush_info *info = data;
	int cpu;
	struct tasklet_struct *tasklet;

	cpu = smp_processor_id();

	tasklet = flow_flush_tasklet(cpu);
	tasklet->data = (unsigned long)info;
	tasklet_schedule(tasklet);
}

void flow_cache_flush(void)
{
	struct flow_flush_info info;
	static DEFINE_MUTEX(flow_flush_sem);

	/* Don't want cpus going down or up during this. */
	lock_cpu_hotplug();
	mutex_lock(&flow_flush_sem);
	atomic_set(&info.cpuleft, num_online_cpus());
	init_completion(&info.completion);

	local_bh_disable();
	smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0);
	flow_cache_flush_tasklet((unsigned long)&info);
	local_bh_enable();

	wait_for_completion(&info.completion);
	mutex_unlock(&flow_flush_sem);
	unlock_cpu_hotplug();
}
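
/*
 * Flush protocol, as implemented above: flow_cache_flush() IPIs every
 * other online CPU via smp_call_function(), each of which schedules
 * its per-CPU flush tasklet, while the calling CPU runs the tasklet
 * body directly with BHs disabled.  Each tasklet drops the references
 * held by stale entries and decrements info->cpuleft; the last CPU to
 * finish completes info->completion and releases the waiter.
 */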

static void __devinit flow_cache_cpu_prepare(int cpu)
{
	struct tasklet_struct *tasklet;
	unsigned long order;

	for (order = 0;
	     (PAGE_SIZE << order) <
		     (sizeof(struct flow_cache_entry *)*flow_hash_size);
	     order++)
		/* NOTHING */;

	flow_table(cpu) = (struct flow_cache_entry **)
		__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
	if (!flow_table(cpu))
		panic("NET: failed to allocate flow cache order %lu\n", order);

	flow_hash_rnd_recalc(cpu) = 1;
	flow_count(cpu) = 0;

	tasklet = flow_flush_tasklet(cpu);
	tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
}

#ifdef CONFIG_HOTPLUG_CPU
static int flow_cache_cpu(struct notifier_block *nfb,
			  unsigned long action,
			  void *hcpu)
{
	if (action == CPU_DEAD)
		__flow_cache_shrink((unsigned long)hcpu, 0);
	return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __init flow_cache_init(void)
{
	int i;

	flow_cachep = kmem_cache_create("flow_cache",
					sizeof(struct flow_cache_entry),
					0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					NULL, NULL);
	flow_hash_shift = 10;
	flow_lwm = 2 * flow_hash_size;
	flow_hwm = 4 * flow_hash_size;

	init_timer(&flow_hash_rnd_timer);
	flow_hash_rnd_timer.function = flow_cache_new_hashrnd;
	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
	add_timer(&flow_hash_rnd_timer);

	for_each_possible_cpu(i)
		flow_cache_cpu_prepare(i);

	hotcpu_notifier(flow_cache_cpu, 0);
	return 0;
}

module_init(flow_cache_init);

EXPORT_SYMBOL(flow_cache_genid);
EXPORT_SYMBOL(flow_cache_lookup);