/*
 * Lightweight Autonomic Network Architecture
 *
 * LANA Berkeley Packet Filter (BPF) module.
 *
 * Copyright 2011 Daniel Borkmann <dborkma@tik.ee.ethz.ch>,
 * Swiss federal institute of technology (ETH Zurich)
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/seqlock.h>
#include <linux/slab.h>
#include <linux/percpu.h>
#include <linux/prefetch.h>
#include <linux/filter.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>

#include "xt_fblock.h"
#include "xt_builder.h"
#include "xt_engine.h"
35 struct sk_filter
*filter
;
39 struct sock_fprog_kern
{
41 struct sock_filter
*filter
;
/*
 * To use the BPF JIT compiler, you need to export symbols from
 * arch/x86/net/ so that they can be used from a module. Then,
 * recompile your kernel with CONFIG_BPF_JIT=y and change symbols
 * within this file from fb_bpf_jit_<x> to bpf_jit_<x> and the macro
 * FB_SK_RUN_FILTER to SK_RUN_FILTER.
 */
53 static inline void fb_bpf_jit_compile(struct sk_filter
*fp
)
57 static inline void fb_bpf_jit_free(struct sk_filter
*fp
)
61 #define FB_SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns)
63 static int fb_bpf_init_filter(struct fb_bpf_priv __percpu
*fb_priv_cpu
,
64 struct sock_fprog_kern
*fprog
, unsigned int cpu
)
67 struct sk_filter
*sf
, *sfold
;
71 if (fprog
->filter
== NULL
)
74 fsize
= sizeof(struct sock_filter
) * fprog
->len
;
76 sf
= kmalloc_node(fsize
+ sizeof(*sf
), GFP_KERNEL
, cpu_to_node(cpu
));
80 memcpy(sf
->insns
, fprog
->filter
, fsize
);
81 atomic_set(&sf
->refcnt
, 1);
83 sf
->bpf_func
= sk_run_filter
;
85 err
= sk_chk_filter(sf
->insns
, sf
->len
);
91 fb_bpf_jit_compile(sf
);
93 spin_lock_irqsave(&fb_priv_cpu
->flock
, flags
);
94 sfold
= fb_priv_cpu
->filter
;
95 fb_priv_cpu
->filter
= sf
;
96 spin_unlock_irqrestore(&fb_priv_cpu
->flock
, flags
);
99 fb_bpf_jit_free(sfold
);
106 static int fb_bpf_init_filter_cpus(struct fblock
*fb
,
107 struct sock_fprog_kern
*fprog
)
111 struct fb_bpf_priv __percpu
*fb_priv
;
117 fb_priv
= (struct fb_bpf_priv __percpu
*) rcu_dereference_raw(fb
->private_data
);
121 for_each_online_cpu(cpu
) {
122 struct fb_bpf_priv
*fb_priv_cpu
;
123 fb_priv_cpu
= per_cpu_ptr(fb_priv
, cpu
);
124 err
= fb_bpf_init_filter(fb_priv_cpu
, fprog
, cpu
);
126 printk(KERN_ERR
"[%s::%s] fb_bpf_init_filter error: %d\n",
127 fb
->name
, fb
->factory
->type
, err
);
136 static void fb_bpf_cleanup_filter(struct fb_bpf_priv __percpu
*fb_priv_cpu
)
139 struct sk_filter
*sfold
;
141 spin_lock_irqsave(&fb_priv_cpu
->flock
, flags
);
142 sfold
= fb_priv_cpu
->filter
;
143 fb_priv_cpu
->filter
= NULL
;
144 spin_unlock_irqrestore(&fb_priv_cpu
->flock
, flags
);
147 fb_bpf_jit_free(sfold
);
152 static void fb_bpf_cleanup_filter_cpus(struct fblock
*fb
)
155 struct fb_bpf_priv __percpu
*fb_priv
;
161 fb_priv
= (struct fb_bpf_priv __percpu
*) rcu_dereference_raw(fb
->private_data
);
165 for_each_online_cpu(cpu
) {
166 struct fb_bpf_priv
*fb_priv_cpu
;
167 fb_priv_cpu
= per_cpu_ptr(fb_priv
, cpu
);
168 fb_bpf_cleanup_filter(fb_priv_cpu
);
173 static int fb_bpf_netrx(const struct fblock
* const fb
,
174 struct sk_buff
* const skb
,
175 enum path_type
* const dir
)
178 unsigned int pkt_len
;
180 struct fb_bpf_priv __percpu
*fb_priv_cpu
;
182 fb_priv_cpu
= this_cpu_ptr(rcu_dereference_raw(fb
->private_data
));
184 spin_lock_irqsave(&fb_priv_cpu
->flock
, flags
);
185 if (fb_priv_cpu
->filter
) {
186 pkt_len
= FB_SK_RUN_FILTER(fb_priv_cpu
->filter
, skb
);
187 /* No snap, either drop or pass */
188 if (pkt_len
< skb
->len
) {
189 spin_unlock_irqrestore(&fb_priv_cpu
->flock
, flags
);
194 write_next_idp_to_skb(skb
, fb
->idp
, fb_priv_cpu
->port
[*dir
]);
195 if (fb_priv_cpu
->port
[*dir
] == IDP_UNKNOWN
)
197 spin_unlock_irqrestore(&fb_priv_cpu
->flock
, flags
);
205 static int fb_bpf_event(struct notifier_block
*self
, unsigned long cmd
,
211 struct fb_bpf_priv __percpu
*fb_priv
;
214 fb
= rcu_dereference_raw(container_of(self
, struct fblock_notifier
, nb
)->self
);
215 fb_priv
= (struct fb_bpf_priv __percpu
*) rcu_dereference_raw(fb
->private_data
);
219 case FBLOCK_BIND_IDP
: {
221 struct fblock_bind_msg
*msg
= args
;
223 for_each_online_cpu(cpu
) {
224 struct fb_bpf_priv
*fb_priv_cpu
;
225 fb_priv_cpu
= per_cpu_ptr(fb_priv
, cpu
);
226 spin_lock(&fb_priv_cpu
->flock
);
227 if (fb_priv_cpu
->port
[msg
->dir
] == IDP_UNKNOWN
) {
228 fb_priv_cpu
->port
[msg
->dir
] = msg
->idp
;
232 spin_unlock(&fb_priv_cpu
->flock
);
235 spin_unlock(&fb_priv_cpu
->flock
);
239 printk(KERN_INFO
"[%s::%s] port %s bound to IDP%u\n",
240 fb
->name
, fb
->factory
->type
,
241 path_names
[msg
->dir
], msg
->idp
);
243 case FBLOCK_UNBIND_IDP
: {
245 struct fblock_bind_msg
*msg
= args
;
247 for_each_online_cpu(cpu
) {
248 struct fb_bpf_priv
*fb_priv_cpu
;
249 fb_priv_cpu
= per_cpu_ptr(fb_priv
, cpu
);
250 spin_lock(&fb_priv_cpu
->flock
);
251 if (fb_priv_cpu
->port
[msg
->dir
] == msg
->idp
) {
252 fb_priv_cpu
->port
[msg
->dir
] = IDP_UNKNOWN
;
256 spin_unlock(&fb_priv_cpu
->flock
);
259 spin_unlock(&fb_priv_cpu
->flock
);
263 printk(KERN_INFO
"[%s::%s] port %s unbound\n",
264 fb
->name
, fb
->factory
->type
,
265 path_names
[msg
->dir
]);
274 static int fb_bpf_proc_show_filter(struct seq_file
*m
, void *v
)
277 struct fblock
*fb
= (struct fblock
*) m
->private;
278 struct fb_bpf_priv
*fb_priv_cpu
;
279 struct sk_filter
*sf
;
283 fb_priv_cpu
= this_cpu_ptr(rcu_dereference_raw(fb
->private_data
));
286 spin_lock_irqsave(&fb_priv_cpu
->flock
, flags
);
287 sf
= fb_priv_cpu
->filter
;
290 if (sf
->bpf_func
== sk_run_filter
)
291 seq_puts(m
, "bpf jit: 0\n");
293 seq_puts(m
, "bpf jit: 1\n");
294 seq_puts(m
, "code:\n");
295 for (i
= 0; i
< sf
->len
; ++i
) {
297 memset(sline
, 0, sizeof(sline
));
298 snprintf(sline
, sizeof(sline
),
299 "0x%x, %u, %u, 0x%x\n",
304 sline
[sizeof(sline
) - 1] = 0;
308 spin_unlock_irqrestore(&fb_priv_cpu
->flock
, flags
);
314 static int fb_bpf_proc_open(struct inode
*inode
, struct file
*file
)
316 return single_open(file
, fb_bpf_proc_show_filter
, PDE(inode
)->data
);
319 #define MAX_BUFF_SIZ 16384
320 #define MAX_INSTR_SIZ 512
322 static ssize_t
fb_bpf_proc_write(struct file
*file
, const char __user
* ubuff
,
323 size_t count
, loff_t
* offset
)
327 char *code
, *ptr1
, *ptr2
;
328 size_t len
= MAX_BUFF_SIZ
;
329 struct sock_fprog_kern
*fp
;
330 struct fblock
*fb
= PDE(file
->f_path
.dentry
->d_inode
)->data
;
332 if (count
> MAX_BUFF_SIZ
)
334 if (count
< MAX_BUFF_SIZ
)
337 code
= kmalloc(len
, GFP_KERNEL
);
340 fp
= kmalloc(sizeof(*fp
), GFP_KERNEL
);
343 fp
->filter
= kmalloc(MAX_INSTR_SIZ
* sizeof(struct sock_filter
), GFP_KERNEL
);
346 memset(code
, 0, len
);
347 if (copy_from_user(code
, ubuff
, len
)) {
356 while (fp
->len
< MAX_INSTR_SIZ
&& (char *) (code
+ len
) > ptr1
) {
357 while (ptr1
&& *ptr1
== ' ')
359 fp
->filter
[fp
->len
].code
= (__u16
) simple_strtoul(ptr1
, &ptr2
, 16);
360 while (ptr2
&& (*ptr2
== ' ' || *ptr2
== ','))
362 fp
->filter
[fp
->len
].jt
= (__u8
) simple_strtoul(ptr2
, &ptr1
, 10);
363 while (ptr1
&& (*ptr1
== ' ' || *ptr1
== ','))
365 fp
->filter
[fp
->len
].jf
= (__u8
) simple_strtoul(ptr1
, &ptr2
, 10);
366 while (ptr2
&& (*ptr2
== ' ' || *ptr2
== ','))
368 fp
->filter
[fp
->len
].k
= (__u32
) simple_strtoul(ptr2
, &ptr1
, 16);
369 while (ptr1
&& (*ptr1
== ' ' || *ptr1
== ',' || *ptr1
== '\n'))
374 if (fp
->len
== MAX_INSTR_SIZ
) {
375 printk(KERN_ERR
"[%s::%s] Maximun instruction size exeeded!\n",
376 fb
->name
, fb
->factory
->type
);
380 printk(KERN_ERR
"[%s::%s] Parsed code:\n", fb
->name
, fb
->factory
->type
);
381 for (i
= 0; i
< fp
->len
; ++i
) {
382 printk(KERN_INFO
"[%s::%s] %d: c:0x%x jt:%u jf:%u k:0x%x\n",
383 fb
->name
, fb
->factory
->type
, i
,
384 fp
->filter
[i
].code
, fp
->filter
[i
].jt
, fp
->filter
[i
].jf
,
388 fb_bpf_cleanup_filter_cpus(fb
);
389 ret
= fb_bpf_init_filter_cpus(fb
, fp
);
391 printk(KERN_INFO
"[%s::%s] Filter injected!\n",
392 fb
->name
, fb
->factory
->type
);
394 printk(KERN_ERR
"[%s::%s] Filter injection error: %ld!\n",
395 fb
->name
, fb
->factory
->type
, ret
);
396 fb_bpf_cleanup_filter_cpus(fb
);
410 return !ret
? -ENOMEM
: ret
;
414 static const struct file_operations fb_bpf_proc_fops
= {
415 .owner
= THIS_MODULE
,
416 .open
= fb_bpf_proc_open
,
419 .write
= fb_bpf_proc_write
,
420 .release
= single_release
,
423 static struct fblock
*fb_bpf_ctor(char *name
)
428 struct fb_bpf_priv __percpu
*fb_priv
;
429 struct proc_dir_entry
*fb_proc
;
431 fb
= alloc_fblock(GFP_ATOMIC
);
435 fb_priv
= alloc_percpu(struct fb_bpf_priv
);
440 for_each_online_cpu(cpu
) {
441 struct fb_bpf_priv
*fb_priv_cpu
;
442 fb_priv_cpu
= per_cpu_ptr(fb_priv
, cpu
);
443 spin_lock_init(&fb_priv_cpu
->flock
);
444 fb_priv_cpu
->port
[0] = IDP_UNKNOWN
;
445 fb_priv_cpu
->port
[1] = IDP_UNKNOWN
;
446 fb_priv_cpu
->filter
= NULL
;
450 ret
= init_fblock(fb
, name
, fb_priv
);
454 fb
->netfb_rx
= fb_bpf_netrx
;
455 fb
->event_rx
= fb_bpf_event
;
457 fb_proc
= proc_create_data(fb
->name
, 0444, fblock_proc_dir
,
458 &fb_bpf_proc_fops
, (void *)(long) fb
);
462 ret
= register_fblock_namespace(fb
);
466 __module_get(THIS_MODULE
);
470 remove_proc_entry(fb
->name
, fblock_proc_dir
);
472 cleanup_fblock_ctor(fb
);
474 free_percpu(fb_priv
);
480 static void fb_bpf_dtor(struct fblock
*fb
)
482 fb_bpf_cleanup_filter_cpus(fb
);
483 free_percpu(rcu_dereference_raw(fb
->private_data
));
484 remove_proc_entry(fb
->name
, fblock_proc_dir
);
485 module_put(THIS_MODULE
);
488 static struct fblock_factory fb_bpf_factory
= {
493 .owner
= THIS_MODULE
,
496 static int __init
init_fb_bpf_module(void)
498 return register_fblock_type(&fb_bpf_factory
);
501 static void __exit
cleanup_fb_bpf_module(void)
503 unregister_fblock_type(&fb_bpf_factory
);
506 module_init(init_fb_bpf_module
);
507 module_exit(cleanup_fb_bpf_module
);
509 MODULE_LICENSE("GPL");
510 MODULE_AUTHOR("Daniel Borkmann <dborkma@tik.ee.ethz.ch>");
511 MODULE_DESCRIPTION("LANA Berkeley Packet Filter module");