/* netfilter.c: look after the filters for various protocols.
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 *
 * Rusty Russell (C)2000 -- This code is GPL.
 *
 * February 2000: Modified by James Morris to have 1 queue per protocol.
 * 15-Mar-2000: Added NF_REPEAT --RR.
 * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik.
 */
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <net/protocol.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <net/sock.h>
/* In this code, we can be waiting indefinitely for userspace to
 * service a packet if a hook returns NF_QUEUE.  We could keep a count
 * of skbuffs queued for userspace, and not deregister a hook unless
 * this is zero, but that sucks.  Now, we simply check when the
 * packets come back: if the hook is gone, the packet is discarded. */
#ifdef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...)  printk(format , ## args)
#else
#define NFDEBUG(format, args...)
#endif
/* Sockopts only registered and called from user context, so
   net locking would be overkill.  Also, [gs]etsockopt calls may
   sleep. */
static DECLARE_MUTEX(nf_sockopt_mutex);

struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
static LIST_HEAD(nf_sockopts);
static DEFINE_SPINLOCK(nf_hook_lock);
/*
 * A queue handler may be registered for each protocol.  Each is protected by
 * a long term mutex.  The handler must provide an outfn() to accept packets
 * for queueing and must reinject all packets it receives, no matter what.
 */
static struct nf_queue_handler_t {
        nf_queue_outfn_t outfn;
        void *data;
} queue_handler[NPROTO];

static struct nf_queue_rerouter *queue_rerouter;

static DEFINE_RWLOCK(queue_handler_lock);
int nf_register_hook(struct nf_hook_ops *reg)
{
        struct list_head *i;

        spin_lock_bh(&nf_hook_lock);
        list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
                if (reg->priority < ((struct nf_hook_ops *)i)->priority)
                        break;
        }
        list_add_rcu(&reg->list, i->prev);
        spin_unlock_bh(&nf_hook_lock);

        synchronize_net();
        return 0;
}
void nf_unregister_hook(struct nf_hook_ops *reg)
{
        spin_lock_bh(&nf_hook_lock);
        list_del_rcu(&reg->list);
        spin_unlock_bh(&nf_hook_lock);

        synchronize_net();
}
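
/*
 * Example (illustrative sketch, not part of the original file): how a
 * module might use the registration API above.  The hook function, the
 * chosen hook point and the priority are hypothetical; any hook point
 * and priority from linux/netfilter_ipv4.h would do.
 *
 *      static unsigned int my_hook(unsigned int hooknum,
 *                                  struct sk_buff **pskb,
 *                                  const struct net_device *in,
 *                                  const struct net_device *out,
 *                                  int (*okfn)(struct sk_buff *))
 *      {
 *              return NF_ACCEPT;       (pass the packet through unchanged)
 *      }
 *
 *      static struct nf_hook_ops my_ops = {
 *              .hook     = my_hook,
 *              .owner    = THIS_MODULE,
 *              .pf       = PF_INET,
 *              .hooknum  = NF_IP_LOCAL_IN,
 *              .priority = NF_IP_PRI_FILTER,
 *      };
 *
 * A module would call nf_register_hook(&my_ops) from its init function and
 * nf_unregister_hook(&my_ops) on cleanup; as nf_register_hook() shows,
 * hooks on the same chain are ordered by ascending priority.
 */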
/* Do exclusive ranges overlap? */
static inline int overlap(int min1, int max1, int min2, int max2)
{
        return max1 > min2 && min1 < max2;
}
/* Functions to register sockopt ranges (exclusive). */
int nf_register_sockopt(struct nf_sockopt_ops *reg)
{
        struct list_head *i;
        int ret = 0;

        if (down_interruptible(&nf_sockopt_mutex) != 0)
                return -EINTR;

        list_for_each(i, &nf_sockopts) {
                struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
                if (ops->pf == reg->pf
                    && (overlap(ops->set_optmin, ops->set_optmax,
                                reg->set_optmin, reg->set_optmax)
                        || overlap(ops->get_optmin, ops->get_optmax,
                                   reg->get_optmin, reg->get_optmax))) {
                        NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
                                ops->set_optmin, ops->set_optmax,
                                ops->get_optmin, ops->get_optmax,
                                reg->set_optmin, reg->set_optmax,
                                reg->get_optmin, reg->get_optmax);
                        ret = -EBUSY;
                        goto out;
                }
        }

        list_add(&reg->list, &nf_sockopts);
out:
        up(&nf_sockopt_mutex);
        return ret;
}
void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
{
        /* No point being interruptible: we're probably in cleanup_module() */
restart:
        down(&nf_sockopt_mutex);
        if (reg->use != 0) {
                /* To be woken by nf_sockopt call... */
                /* FIXME: Stuart Young's name appears gratuitously. */
                set_current_state(TASK_UNINTERRUPTIBLE);
                reg->cleanup_task = current;
                up(&nf_sockopt_mutex);
                schedule();
                goto restart;
        }
        list_del(&reg->list);
        up(&nf_sockopt_mutex);
}
/* Call get/setsockopt() */
static int nf_sockopt(struct sock *sk, int pf, int val,
                      char __user *opt, int *len, int get)
{
        struct list_head *i;
        struct nf_sockopt_ops *ops;
        int ret;

        if (down_interruptible(&nf_sockopt_mutex) != 0)
                return -EINTR;

        list_for_each(i, &nf_sockopts) {
                ops = (struct nf_sockopt_ops *)i;
                if (ops->pf == pf) {
                        if (get) {
                                if (val >= ops->get_optmin
                                    && val < ops->get_optmax) {
                                        ops->use++;
                                        up(&nf_sockopt_mutex);
                                        ret = ops->get(sk, val, opt, len);
                                        goto out;
                                }
                        } else {
                                if (val >= ops->set_optmin
                                    && val < ops->set_optmax) {
                                        ops->use++;
                                        up(&nf_sockopt_mutex);
                                        ret = ops->set(sk, val, opt, *len);
                                        goto out;
                                }
                        }
                }
        }
        up(&nf_sockopt_mutex);
        return -ENOPROTOOPT;

out:
        down(&nf_sockopt_mutex);
        ops->use--;
        if (ops->cleanup_task)
                wake_up_process(ops->cleanup_task);
        up(&nf_sockopt_mutex);
        return ret;
}
int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
                  int len)
{
        return nf_sockopt(sk, pf, val, opt, &len, 0);
}

int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
{
        return nf_sockopt(sk, pf, val, opt, len, 1);
}
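
/*
 * Example (illustrative sketch, not part of the original file): reserving
 * an exclusive sockopt range.  MY_SOCKOPT_BASE and the handlers are
 * hypothetical; real users such as ip_tables reserve their own ranges.
 * Both ranges are half-open: nf_sockopt() above dispatches option value v
 * to this handler when optmin <= v < optmax.
 *
 *      static int my_set(struct sock *sk, int optval,
 *                        void __user *user, unsigned int len)
 *      { ... }
 *
 *      static int my_get(struct sock *sk, int optval,
 *                        void __user *user, int *len)
 *      { ... }
 *
 *      static struct nf_sockopt_ops my_sockopts = {
 *              .pf         = PF_INET,
 *              .set_optmin = MY_SOCKOPT_BASE,
 *              .set_optmax = MY_SOCKOPT_BASE + 3,
 *              .set        = my_set,
 *              .get_optmin = MY_SOCKOPT_BASE,
 *              .get_optmax = MY_SOCKOPT_BASE + 3,
 *              .get        = my_get,
 *      };
 *
 *      nf_register_sockopt(&my_sockopts);
 *
 * nf_register_sockopt() returns -EBUSY if the range overlaps one already
 * registered for the same protocol family.
 */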
static unsigned int nf_iterate(struct list_head *head,
                               struct sk_buff **skb,
                               int hook,
                               const struct net_device *indev,
                               const struct net_device *outdev,
                               struct list_head **i,
                               int (*okfn)(struct sk_buff *),
                               int hook_thresh)
{
        unsigned int verdict;

        /*
         * The caller must not block between calls to this
         * function because of risk of continuing from deleted element.
         */
        list_for_each_continue_rcu(*i, head) {
                struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;

                if (hook_thresh > elem->priority)
                        continue;

                /* Optimization: we don't need to hold module
                   reference here, since function can't sleep. --RR */
                verdict = elem->hook(hook, skb, indev, outdev, okfn);
                if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
                        if (unlikely((verdict & NF_VERDICT_MASK)
                                                        > NF_MAX_VERDICT)) {
                                NFDEBUG("Evil return from %p(%u).\n",
                                        elem->hook, hook);
                                continue;
                        }
#endif
                        if (verdict != NF_REPEAT)
                                return verdict;
                        *i = (*i)->prev;
                }
        }
        return NF_ACCEPT;
}
int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
{
        int ret;

        if (pf >= NPROTO)
                return -EINVAL;

        write_lock_bh(&queue_handler_lock);
        if (queue_handler[pf].outfn)
                ret = -EBUSY;
        else {
                queue_handler[pf].outfn = outfn;
                queue_handler[pf].data = data;
                ret = 0;
        }
        write_unlock_bh(&queue_handler_lock);

        return ret;
}
/* The caller must flush their queue before this */
int nf_unregister_queue_handler(int pf)
{
        if (pf >= NPROTO)
                return -EINVAL;

        write_lock_bh(&queue_handler_lock);
        queue_handler[pf].outfn = NULL;
        queue_handler[pf].data = NULL;
        write_unlock_bh(&queue_handler_lock);

        return 0;
}
int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer)
{
        if (pf >= NPROTO)
                return -EINVAL;

        write_lock_bh(&queue_handler_lock);
        memcpy(&queue_rerouter[pf], rer, sizeof(queue_rerouter[pf]));
        write_unlock_bh(&queue_handler_lock);

        return 0;
}

int nf_unregister_queue_rerouter(int pf)
{
        if (pf >= NPROTO)
                return -EINVAL;

        write_lock_bh(&queue_handler_lock);
        memset(&queue_rerouter[pf], 0, sizeof(queue_rerouter[pf]));
        write_unlock_bh(&queue_handler_lock);

        return 0;
}
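
/*
 * Example (illustrative sketch, not part of the original file): claiming
 * the per-protocol queueing slot.  my_outfn is hypothetical; the real
 * handlers are ip_queue and nfnetlink_queue.  A negative return from the
 * outfn tells nf_queue() below that queueing failed.
 *
 *      static int my_outfn(struct sk_buff *skb, struct nf_info *info,
 *                          unsigned int queuenum, void *data)
 *      {
 *              ... hand skb to userspace, keeping info for later ...
 *              return 0;
 *      }
 *
 *      nf_register_queue_handler(PF_INET, my_outfn, NULL);
 *
 * Every packet the outfn accepts must eventually come back through
 * nf_reinject(skb, info, verdict), as the comment above nf_queue() insists.
 */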
void nf_unregister_queue_handlers(nf_queue_outfn_t outfn)
{
        int pf;

        write_lock_bh(&queue_handler_lock);
        for (pf = 0; pf < NPROTO; pf++) {
                if (queue_handler[pf].outfn == outfn) {
                        queue_handler[pf].outfn = NULL;
                        queue_handler[pf].data = NULL;
                }
        }
        write_unlock_bh(&queue_handler_lock);
}
/*
 * Any packet that leaves via this function must come back
 * through nf_reinject().
 */
static int nf_queue(struct sk_buff **skb,
                    struct list_head *elem,
                    int pf, unsigned int hook,
                    struct net_device *indev,
                    struct net_device *outdev,
                    int (*okfn)(struct sk_buff *),
                    unsigned int queuenum)
{
        int status;
        struct nf_info *info;
#ifdef CONFIG_BRIDGE_NETFILTER
        struct net_device *physindev = NULL;
        struct net_device *physoutdev = NULL;
#endif

        /* QUEUE == DROP if no one is waiting, to be safe. */
        read_lock(&queue_handler_lock);
        if (!queue_handler[pf].outfn) {
                read_unlock(&queue_handler_lock);
                kfree_skb(*skb);
                return 1;
        }

        info = kmalloc(sizeof(*info) + queue_rerouter[pf].rer_size,
                       GFP_ATOMIC);
        if (!info) {
                if (net_ratelimit())
                        printk(KERN_ERR "OOM queueing packet %p\n",
                               *skb);
                read_unlock(&queue_handler_lock);
                kfree_skb(*skb);
                return 1;
        }

        *info = (struct nf_info) {
                (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };

        /* If it's going away, ignore hook. */
        if (!try_module_get(info->elem->owner)) {
                read_unlock(&queue_handler_lock);
                kfree(info);
                return 0;
        }

        /* Bump dev refs so they don't vanish while packet is out */
        if (indev) dev_hold(indev);
        if (outdev) dev_hold(outdev);

#ifdef CONFIG_BRIDGE_NETFILTER
        if ((*skb)->nf_bridge) {
                physindev = (*skb)->nf_bridge->physindev;
                if (physindev) dev_hold(physindev);
                physoutdev = (*skb)->nf_bridge->physoutdev;
                if (physoutdev) dev_hold(physoutdev);
        }
#endif
        if (queue_rerouter[pf].save)
                queue_rerouter[pf].save(*skb, info);

        status = queue_handler[pf].outfn(*skb, info, queuenum,
                                         queue_handler[pf].data);

        if (status >= 0 && queue_rerouter[pf].reroute)
                status = queue_rerouter[pf].reroute(skb, info);

        read_unlock(&queue_handler_lock);

        if (status < 0) {
                /* James M doesn't say fuck enough. */
                if (indev) dev_put(indev);
                if (outdev) dev_put(outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
                if (physindev) dev_put(physindev);
                if (physoutdev) dev_put(physoutdev);
#endif
                module_put(info->elem->owner);
                kfree(info);
                kfree_skb(*skb);

                return 1;
        }

        return 1;
}
/* Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise. */
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
                 struct net_device *indev,
                 struct net_device *outdev,
                 int (*okfn)(struct sk_buff *),
                 int hook_thresh)
{
        struct list_head *elem;
        unsigned int verdict;
        int ret = 0;

        /* We may already have this, but read-locks nest anyway */
        rcu_read_lock();

        elem = &nf_hooks[pf][hook];
next_hook:
        verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
                             outdev, &elem, okfn, hook_thresh);
        if (verdict == NF_ACCEPT || verdict == NF_STOP) {
                ret = 1;
                goto unlock;
        } else if (verdict == NF_DROP) {
                kfree_skb(*pskb);
                ret = -EPERM;
        } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
                NFDEBUG("nf_hook: Verdict = QUEUE.\n");
                if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn,
                              verdict >> NF_VERDICT_BITS))
                        goto next_hook;
        }
unlock:
        rcu_read_unlock();
        return ret;
}
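
/*
 * Example (illustrative sketch, not part of the original file): how a hook
 * selects a target queue.  The queue number travels in the upper bits of
 * the verdict, matching the "verdict >> NF_VERDICT_BITS" decoding above;
 * queue_to_five() is hypothetical.
 *
 *      static unsigned int queue_to_five(unsigned int hooknum,
 *                                        struct sk_buff **pskb,
 *                                        const struct net_device *in,
 *                                        const struct net_device *out,
 *                                        int (*okfn)(struct sk_buff *))
 *      {
 *              return NF_QUEUE | (5 << NF_VERDICT_BITS);
 *      }
 *
 * If no queue handler is registered for the protocol family, nf_queue()
 * drops the packet ("QUEUE == DROP if no one is waiting").
 */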
void nf_reinject(struct sk_buff *skb, struct nf_info *info,
                 unsigned int verdict)
{
        struct list_head *elem = &info->elem->list;
        struct list_head *i;

        rcu_read_lock();

        /* Release those devices we held, or Alexey will kill me. */
        if (info->indev) dev_put(info->indev);
        if (info->outdev) dev_put(info->outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
        if (skb->nf_bridge) {
                if (skb->nf_bridge->physindev)
                        dev_put(skb->nf_bridge->physindev);
                if (skb->nf_bridge->physoutdev)
                        dev_put(skb->nf_bridge->physoutdev);
        }
#endif

        /* Drop reference to owner of hook which queued us. */
        module_put(info->elem->owner);

        list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
                if (i == elem)
                        break;
        }

        if (i == &nf_hooks[info->pf][info->hook]) {
                /* The module which sent it to userspace is gone. */
                NFDEBUG("%s: module disappeared, dropping packet.\n",
                        __FUNCTION__);
                verdict = NF_DROP;
        }

        /* Continue traversal iff userspace said ok... */
        if (verdict == NF_REPEAT) {
                elem = elem->prev;
                verdict = NF_ACCEPT;
        }

        if (verdict == NF_ACCEPT) {
        next_hook:
                verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
                                     &skb, info->hook,
                                     info->indev, info->outdev, &elem,
                                     info->okfn, INT_MIN);
        }

        switch (verdict & NF_VERDICT_MASK) {
        case NF_ACCEPT:
                info->okfn(skb);
                break;

        case NF_QUEUE:
                if (!nf_queue(&skb, elem, info->pf, info->hook,
                              info->indev, info->outdev, info->okfn,
                              verdict >> NF_VERDICT_BITS))
                        goto next_hook;
                break;
        }
        rcu_read_unlock();

        if (verdict == NF_DROP)
                kfree_skb(skb);

        kfree(info);
        return;
}
int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len)
{
        struct sk_buff *nskb;

        if (writable_len > (*pskb)->len)
                return 0;

        /* Not exclusive use of packet?  Must copy. */
        if (skb_shared(*pskb) || skb_cloned(*pskb))
                goto copy_skb;

        return pskb_may_pull(*pskb, writable_len);

copy_skb:
        nskb = skb_copy(*pskb, GFP_ATOMIC);
        if (!nskb)
                return 0;
        BUG_ON(skb_is_nonlinear(nskb));

        /* Rest of kernel will get very unhappy if we pass it a
           suddenly-orphaned skbuff */
        if ((*pskb)->sk)
                skb_set_owner_w(nskb, (*pskb)->sk);
        kfree_skb(*pskb);
        *pskb = nskb;
        return 1;
}
EXPORT_SYMBOL(skb_make_writable);
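
/*
 * Example (illustrative sketch, not part of the original file): a hook or
 * target that rewrites a header calls skb_make_writable() first.  On
 * success the skb may have been replaced by a private copy, which is why
 * the function takes a struct sk_buff **; mangle_hook is hypothetical.
 *
 *      static unsigned int mangle_hook(unsigned int hooknum,
 *                                      struct sk_buff **pskb,
 *                                      const struct net_device *in,
 *                                      const struct net_device *out,
 *                                      int (*okfn)(struct sk_buff *))
 *      {
 *              if (!skb_make_writable(pskb, sizeof(struct iphdr)))
 *                      return NF_DROP;
 *              ... (*pskb)->nh.iph can now be modified safely ...
 *              return NF_ACCEPT;
 *      }
 */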
/* Internal logging interface, which relies on the real
   LOG target modules */

#define NF_LOG_PREFIXLEN		128

static struct nf_logger *nf_logging[NPROTO]; /* = NULL */
static DEFINE_SPINLOCK(nf_log_lock);
int nf_log_register(int pf, struct nf_logger *logger)
{
        int ret = -EBUSY;

        /* Any setup of logging members must be done before
         * substituting pointer. */
        spin_lock(&nf_log_lock);
        if (!nf_logging[pf]) {
                rcu_assign_pointer(nf_logging[pf], logger);
                ret = 0;
        }
        spin_unlock(&nf_log_lock);
        return ret;
}
void nf_log_unregister_pf(int pf)
{
        spin_lock(&nf_log_lock);
        nf_logging[pf] = NULL;
        spin_unlock(&nf_log_lock);

        /* Give time to concurrent readers. */
        synchronize_net();
}

void nf_log_unregister_logger(struct nf_logger *logger)
{
        int i;

        spin_lock(&nf_log_lock);
        for (i = 0; i < NPROTO; i++) {
                if (nf_logging[i] == logger)
                        nf_logging[i] = NULL;
        }
        spin_unlock(&nf_log_lock);

        synchronize_net();
}
void nf_log_packet(int pf,
                   unsigned int hooknum,
                   const struct sk_buff *skb,
                   const struct net_device *in,
                   const struct net_device *out,
                   struct nf_loginfo *loginfo,
                   const char *fmt, ...)
{
        va_list args;
        char prefix[NF_LOG_PREFIXLEN];
        struct nf_logger *logger;

        rcu_read_lock();
        logger = rcu_dereference(nf_logging[pf]);
        if (logger) {
                va_start(args, fmt);
                vsnprintf(prefix, sizeof(prefix), fmt, args);
                va_end(args);
                /* We must read logging before nf_logfn[pf] */
                logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix);
        } else if (net_ratelimit()) {
                printk(KERN_WARNING "nf_log_packet: can't log since "
                       "no backend logging module loaded in! Please either "
                       "load one, or disable logging explicitly\n");
        }
        rcu_read_unlock();
}
EXPORT_SYMBOL(nf_log_register);
EXPORT_SYMBOL(nf_log_unregister_pf);
EXPORT_SYMBOL(nf_log_unregister_logger);
EXPORT_SYMBOL(nf_log_packet);
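
/*
 * Example (illustrative sketch, not part of the original file): a backend
 * logger registers per protocol family and receives the prefix formatted
 * by nf_log_packet() above.  my_logfn and my_logger are hypothetical, and
 * the exact logfn signature and the .me member are assumptions based on
 * this patchset's struct nf_logger; the real backends are the LOG/ULOG
 * targets and nfnetlink_log.
 *
 *      static void my_logfn(unsigned int pf, unsigned int hooknum,
 *                           const struct sk_buff *skb,
 *                           const struct net_device *in,
 *                           const struct net_device *out,
 *                           const struct nf_loginfo *li,
 *                           const char *prefix)
 *      {
 *              printk("%s packet via %s\n", prefix, in ? in->name : "?");
 *      }
 *
 *      static struct nf_logger my_logger = {
 *              .name  = "my_logger",
 *              .logfn = my_logfn,
 *              .me    = THIS_MODULE,
 *      };
 *
 *      nf_log_register(PF_INET, &my_logger);
 *
 * Callers may then log through the generic entry point, passing a NULL
 * loginfo if the backend's defaults are acceptable:
 *
 *      nf_log_packet(PF_INET, hooknum, skb, in, out, NULL, "my prefix: ");
 */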
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc_net_netfilter;
EXPORT_SYMBOL(proc_net_netfilter);

static void *seq_start(struct seq_file *seq, loff_t *pos)
{
        rcu_read_lock();

        if (*pos >= NPROTO)
                return NULL;

        return pos;
}

static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
{
        (*pos)++;

        if (*pos >= NPROTO)
                return NULL;

        return pos;
}

static void seq_stop(struct seq_file *s, void *v)
{
        rcu_read_unlock();
}

static int seq_show(struct seq_file *s, void *v)
{
        loff_t *pos = v;
        const struct nf_logger *logger;

        logger = rcu_dereference(nf_logging[*pos]);

        if (!logger)
                return seq_printf(s, "%2lld NONE\n", *pos);

        return seq_printf(s, "%2lld %s\n", *pos, logger->name);
}

static struct seq_operations nflog_seq_ops = {
        .start  = seq_start,
        .next   = seq_next,
        .stop   = seq_stop,
        .show   = seq_show,
};

static int nflog_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &nflog_seq_ops);
}

static struct file_operations nflog_file_ops = {
        .owner   = THIS_MODULE,
        .open    = nflog_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};

#endif /* PROC_FS */
/* This does not belong here, but locally generated errors need it if
   connection tracking is in use: without this, the connection may not be
   in the hash table, and hence manufactured ICMP or RST packets will not
   be associated with it. */
void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);

void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
{
        void (*attach)(struct sk_buff *, struct sk_buff *);

        if (skb->nfct && (attach = ip_ct_attach) != NULL) {
                mb(); /* Just to be sure: must be read before executing this */
                attach(new, skb);
        }
}
void __init netfilter_init(void)
{
        int i, h;
#ifdef CONFIG_PROC_FS
        struct proc_dir_entry *pde;
#endif

        queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter),
                                 GFP_KERNEL);
        if (!queue_rerouter)
                panic("netfilter: cannot allocate queue rerouter array\n");
        memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter));

        for (i = 0; i < NPROTO; i++) {
                for (h = 0; h < NF_MAX_HOOKS; h++)
                        INIT_LIST_HEAD(&nf_hooks[i][h]);
        }

#ifdef CONFIG_PROC_FS
        proc_net_netfilter = proc_mkdir("netfilter", proc_net);
        if (!proc_net_netfilter)
                panic("cannot create netfilter proc entry");
        pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter);
        if (!pde)
                panic("cannot create /proc/net/netfilter/nf_log");
        pde->proc_fops = &nflog_file_ops;
#endif
}
EXPORT_SYMBOL(ip_ct_attach);
EXPORT_SYMBOL(nf_ct_attach);
EXPORT_SYMBOL(nf_getsockopt);
EXPORT_SYMBOL(nf_hook_slow);
EXPORT_SYMBOL(nf_hooks);
EXPORT_SYMBOL(nf_register_hook);
EXPORT_SYMBOL(nf_register_queue_handler);
EXPORT_SYMBOL(nf_register_sockopt);
EXPORT_SYMBOL(nf_reinject);
EXPORT_SYMBOL(nf_setsockopt);
EXPORT_SYMBOL(nf_unregister_hook);
EXPORT_SYMBOL(nf_unregister_queue_handler);
EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
EXPORT_SYMBOL_GPL(nf_register_queue_rerouter);
EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter);
EXPORT_SYMBOL(nf_unregister_sockopt);