/* netfilter.c: look after the filters for various protocols.
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 *
 * Rusty Russell (C)2000 -- This code is GPL.
 *
 * February 2000: Modified by James Morris to have 1 queue per protocol.
 * 15-Mar-2000:   Added NF_REPEAT --RR.
 * 08-May-2003:   Internal logging interface added by Jozsef Kadlecsik.
 */

#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <net/protocol.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <net/sock.h>

/* In this code, we can be waiting indefinitely for userspace to
 * service a packet if a hook returns NF_QUEUE.  We could keep a count
 * of skbuffs queued for userspace, and not deregister a hook unless
 * this is zero, but that sucks.  Now, we simply check when the
 * packets come back: if the hook is gone, the packet is discarded. */

#ifdef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...)  printk(format , ## args)
#else
#define NFDEBUG(format, args...)
#endif

/* Sockopts only registered and called from user context, so
   net locking would be overkill.  Also, [gs]etsockopt calls may
   sleep. */
static DECLARE_MUTEX(nf_sockopt_mutex);

struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
static LIST_HEAD(nf_sockopts);
static DEFINE_SPINLOCK(nf_hook_lock);

/*
 * A queue handler may be registered for each protocol.  Each is protected by
 * a long-term mutex.  The handler must provide an outfn() to accept packets
 * for queueing and must reinject all packets it receives, no matter what.
 */
static struct nf_queue_handler_t {
	nf_queue_outfn_t outfn;
	void *data;
} queue_handler[NPROTO];

static struct nf_queue_rerouter *queue_rerouter;

static DEFINE_RWLOCK(queue_handler_lock);

int nf_register_hook(struct nf_hook_ops *reg)
{
	struct list_head *i;

	spin_lock_bh(&nf_hook_lock);
	list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
		if (reg->priority < ((struct nf_hook_ops *)i)->priority)
			break;
	}
	list_add_rcu(&reg->list, i->prev);
	spin_unlock_bh(&nf_hook_lock);

	synchronize_net();
	return 0;
}

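/*
 * Usage sketch (illustrative, not part of this file; my_hook and
 * my_ops are hypothetical names): a module supplies a function with
 * the nf_hookfn signature and an nf_hook_ops describing where it
 * attaches.  This one simply accepts every IPv4 packet at PRE_ROUTING.
 *
 *	static unsigned int my_hook(unsigned int hooknum,
 *				    struct sk_buff **pskb,
 *				    const struct net_device *in,
 *				    const struct net_device *out,
 *				    int (*okfn)(struct sk_buff *))
 *	{
 *		return NF_ACCEPT;
 *	}
 *
 *	static struct nf_hook_ops my_ops = {
 *		.hook		= my_hook,
 *		.owner		= THIS_MODULE,
 *		.pf		= PF_INET,
 *		.hooknum	= NF_IP_PRE_ROUTING,
 *		.priority	= NF_IP_PRI_FIRST,
 *	};
 *
 * nf_register_hook(&my_ops) splices the entry into
 * nf_hooks[pf][hooknum] in ascending priority order;
 * nf_unregister_hook(&my_ops) removes it again.
 */
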
void nf_unregister_hook(struct nf_hook_ops *reg)
{
	spin_lock_bh(&nf_hook_lock);
	list_del_rcu(&reg->list);
	spin_unlock_bh(&nf_hook_lock);

	synchronize_net();
}

/* Do exclusive ranges overlap? */
static inline int overlap(int min1, int max1, int min2, int max2)
{
	return max1 > min2 && min1 < max2;
}

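/*
 * A quick worked example: the exclusive ranges [64,72) and [70,80)
 * overlap (72 > 70 && 64 < 80), while [64,72) and [72,80) do not,
 * since 72 > 72 is false.
 */
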
/* Functions to register sockopt ranges (exclusive). */
int nf_register_sockopt(struct nf_sockopt_ops *reg)
{
	struct list_head *i;
	int ret = 0;

	if (down_interruptible(&nf_sockopt_mutex) != 0)
		return -EINTR;

	list_for_each(i, &nf_sockopts) {
		struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
		if (ops->pf == reg->pf
		    && (overlap(ops->set_optmin, ops->set_optmax,
				reg->set_optmin, reg->set_optmax)
			|| overlap(ops->get_optmin, ops->get_optmax,
				   reg->get_optmin, reg->get_optmax))) {
			NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
				ops->set_optmin, ops->set_optmax,
				ops->get_optmin, ops->get_optmax,
				reg->set_optmin, reg->set_optmax,
				reg->get_optmin, reg->get_optmax);
			ret = -EBUSY;
			goto out;
		}
	}

	list_add(&reg->list, &nf_sockopts);
out:
	up(&nf_sockopt_mutex);
	return ret;
}

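/*
 * For reference, a sketch of how ip_tables claims its range (the
 * constants and do_ipt_* handlers belong to that module, not this
 * file; shown only to make the exclusive-range contract concrete):
 *
 *	static struct nf_sockopt_ops ipt_sockopts = {
 *		.pf		= PF_INET,
 *		.set_optmin	= IPT_BASE_CTL,
 *		.set_optmax	= IPT_SO_SET_MAX+1,
 *		.set		= do_ipt_set_ctl,
 *		.get_optmin	= IPT_BASE_CTL,
 *		.get_optmax	= IPT_SO_GET_MAX+1,
 *		.get		= do_ipt_get_ctl,
 *	};
 *
 * A second registration for PF_INET whose [optmin,optmax) ranges
 * intersect these fails with -EBUSY.
 */
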
void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
{
	/* No point being interruptible: we're probably in cleanup_module() */
restart:
	down(&nf_sockopt_mutex);
	if (reg->use != 0) {
		/* To be woken by nf_sockopt call... */
		/* FIXME: Stuart Young's name appears gratuitously. */
		set_current_state(TASK_UNINTERRUPTIBLE);
		reg->cleanup_task = current;
		up(&nf_sockopt_mutex);
		schedule();
		goto restart;
	}
	list_del(&reg->list);
	up(&nf_sockopt_mutex);
}

/* Call get/setsockopt() */
static int nf_sockopt(struct sock *sk, int pf, int val,
		      char __user *opt, int *len, int get)
{
	struct list_head *i;
	struct nf_sockopt_ops *ops;
	int ret;

	if (down_interruptible(&nf_sockopt_mutex) != 0)
		return -EINTR;

	list_for_each(i, &nf_sockopts) {
		ops = (struct nf_sockopt_ops *)i;
		if (ops->pf == pf) {
			if (get) {
				if (val >= ops->get_optmin
				    && val < ops->get_optmax) {
					ops->use++;
					up(&nf_sockopt_mutex);
					ret = ops->get(sk, val, opt, len);
					goto out;
				}
			} else {
				if (val >= ops->set_optmin
				    && val < ops->set_optmax) {
					ops->use++;
					up(&nf_sockopt_mutex);
					ret = ops->set(sk, val, opt, *len);
					goto out;
				}
			}
		}
	}
	up(&nf_sockopt_mutex);
	return -ENOPROTOOPT;

out:
	down(&nf_sockopt_mutex);
	ops->use--;
	if (ops->cleanup_task)
		wake_up_process(ops->cleanup_task);
	up(&nf_sockopt_mutex);
	return ret;
}

int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
		  int len)
{
	return nf_sockopt(sk, pf, val, opt, &len, 0);
}

int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
{
	return nf_sockopt(sk, pf, val, opt, len, 1);
}

static unsigned int nf_iterate(struct list_head *head,
			       struct sk_buff **skb,
			       int hook,
			       const struct net_device *indev,
			       const struct net_device *outdev,
			       struct list_head **i,
			       int (*okfn)(struct sk_buff *),
			       int hook_thresh)
{
	unsigned int verdict;

	/*
	 * The caller must not block between calls to this
	 * function because of risk of continuing from deleted element.
	 */
	list_for_each_continue_rcu(*i, head) {
		struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;

		if (hook_thresh > elem->priority)
			continue;

		/* Optimization: we don't need to hold module
		   reference here, since function can't sleep. --RR */
		verdict = elem->hook(hook, skb, indev, outdev, okfn);
		if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
			if (unlikely((verdict & NF_VERDICT_MASK)
						> NF_MAX_VERDICT)) {
				NFDEBUG("Evil return from %p(%u).\n",
					elem->hook, hook);
				continue;
			}
#endif
			if (verdict != NF_REPEAT)
				return verdict;
			*i = (*i)->prev;
		}
	}
	return NF_ACCEPT;
}

int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
{
	int ret;

	if (pf >= NPROTO)
		return -EINVAL;

	write_lock_bh(&queue_handler_lock);
	if (queue_handler[pf].outfn)
		ret = -EBUSY;
	else {
		queue_handler[pf].outfn = outfn;
		queue_handler[pf].data = data;
		ret = 0;
	}
	write_unlock_bh(&queue_handler_lock);

	return ret;
}

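/*
 * Minimal sketch of a queue handler (hypothetical names; the real
 * users are ip_queue and nfnetlink_queue).  It must honour the
 * contract above: every packet handed to outfn() must come back via
 * nf_reinject().  This trivial one issues its verdict immediately
 * instead of consulting userspace:
 *
 *	static int my_outfn(struct sk_buff *skb, struct nf_info *info,
 *			    unsigned int queuenum, void *data)
 *	{
 *		nf_reinject(skb, info, NF_ACCEPT);
 *		return 0;
 *	}
 *
 *	nf_register_queue_handler(PF_INET, my_outfn, NULL);
 */
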
/* The caller must flush their queue before this */
int nf_unregister_queue_handler(int pf)
{
	if (pf >= NPROTO)
		return -EINVAL;

	write_lock_bh(&queue_handler_lock);
	queue_handler[pf].outfn = NULL;
	queue_handler[pf].data = NULL;
	write_unlock_bh(&queue_handler_lock);

	return 0;
}

int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer)
{
	if (pf >= NPROTO)
		return -EINVAL;

	write_lock_bh(&queue_handler_lock);
	memcpy(&queue_rerouter[pf], rer, sizeof(queue_rerouter[pf]));
	write_unlock_bh(&queue_handler_lock);

	return 0;
}

int nf_unregister_queue_rerouter(int pf)
{
	if (pf >= NPROTO)
		return -EINVAL;

	write_lock_bh(&queue_handler_lock);
	memset(&queue_rerouter[pf], 0, sizeof(queue_rerouter[pf]));
	write_unlock_bh(&queue_handler_lock);
	return 0;
}

void nf_unregister_queue_handlers(nf_queue_outfn_t outfn)
{
	int pf;

	write_lock_bh(&queue_handler_lock);
	for (pf = 0; pf < NPROTO; pf++) {
		if (queue_handler[pf].outfn == outfn) {
			queue_handler[pf].outfn = NULL;
			queue_handler[pf].data = NULL;
		}
	}
	write_unlock_bh(&queue_handler_lock);
}

/*
 * Any packet that leaves via this function must come back
 * through nf_reinject().
 */
static int nf_queue(struct sk_buff **skb,
		    struct list_head *elem,
		    int pf, unsigned int hook,
		    struct net_device *indev,
		    struct net_device *outdev,
		    int (*okfn)(struct sk_buff *),
		    unsigned int queuenum)
{
	int status;
	struct nf_info *info;
#ifdef CONFIG_BRIDGE_NETFILTER
	struct net_device *physindev = NULL;
	struct net_device *physoutdev = NULL;
#endif

	/* QUEUE == DROP if no one is waiting, to be safe. */
	read_lock(&queue_handler_lock);
	if (!queue_handler[pf].outfn) {
		read_unlock(&queue_handler_lock);
		kfree_skb(*skb);
		return 1;
	}

	info = kmalloc(sizeof(*info)+queue_rerouter[pf].rer_size, GFP_ATOMIC);
	if (!info) {
		if (net_ratelimit())
			printk(KERN_ERR "OOM queueing packet %p\n",
			       *skb);
		read_unlock(&queue_handler_lock);
		kfree_skb(*skb);
		return 1;
	}

	*info = (struct nf_info) {
		(struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };

	/* If it's going away, ignore hook. */
	if (!try_module_get(info->elem->owner)) {
		read_unlock(&queue_handler_lock);
		kfree(info);
		return 0;
	}

	/* Bump dev refs so they don't vanish while packet is out */
	if (indev) dev_hold(indev);
	if (outdev) dev_hold(outdev);

#ifdef CONFIG_BRIDGE_NETFILTER
	if ((*skb)->nf_bridge) {
		physindev = (*skb)->nf_bridge->physindev;
		if (physindev) dev_hold(physindev);
		physoutdev = (*skb)->nf_bridge->physoutdev;
		if (physoutdev) dev_hold(physoutdev);
	}
#endif
	if (queue_rerouter[pf].save)
		queue_rerouter[pf].save(*skb, info);

	status = queue_handler[pf].outfn(*skb, info, queuenum,
					 queue_handler[pf].data);

	if (status >= 0 && queue_rerouter[pf].reroute)
		status = queue_rerouter[pf].reroute(skb, info);

	read_unlock(&queue_handler_lock);

	if (status < 0) {
		/* James M doesn't say fuck enough. */
		if (indev) dev_put(indev);
		if (outdev) dev_put(outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
		if (physindev) dev_put(physindev);
		if (physoutdev) dev_put(physoutdev);
#endif
		module_put(info->elem->owner);
		kfree(info);
		kfree_skb(*skb);

		return 1;
	}

	return 1;
}

/* Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise. */
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
		 struct net_device *indev,
		 struct net_device *outdev,
		 int (*okfn)(struct sk_buff *),
		 int hook_thresh)
{
	struct list_head *elem;
	unsigned int verdict;
	int ret = 0;

	/* We may already have this, but read-locks nest anyway */
	rcu_read_lock();

	elem = &nf_hooks[pf][hook];
next_hook:
	verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
			     outdev, &elem, okfn, hook_thresh);
	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
		ret = 1;
		goto unlock;
	} else if (verdict == NF_DROP) {
		kfree_skb(*pskb);
		ret = -EPERM;
	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
		if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn,
			      verdict >> NF_VERDICT_BITS))
			goto next_hook;
	}
unlock:
	rcu_read_unlock();
	return ret;
}

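/*
 * Callers normally reach nf_hook_slow() through the NF_HOOK() macro
 * in linux/netfilter.h rather than calling it directly.  For example,
 * IPv4 input does:
 *
 *	return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
 *		       ip_rcv_finish);
 *
 * NF_HOOK() runs the okfn (here ip_rcv_finish) itself when
 * nf_hook_slow() returns 1, and otherwise propagates the 0
 * (queued/stolen) or -EPERM (dropped) return value.
 */
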
void nf_reinject(struct sk_buff *skb, struct nf_info *info,
		 unsigned int verdict)
{
	struct list_head *elem = &info->elem->list;
	struct list_head *i;

	rcu_read_lock();

	/* Release those devices we held, or Alexey will kill me. */
	if (info->indev) dev_put(info->indev);
	if (info->outdev) dev_put(info->outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
	if (skb->nf_bridge) {
		if (skb->nf_bridge->physindev)
			dev_put(skb->nf_bridge->physindev);
		if (skb->nf_bridge->physoutdev)
			dev_put(skb->nf_bridge->physoutdev);
	}
#endif

	/* Drop reference to owner of hook which queued us. */
	module_put(info->elem->owner);

	list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
		if (i == elem)
			break;
	}

	if (elem == &nf_hooks[info->pf][info->hook]) {
		/* The module which sent it to userspace is gone. */
		NFDEBUG("%s: module disappeared, dropping packet.\n",
			__FUNCTION__);
		verdict = NF_DROP;
	}

	/* Continue traversal iff userspace said ok... */
	if (verdict == NF_REPEAT) {
		elem = elem->prev;
		verdict = NF_ACCEPT;
	}

	if (verdict == NF_ACCEPT) {
	next_hook:
		verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
				     &skb, info->hook,
				     info->indev, info->outdev, &elem,
				     info->okfn, INT_MIN);
	}

	switch (verdict & NF_VERDICT_MASK) {
	case NF_ACCEPT:
		info->okfn(skb);
		break;

	case NF_QUEUE:
		if (!nf_queue(&skb, elem, info->pf, info->hook,
			      info->indev, info->outdev, info->okfn,
			      verdict >> NF_VERDICT_BITS))
			goto next_hook;
		break;
	}
	rcu_read_unlock();

	if (verdict == NF_DROP)
		kfree_skb(skb);

	kfree(info);
	return;
}

int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len)
{
	struct sk_buff *nskb;

	if (writable_len > (*pskb)->len)
		return 0;

	/* Not exclusive use of packet?  Must copy. */
	if (skb_shared(*pskb) || skb_cloned(*pskb))
		goto copy_skb;

	return pskb_may_pull(*pskb, writable_len);

copy_skb:
	nskb = skb_copy(*pskb, GFP_ATOMIC);
	if (!nskb)
		return 0;
	BUG_ON(skb_is_nonlinear(nskb));

	/* Rest of kernel will get very unhappy if we pass it a
	   suddenly-orphaned skbuff */
	if ((*pskb)->sk)
		skb_set_owner_w(nskb, (*pskb)->sk);
	kfree_skb(*pskb);
	*pskb = nskb;
	return 1;
}
EXPORT_SYMBOL(skb_make_writable);

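/*
 * Typical use (sketch; iph and new_tos are hypothetical locals): a
 * target that rewrites the IP header makes the header bytes writable
 * first, remembering that *pskb may be replaced by a private copy:
 *
 *	if (!skb_make_writable(pskb, sizeof(struct iphdr)))
 *		return NF_DROP;
 *	iph = (*pskb)->nh.iph;		(reload: *pskb may have changed)
 *	iph->tos = new_tos;
 */
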
/* Internal logging interface, which relies on the real
   LOG target modules */

#define NF_LOG_PREFIXLEN	128

static struct nf_logger *nf_logging[NPROTO]; /* = NULL */
static DEFINE_SPINLOCK(nf_log_lock);

int nf_log_register(int pf, struct nf_logger *logger)
{
	int ret = -EBUSY;

	/* Any setup of logging members must be done before
	 * substituting pointer. */
	spin_lock(&nf_log_lock);
	if (!nf_logging[pf]) {
		rcu_assign_pointer(nf_logging[pf], logger);
		ret = 0;
	}
	spin_unlock(&nf_log_lock);
	return ret;
}

void nf_log_unregister_pf(int pf)
{
	spin_lock(&nf_log_lock);
	nf_logging[pf] = NULL;
	spin_unlock(&nf_log_lock);

	/* Give time to concurrent readers. */
	synchronize_net();
}

void nf_log_unregister_logger(struct nf_logger *logger)
{
	int i;

	spin_lock(&nf_log_lock);
	for (i = 0; i < NPROTO; i++) {
		if (nf_logging[i] == logger)
			nf_logging[i] = NULL;
	}
	spin_unlock(&nf_log_lock);

	synchronize_net();
}

void nf_log_packet(int pf,
		   unsigned int hooknum,
		   const struct sk_buff *skb,
		   const struct net_device *in,
		   const struct net_device *out,
		   struct nf_loginfo *loginfo,
		   const char *fmt, ...)
{
	va_list args;
	char prefix[NF_LOG_PREFIXLEN];
	struct nf_logger *logger;

	rcu_read_lock();
	logger = rcu_dereference(nf_logging[pf]);
	if (logger) {
		va_start(args, fmt);
		vsnprintf(prefix, sizeof(prefix), fmt, args);
		va_end(args);
		/* We must read logging before nf_logfn[pf] */
		logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix);
	} else if (net_ratelimit()) {
		printk(KERN_WARNING "nf_log_packet: can't log since "
		       "no backend logging module loaded in! Please either "
		       "load one, or disable logging explicitly\n");
	}
	rcu_read_unlock();
}

EXPORT_SYMBOL(nf_log_register);
EXPORT_SYMBOL(nf_log_unregister_pf);
EXPORT_SYMBOL(nf_log_unregister_logger);
EXPORT_SYMBOL(nf_log_packet);

#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc_net_netfilter;
EXPORT_SYMBOL(proc_net_netfilter);

static void *seq_start(struct seq_file *seq, loff_t *pos)
{
	rcu_read_lock();

	if (*pos >= NPROTO)
		return NULL;

	return pos;
}

static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	(*pos)++;

	if (*pos >= NPROTO)
		return NULL;

	return pos;
}

static void seq_stop(struct seq_file *s, void *v)
{
	rcu_read_unlock();
}

static int seq_show(struct seq_file *s, void *v)
{
	loff_t *pos = v;
	const struct nf_logger *logger;

	logger = rcu_dereference(nf_logging[*pos]);

	if (!logger)
		return seq_printf(s, "%2lld NONE\n", *pos);

	return seq_printf(s, "%2lld %s\n", *pos, logger->name);
}

static struct seq_operations nflog_seq_ops = {
	.start	= seq_start,
	.next	= seq_next,
	.stop	= seq_stop,
	.show	= seq_show,
};

static int nflog_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &nflog_seq_ops);
}

static struct file_operations nflog_file_ops = {
	.owner	 = THIS_MODULE,
	.open	 = nflog_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#endif /* PROC_FS */

/* This does not belong here, but locally generated errors need it if
   connection tracking is in use: without this, the connection may not be
   in the hash table, and hence manufactured ICMP or RST packets will not
   be associated with it. */
void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);

void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
{
	void (*attach)(struct sk_buff *, struct sk_buff *);

	if (skb->nfct && (attach = ip_ct_attach) != NULL) {
		mb(); /* Just to be sure: must be read before executing this */
		attach(new, skb);
	}
}

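/*
 * Example (sketch; nskb and skb_in are hypothetical names): code that
 * manufactures an ICMP error or RST in reply to skb_in calls
 *
 *	nf_ct_attach(nskb, skb_in);
 *
 * so the new packet inherits skb_in's conntrack entry when the
 * ip_conntrack module has installed ip_ct_attach.
 */
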
void __init netfilter_init(void)
{
	int i, h;
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *pde;
#endif

	queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter),
				 GFP_KERNEL);
	if (!queue_rerouter)
		panic("netfilter: cannot allocate queue rerouter array\n");
	memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter));

	for (i = 0; i < NPROTO; i++) {
		for (h = 0; h < NF_MAX_HOOKS; h++)
			INIT_LIST_HEAD(&nf_hooks[i][h]);
	}

#ifdef CONFIG_PROC_FS
	proc_net_netfilter = proc_mkdir("netfilter", proc_net);
	if (!proc_net_netfilter)
		panic("cannot create netfilter proc entry");
	pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter);
	if (!pde)
		panic("cannot create /proc/net/netfilter/nf_log");
	pde->proc_fops = &nflog_file_ops;
#endif
}

EXPORT_SYMBOL(ip_ct_attach);
EXPORT_SYMBOL(nf_ct_attach);
EXPORT_SYMBOL(nf_getsockopt);
EXPORT_SYMBOL(nf_hook_slow);
EXPORT_SYMBOL(nf_hooks);
EXPORT_SYMBOL(nf_register_hook);
EXPORT_SYMBOL(nf_register_queue_handler);
EXPORT_SYMBOL(nf_register_sockopt);
EXPORT_SYMBOL(nf_reinject);
EXPORT_SYMBOL(nf_setsockopt);
EXPORT_SYMBOL(nf_unregister_hook);
EXPORT_SYMBOL(nf_unregister_queue_handler);
EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
EXPORT_SYMBOL_GPL(nf_register_queue_rerouter);
EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter);
EXPORT_SYMBOL(nf_unregister_sockopt);