[SPARC64]: Fix UltraSPARC-III fallout from membar changes.
[linux-2.6/sactl.git] / net / core / netfilter.c
blob076c156d5eda98345230d52b7d0f75a34cb0141b
/* netfilter.c: look after the filters for various protocols.
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 *
 * Rusty Russell (C)2000 -- This code is GPL.
 *
 * February 2000: Modified by James Morris to have 1 queue per protocol.
 * 15-Mar-2000: Added NF_REPEAT --RR.
 * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik.
 */
13 #include <linux/config.h>
14 #include <linux/kernel.h>
15 #include <linux/netfilter.h>
16 #include <net/protocol.h>
17 #include <linux/init.h>
18 #include <linux/skbuff.h>
19 #include <linux/wait.h>
20 #include <linux/module.h>
21 #include <linux/interrupt.h>
22 #include <linux/if.h>
23 #include <linux/netdevice.h>
24 #include <linux/inetdevice.h>
25 #include <linux/tcp.h>
26 #include <linux/udp.h>
27 #include <linux/icmp.h>
28 #include <net/sock.h>
29 #include <net/route.h>
30 #include <linux/ip.h>
32 /* In this code, we can be waiting indefinitely for userspace to
33 * service a packet if a hook returns NF_QUEUE. We could keep a count
34 * of skbuffs queued for userspace, and not deregister a hook unless
35 * this is zero, but that sucks. Now, we simply check when the
36 * packets come back: if the hook is gone, the packet is discarded. */
/*
 * NFDEBUG(): forwards its arguments to printk() when
 * CONFIG_NETFILTER_DEBUG is defined; otherwise it expands to nothing
 * and its arguments are never evaluated.
 */
#ifndef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...)
#else
#define NFDEBUG(format, args...) printk(format , ## args)
#endif
43 /* Sockopts only registered and called from user context, so
44 net locking would be overkill. Also, [gs]etsockopt calls may
45 sleep. */
46 static DECLARE_MUTEX(nf_sockopt_mutex);
/* Registered hooks: one list per [protocol family][hook number] pair. */
48 struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
/* Every registered nf_sockopt_ops; walked and modified under nf_sockopt_mutex. */
49 static LIST_HEAD(nf_sockopts);
/* Held (bh-safe) around list_add_rcu()/list_del_rcu() on the nf_hooks lists. */
50 static DEFINE_SPINLOCK(nf_hook_lock);
52 /*
 * A queue handler may be registered for each protocol.  Each is protected by
 * a long term mutex.  The handler must provide an outfn() to accept packets
 * for queueing and must reinject all packets it receives, no matter what.
 */
/* One slot per protocol family; outfn == NULL means no handler is registered. */
57 static struct nf_queue_handler_t {
/* Called by nf_queue() with the skb, its nf_info and 'data'. */
58 nf_queue_outfn_t outfn;
/* Opaque pointer stored at registration and handed back to outfn. */
59 void *data;
60 } queue_handler[NPROTO];
/* Write-locked by register/unregister, read-locked by nf_queue(). */
61 static DEFINE_RWLOCK(queue_handler_lock);
63 int nf_register_hook(struct nf_hook_ops *reg)
65 struct list_head *i;
67 spin_lock_bh(&nf_hook_lock);
68 list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
69 if (reg->priority < ((struct nf_hook_ops *)i)->priority)
70 break;
72 list_add_rcu(&reg->list, i->prev);
73 spin_unlock_bh(&nf_hook_lock);
75 synchronize_net();
76 return 0;
79 void nf_unregister_hook(struct nf_hook_ops *reg)
81 spin_lock_bh(&nf_hook_lock);
82 list_del_rcu(&reg->list);
83 spin_unlock_bh(&nf_hook_lock);
85 synchronize_net();
/*
 * Do the ranges [min1, max1) and [min2, max2) share at least one value?
 * The upper bounds are exclusive.  Returns 1 if they overlap, 0 if not.
 */
static inline int overlap(int min1, int max1, int min2, int max2)
{
	/* First range ends at or before the second one starts. */
	if (max1 <= min2)
		return 0;
	/* First range starts at or after the second one ends. */
	if (min1 >= max2)
		return 0;
	return 1;
}
94 /* Functions to register sockopt ranges (exclusive). */
95 int nf_register_sockopt(struct nf_sockopt_ops *reg)
97 struct list_head *i;
98 int ret = 0;
100 if (down_interruptible(&nf_sockopt_mutex) != 0)
101 return -EINTR;
103 list_for_each(i, &nf_sockopts) {
104 struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
105 if (ops->pf == reg->pf
106 && (overlap(ops->set_optmin, ops->set_optmax,
107 reg->set_optmin, reg->set_optmax)
108 || overlap(ops->get_optmin, ops->get_optmax,
109 reg->get_optmin, reg->get_optmax))) {
110 NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
111 ops->set_optmin, ops->set_optmax,
112 ops->get_optmin, ops->get_optmax,
113 reg->set_optmin, reg->set_optmax,
114 reg->get_optmin, reg->get_optmax);
115 ret = -EBUSY;
116 goto out;
120 list_add(&reg->list, &nf_sockopts);
121 out:
122 up(&nf_sockopt_mutex);
123 return ret;
126 void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
128 /* No point being interruptible: we're probably in cleanup_module() */
129 restart:
130 down(&nf_sockopt_mutex);
131 if (reg->use != 0) {
132 /* To be woken by nf_sockopt call... */
133 /* FIXME: Stuart Young's name appears gratuitously. */
134 set_current_state(TASK_UNINTERRUPTIBLE);
135 reg->cleanup_task = current;
136 up(&nf_sockopt_mutex);
137 schedule();
138 goto restart;
140 list_del(&reg->list);
141 up(&nf_sockopt_mutex);
144 /* Call get/setsockopt() */
145 static int nf_sockopt(struct sock *sk, int pf, int val,
146 char __user *opt, int *len, int get)
148 struct list_head *i;
149 struct nf_sockopt_ops *ops;
150 int ret;
152 if (down_interruptible(&nf_sockopt_mutex) != 0)
153 return -EINTR;
155 list_for_each(i, &nf_sockopts) {
156 ops = (struct nf_sockopt_ops *)i;
157 if (ops->pf == pf) {
158 if (get) {
159 if (val >= ops->get_optmin
160 && val < ops->get_optmax) {
161 ops->use++;
162 up(&nf_sockopt_mutex);
163 ret = ops->get(sk, val, opt, len);
164 goto out;
166 } else {
167 if (val >= ops->set_optmin
168 && val < ops->set_optmax) {
169 ops->use++;
170 up(&nf_sockopt_mutex);
171 ret = ops->set(sk, val, opt, *len);
172 goto out;
177 up(&nf_sockopt_mutex);
178 return -ENOPROTOOPT;
180 out:
181 down(&nf_sockopt_mutex);
182 ops->use--;
183 if (ops->cleanup_task)
184 wake_up_process(ops->cleanup_task);
185 up(&nf_sockopt_mutex);
186 return ret;
189 int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
190 int len)
192 return nf_sockopt(sk, pf, val, opt, &len, 0);
195 int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
197 return nf_sockopt(sk, pf, val, opt, len, 1);
200 static unsigned int nf_iterate(struct list_head *head,
201 struct sk_buff **skb,
202 int hook,
203 const struct net_device *indev,
204 const struct net_device *outdev,
205 struct list_head **i,
206 int (*okfn)(struct sk_buff *),
207 int hook_thresh)
209 unsigned int verdict;
212 * The caller must not block between calls to this
213 * function because of risk of continuing from deleted element.
215 list_for_each_continue_rcu(*i, head) {
216 struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
218 if (hook_thresh > elem->priority)
219 continue;
221 /* Optimization: we don't need to hold module
222 reference here, since function can't sleep. --RR */
223 verdict = elem->hook(hook, skb, indev, outdev, okfn);
224 if (verdict != NF_ACCEPT) {
225 #ifdef CONFIG_NETFILTER_DEBUG
226 if (unlikely(verdict > NF_MAX_VERDICT)) {
227 NFDEBUG("Evil return from %p(%u).\n",
228 elem->hook, hook);
229 continue;
231 #endif
232 if (verdict != NF_REPEAT)
233 return verdict;
234 *i = (*i)->prev;
237 return NF_ACCEPT;
240 int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
242 int ret;
244 write_lock_bh(&queue_handler_lock);
245 if (queue_handler[pf].outfn)
246 ret = -EBUSY;
247 else {
248 queue_handler[pf].outfn = outfn;
249 queue_handler[pf].data = data;
250 ret = 0;
252 write_unlock_bh(&queue_handler_lock);
254 return ret;
257 /* The caller must flush their queue before this */
258 int nf_unregister_queue_handler(int pf)
260 write_lock_bh(&queue_handler_lock);
261 queue_handler[pf].outfn = NULL;
262 queue_handler[pf].data = NULL;
263 write_unlock_bh(&queue_handler_lock);
265 return 0;
269 * Any packet that leaves via this function must come back
270 * through nf_reinject().
272 static int nf_queue(struct sk_buff *skb,
273 struct list_head *elem,
274 int pf, unsigned int hook,
275 struct net_device *indev,
276 struct net_device *outdev,
277 int (*okfn)(struct sk_buff *))
279 int status;
280 struct nf_info *info;
281 #ifdef CONFIG_BRIDGE_NETFILTER
282 struct net_device *physindev = NULL;
283 struct net_device *physoutdev = NULL;
284 #endif
286 /* QUEUE == DROP if noone is waiting, to be safe. */
287 read_lock(&queue_handler_lock);
288 if (!queue_handler[pf].outfn) {
289 read_unlock(&queue_handler_lock);
290 kfree_skb(skb);
291 return 1;
294 info = kmalloc(sizeof(*info), GFP_ATOMIC);
295 if (!info) {
296 if (net_ratelimit())
297 printk(KERN_ERR "OOM queueing packet %p\n",
298 skb);
299 read_unlock(&queue_handler_lock);
300 kfree_skb(skb);
301 return 1;
304 *info = (struct nf_info) {
305 (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
307 /* If it's going away, ignore hook. */
308 if (!try_module_get(info->elem->owner)) {
309 read_unlock(&queue_handler_lock);
310 kfree(info);
311 return 0;
314 /* Bump dev refs so they don't vanish while packet is out */
315 if (indev) dev_hold(indev);
316 if (outdev) dev_hold(outdev);
318 #ifdef CONFIG_BRIDGE_NETFILTER
319 if (skb->nf_bridge) {
320 physindev = skb->nf_bridge->physindev;
321 if (physindev) dev_hold(physindev);
322 physoutdev = skb->nf_bridge->physoutdev;
323 if (physoutdev) dev_hold(physoutdev);
325 #endif
327 status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
328 read_unlock(&queue_handler_lock);
330 if (status < 0) {
331 /* James M doesn't say fuck enough. */
332 if (indev) dev_put(indev);
333 if (outdev) dev_put(outdev);
334 #ifdef CONFIG_BRIDGE_NETFILTER
335 if (physindev) dev_put(physindev);
336 if (physoutdev) dev_put(physoutdev);
337 #endif
338 module_put(info->elem->owner);
339 kfree(info);
340 kfree_skb(skb);
341 return 1;
343 return 1;
346 /* Returns 1 if okfn() needs to be executed by the caller,
347 * -EPERM for NF_DROP, 0 otherwise. */
348 int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
349 struct net_device *indev,
350 struct net_device *outdev,
351 int (*okfn)(struct sk_buff *),
352 int hook_thresh)
354 struct list_head *elem;
355 unsigned int verdict;
356 int ret = 0;
358 /* We may already have this, but read-locks nest anyway */
359 rcu_read_lock();
361 elem = &nf_hooks[pf][hook];
362 next_hook:
363 verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
364 outdev, &elem, okfn, hook_thresh);
365 if (verdict == NF_ACCEPT || verdict == NF_STOP) {
366 ret = 1;
367 goto unlock;
368 } else if (verdict == NF_DROP) {
369 kfree_skb(*pskb);
370 ret = -EPERM;
371 } else if (verdict == NF_QUEUE) {
372 NFDEBUG("nf_hook: Verdict = QUEUE.\n");
373 if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn))
374 goto next_hook;
376 unlock:
377 rcu_read_unlock();
378 return ret;
381 void nf_reinject(struct sk_buff *skb, struct nf_info *info,
382 unsigned int verdict)
384 struct list_head *elem = &info->elem->list;
385 struct list_head *i;
387 rcu_read_lock();
389 /* Release those devices we held, or Alexey will kill me. */
390 if (info->indev) dev_put(info->indev);
391 if (info->outdev) dev_put(info->outdev);
392 #ifdef CONFIG_BRIDGE_NETFILTER
393 if (skb->nf_bridge) {
394 if (skb->nf_bridge->physindev)
395 dev_put(skb->nf_bridge->physindev);
396 if (skb->nf_bridge->physoutdev)
397 dev_put(skb->nf_bridge->physoutdev);
399 #endif
401 /* Drop reference to owner of hook which queued us. */
402 module_put(info->elem->owner);
404 list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
405 if (i == elem)
406 break;
409 if (elem == &nf_hooks[info->pf][info->hook]) {
410 /* The module which sent it to userspace is gone. */
411 NFDEBUG("%s: module disappeared, dropping packet.\n",
412 __FUNCTION__);
413 verdict = NF_DROP;
416 /* Continue traversal iff userspace said ok... */
417 if (verdict == NF_REPEAT) {
418 elem = elem->prev;
419 verdict = NF_ACCEPT;
422 if (verdict == NF_ACCEPT) {
423 next_hook:
424 verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
425 &skb, info->hook,
426 info->indev, info->outdev, &elem,
427 info->okfn, INT_MIN);
430 switch (verdict) {
431 case NF_ACCEPT:
432 info->okfn(skb);
433 break;
435 case NF_QUEUE:
436 if (!nf_queue(skb, elem, info->pf, info->hook,
437 info->indev, info->outdev, info->okfn))
438 goto next_hook;
439 break;
441 rcu_read_unlock();
443 if (verdict == NF_DROP)
444 kfree_skb(skb);
446 kfree(info);
447 return;
450 #ifdef CONFIG_INET
451 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
452 int ip_route_me_harder(struct sk_buff **pskb)
454 struct iphdr *iph = (*pskb)->nh.iph;
455 struct rtable *rt;
456 struct flowi fl = {};
457 struct dst_entry *odst;
458 unsigned int hh_len;
460 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
461 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
463 if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
464 fl.nl_u.ip4_u.daddr = iph->daddr;
465 fl.nl_u.ip4_u.saddr = iph->saddr;
466 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
467 fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
468 #ifdef CONFIG_IP_ROUTE_FWMARK
469 fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
470 #endif
471 fl.proto = iph->protocol;
472 if (ip_route_output_key(&rt, &fl) != 0)
473 return -1;
475 /* Drop old route. */
476 dst_release((*pskb)->dst);
477 (*pskb)->dst = &rt->u.dst;
478 } else {
479 /* non-local src, find valid iif to satisfy
480 * rp-filter when calling ip_route_input. */
481 fl.nl_u.ip4_u.daddr = iph->saddr;
482 if (ip_route_output_key(&rt, &fl) != 0)
483 return -1;
485 odst = (*pskb)->dst;
486 if (ip_route_input(*pskb, iph->daddr, iph->saddr,
487 RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
488 dst_release(&rt->u.dst);
489 return -1;
491 dst_release(&rt->u.dst);
492 dst_release(odst);
495 if ((*pskb)->dst->error)
496 return -1;
498 /* Change in oif may mean change in hh_len. */
499 hh_len = (*pskb)->dst->dev->hard_header_len;
500 if (skb_headroom(*pskb) < hh_len) {
501 struct sk_buff *nskb;
503 nskb = skb_realloc_headroom(*pskb, hh_len);
504 if (!nskb)
505 return -1;
506 if ((*pskb)->sk)
507 skb_set_owner_w(nskb, (*pskb)->sk);
508 kfree_skb(*pskb);
509 *pskb = nskb;
512 return 0;
514 EXPORT_SYMBOL(ip_route_me_harder);
516 int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
518 struct sk_buff *nskb;
520 if (writable_len > (*pskb)->len)
521 return 0;
523 /* Not exclusive use of packet? Must copy. */
524 if (skb_shared(*pskb) || skb_cloned(*pskb))
525 goto copy_skb;
527 return pskb_may_pull(*pskb, writable_len);
529 copy_skb:
530 nskb = skb_copy(*pskb, GFP_ATOMIC);
531 if (!nskb)
532 return 0;
533 BUG_ON(skb_is_nonlinear(nskb));
535 /* Rest of kernel will get very unhappy if we pass it a
536 suddenly-orphaned skbuff */
537 if ((*pskb)->sk)
538 skb_set_owner_w(nskb, (*pskb)->sk);
539 kfree_skb(*pskb);
540 *pskb = nskb;
541 return 1;
543 EXPORT_SYMBOL(skb_ip_make_writable);
544 #endif /*CONFIG_INET*/
546 /* Internal logging interface, which relies on the real
547 LOG target modules */
/* Size of the on-stack prefix buffer formatted by nf_log_packet(). */
549 #define NF_LOG_PREFIXLEN 128
/* Logging backend per protocol family; read with rcu_dereference(). */
551 static nf_logfn *nf_logging[NPROTO]; /* = NULL */
/* Non-zero once the "no backend" warning has been printed. */
552 static int reported = 0;
/* Serialises updates to nf_logging[] in register/unregister. */
553 static DEFINE_SPINLOCK(nf_log_lock);
555 int nf_log_register(int pf, nf_logfn *logfn)
557 int ret = -EBUSY;
559 /* Any setup of logging members must be done before
560 * substituting pointer. */
561 spin_lock(&nf_log_lock);
562 if (!nf_logging[pf]) {
563 rcu_assign_pointer(nf_logging[pf], logfn);
564 ret = 0;
566 spin_unlock(&nf_log_lock);
567 return ret;
570 void nf_log_unregister(int pf, nf_logfn *logfn)
572 spin_lock(&nf_log_lock);
573 if (nf_logging[pf] == logfn)
574 nf_logging[pf] = NULL;
575 spin_unlock(&nf_log_lock);
577 /* Give time to concurrent readers. */
578 synchronize_net();
581 void nf_log_packet(int pf,
582 unsigned int hooknum,
583 const struct sk_buff *skb,
584 const struct net_device *in,
585 const struct net_device *out,
586 const char *fmt, ...)
588 va_list args;
589 char prefix[NF_LOG_PREFIXLEN];
590 nf_logfn *logfn;
592 rcu_read_lock();
593 logfn = rcu_dereference(nf_logging[pf]);
594 if (logfn) {
595 va_start(args, fmt);
596 vsnprintf(prefix, sizeof(prefix), fmt, args);
597 va_end(args);
598 /* We must read logging before nf_logfn[pf] */
599 logfn(hooknum, skb, in, out, prefix);
600 } else if (!reported) {
601 printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
602 "no backend logging module loaded in!\n");
603 reported++;
605 rcu_read_unlock();
607 EXPORT_SYMBOL(nf_log_register);
608 EXPORT_SYMBOL(nf_log_unregister);
609 EXPORT_SYMBOL(nf_log_packet);
611 /* This does not belong here, but locally generated errors need it if connection
612 tracking in use: without this, connection may not be in hash table, and hence
613 manufactured ICMP or RST packets will not be associated with it. */
614 void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
616 void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
618 void (*attach)(struct sk_buff *, struct sk_buff *);
620 if (skb->nfct && (attach = ip_ct_attach) != NULL) {
621 mb(); /* Just to be sure: must be read before executing this */
622 attach(new, skb);
626 void __init netfilter_init(void)
628 int i, h;
630 for (i = 0; i < NPROTO; i++) {
631 for (h = 0; h < NF_MAX_HOOKS; h++)
632 INIT_LIST_HEAD(&nf_hooks[i][h]);
636 EXPORT_SYMBOL(ip_ct_attach);
637 EXPORT_SYMBOL(nf_ct_attach);
638 EXPORT_SYMBOL(nf_getsockopt);
639 EXPORT_SYMBOL(nf_hook_slow);
640 EXPORT_SYMBOL(nf_hooks);
641 EXPORT_SYMBOL(nf_register_hook);
642 EXPORT_SYMBOL(nf_register_queue_handler);
643 EXPORT_SYMBOL(nf_register_sockopt);
644 EXPORT_SYMBOL(nf_reinject);
645 EXPORT_SYMBOL(nf_setsockopt);
646 EXPORT_SYMBOL(nf_unregister_hook);
647 EXPORT_SYMBOL(nf_unregister_queue_handler);
648 EXPORT_SYMBOL(nf_unregister_sockopt);