Import 2.3.18pre1
[davej-history.git] / net / core / netfilter.c
bloba6472a7dee05d603681f80adba02e42e3c2fbdad
1 /* netfilter.c: look after the filters for various protocols.
2 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
4 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
5 * way.
7 * Rusty Russell (C)1998 -- This code is GPL.
8 */
9 #include <linux/config.h>
10 #include <linux/netfilter.h>
11 #include <net/protocol.h>
12 #include <linux/init.h>
13 #include <linux/skbuff.h>
14 #include <linux/wait.h>
15 #include <linux/module.h>
16 #include <linux/interrupt.h>
17 #include <linux/if.h>
18 #include <linux/netdevice.h>
19 #include <linux/spinlock.h>
21 #define __KERNEL_SYSCALLS__
22 #include <linux/unistd.h>
24 /* In this code, we can be waiting indefinitely for userspace to
25 * service a packet if a hook returns NF_QUEUE. We could keep a count
26 * of skbuffs queued for userspace, and not deregister a hook unless
27 * this is zero, but that sucks. Now, we simply check when the
28 * packets come back: if the hook is gone, the packet is discarded. */
29 #ifdef CONFIG_NETFILTER_DEBUG
30 #define NFDEBUG(format, args...) printk(format , ## args)
31 #else
32 #define NFDEBUG(format, args...)
33 #endif
/* Each queued (to userspace) skbuff has one of these. */
struct nf_info
{
	/* The ops struct which sent us to userspace. */
	struct nf_hook_ops *elem;

	/* If we're sent to userspace, this keeps housekeeping info */
	int pf;				/* protocol family the packet belongs to */
	unsigned long mark;		/* original skb->nfmark, restored on reinject */
	unsigned int hook;		/* hook number the packet was queued from */
	struct net_device *indev, *outdev;	/* refs held via dev_hold() while queued */
	int (*okfn)(struct sk_buff *);	/* continuation to call if finally accepted */
};
49 static rwlock_t nf_lock = RW_LOCK_UNLOCKED;
50 static DECLARE_MUTEX(nf_sockopt_mutex);
52 struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
53 static LIST_HEAD(nf_sockopts);
54 static LIST_HEAD(nf_interested);
56 int nf_register_hook(struct nf_hook_ops *reg)
58 struct list_head *i;
60 #ifdef CONFIG_NETFILTER_DEBUG
61 if (reg->pf<0 || reg->pf>=NPROTO || reg->hooknum >= NF_MAX_HOOKS) {
62 NFDEBUG("nf_register_hook: bad vals: pf=%i, hooknum=%u.\n",
63 reg->pf, reg->hooknum);
64 return -EINVAL;
66 #endif
67 NFDEBUG("nf_register_hook: pf=%i hook=%u.\n", reg->pf, reg->hooknum);
69 write_lock_bh(&nf_lock);
70 for (i = nf_hooks[reg->pf][reg->hooknum].next;
71 i != &nf_hooks[reg->pf][reg->hooknum];
72 i = i->next) {
73 if (reg->priority < ((struct nf_hook_ops *)i)->priority)
74 break;
76 list_add(&reg->list, i->prev);
77 write_unlock_bh(&nf_lock);
78 return 0;
81 void nf_unregister_hook(struct nf_hook_ops *reg)
83 #ifdef CONFIG_NETFILTER_DEBUG
84 if (reg->pf<0 || reg->pf>=NPROTO || reg->hooknum >= NF_MAX_HOOKS) {
85 NFDEBUG("nf_unregister_hook: bad vals: pf=%i, hooknum=%u.\n",
86 reg->pf, reg->hooknum);
87 return;
89 #endif
90 write_lock_bh(&nf_lock);
91 list_del(&reg->list);
92 write_unlock_bh(&nf_lock);
/* Do exclusive (half-open, [min,max)) ranges overlap?
 *
 * The old test — (min1 in range2) || (max1 in range2) — missed the case
 * where range1 completely contains range2 (e.g. [0,10) vs [2,4)),
 * wrongly allowing overlapping sockopt registrations.  Two half-open
 * ranges overlap iff each one starts before the other ends. */
static inline int overlap(int min1, int max1, int min2, int max2)
{
	return max1 > min2 && min1 < max2;
}
102 /* Functions to register sockopt ranges (exclusive). */
103 int nf_register_sockopt(struct nf_sockopt_ops *reg)
105 struct list_head *i;
106 int ret = 0;
108 #ifdef CONFIG_NETFILTER_DEBUG
109 if (reg->pf<0 || reg->pf>=NPROTO) {
110 NFDEBUG("nf_register_sockopt: bad val: pf=%i.\n", reg->pf);
111 return -EINVAL;
113 if (reg->set_optmin > reg->set_optmax) {
114 NFDEBUG("nf_register_sockopt: bad set val: min=%i max=%i.\n",
115 reg->set_optmin, reg->set_optmax);
116 return -EINVAL;
118 if (reg->get_optmin > reg->get_optmax) {
119 NFDEBUG("nf_register_sockopt: bad get val: min=%i max=%i.\n",
120 reg->get_optmin, reg->get_optmax);
121 return -EINVAL;
123 #endif
124 if (down_interruptible(&nf_sockopt_mutex) != 0)
125 return -EINTR;
127 for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
128 struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
129 if (ops->pf == reg->pf
130 && (overlap(ops->set_optmin, ops->set_optmax,
131 reg->set_optmin, reg->set_optmax)
132 || overlap(ops->get_optmin, ops->get_optmax,
133 reg->get_optmin, reg->get_optmax))) {
134 NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
135 ops->set_optmin, ops->set_optmax,
136 ops->get_optmin, ops->get_optmax,
137 reg->set_optmin, reg->set_optmax,
138 reg->get_optmin, reg->get_optmax);
139 ret = -EBUSY;
140 goto out;
144 list_add(&reg->list, &nf_sockopts);
145 out:
146 up(&nf_sockopt_mutex);
147 return ret;
150 void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
152 #ifdef CONFIG_NETFILTER_DEBUG
153 if (reg->pf<0 || reg->pf>=NPROTO) {
154 NFDEBUG("nf_register_sockopt: bad val: pf=%i.\n", reg->pf);
155 return;
157 #endif
158 /* No point being interruptible: we're probably in cleanup_module() */
159 down(&nf_sockopt_mutex);
160 list_del(&reg->list);
161 up(&nf_sockopt_mutex);
164 #ifdef CONFIG_NETFILTER_DEBUG
165 #include <net/ip.h>
166 #include <net/route.h>
167 #include <net/tcp.h>
168 #include <linux/netfilter_ipv4.h>
170 void nf_dump_skb(int pf, struct sk_buff *skb)
172 printk("skb: pf=%i %s dev=%s len=%u\n",
174 skb->sk ? "(owned)" : "(unowned)",
175 skb->dev ? skb->dev->name : "(no dev)",
176 skb->len);
177 switch (pf) {
178 case PF_INET: {
179 const struct iphdr *ip = skb->nh.iph;
180 __u32 *opt = (__u32 *) (ip + 1);
181 int opti;
182 __u16 src_port = 0, dst_port = 0;
184 if (ip->protocol == IPPROTO_TCP
185 || ip->protocol == IPPROTO_UDP) {
186 struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
187 src_port = ntohs(tcp->source);
188 dst_port = ntohs(tcp->dest);
191 printk("PROTO=%d %ld.%ld.%ld.%ld:%hu %ld.%ld.%ld.%ld:%hu"
192 " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
193 ip->protocol,
194 (ntohl(ip->saddr)>>24)&0xFF,
195 (ntohl(ip->saddr)>>16)&0xFF,
196 (ntohl(ip->saddr)>>8)&0xFF,
197 (ntohl(ip->saddr))&0xFF,
198 src_port,
199 (ntohl(ip->daddr)>>24)&0xFF,
200 (ntohl(ip->daddr)>>16)&0xFF,
201 (ntohl(ip->daddr)>>8)&0xFF,
202 (ntohl(ip->daddr))&0xFF,
203 dst_port,
204 ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
205 ntohs(ip->frag_off), ip->ttl);
207 for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
208 printk(" O=0x%8.8X", *opt++);
209 printk("\n");
214 void nf_debug_ip_local_deliver(struct sk_buff *skb)
216 /* If it's a loopback packet, it must have come through
217 * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
218 * NF_IP_LOCAL_IN. Otherwise, must have gone through
219 * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */
220 if (!skb->dev) {
221 printk("ip_local_deliver: skb->dev is NULL.\n");
223 else if (strcmp(skb->dev->name, "lo") == 0) {
224 if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
225 | (1 << NF_IP_POST_ROUTING)
226 | (1 << NF_IP_PRE_ROUTING)
227 | (1 << NF_IP_LOCAL_IN))) {
228 printk("ip_local_deliver: bad loopback skb: ");
229 debug_print_hooks_ip(skb->nf_debug);
230 nf_dump_skb(PF_INET, skb);
233 else {
234 if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
235 | (1<<NF_IP_LOCAL_IN))) {
236 printk("ip_local_deliver: bad non-lo skb: ");
237 debug_print_hooks_ip(skb->nf_debug);
238 nf_dump_skb(PF_INET, skb);
243 void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
245 if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
246 | (1 << NF_IP_POST_ROUTING))) {
247 printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
248 newskb);
249 debug_print_hooks_ip(newskb->nf_debug);
250 nf_dump_skb(PF_INET, newskb);
252 /* Clear to avoid confusing input check */
253 newskb->nf_debug = 0;
256 void nf_debug_ip_finish_output2(struct sk_buff *skb)
258 /* If it's owned, it must have gone through the
259 * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
260 * Otherwise, must have gone through NF_IP_RAW_INPUT,
261 * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
263 if (skb->sk) {
264 if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
265 | (1 << NF_IP_POST_ROUTING))) {
266 printk("ip_finish_output: bad owned skb = %p: ", skb);
267 debug_print_hooks_ip(skb->nf_debug);
268 nf_dump_skb(PF_INET, skb);
270 } else {
271 if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
272 #ifdef CONFIG_IP_NETFILTER_RAW_INPUT
273 | (1 << NF_IP_RAW_INPUT)
274 #endif
275 | (1 << NF_IP_FORWARD)
276 | (1 << NF_IP_POST_ROUTING))) {
277 printk("ip_finish_output: bad unowned skb = %p: ",skb);
278 debug_print_hooks_ip(skb->nf_debug);
279 nf_dump_skb(PF_INET, skb);
285 #endif /*CONFIG_NETFILTER_DEBUG*/
287 void nf_cacheflush(int pf, unsigned int hook, const void *packet,
288 const struct net_device *indev, const struct net_device *outdev,
289 __u32 packetcount, __u32 bytecount)
291 struct list_head *i;
293 read_lock_bh(&nf_lock);
294 for (i = nf_hooks[pf][hook].next;
295 i != &nf_hooks[pf][hook];
296 i = i->next) {
297 if (((struct nf_hook_ops *)i)->flush)
298 ((struct nf_hook_ops *)i)->flush(packet, indev,
299 outdev,
300 packetcount,
301 bytecount);
303 read_unlock_bh(&nf_lock);
306 /* Call get/setsockopt() */
307 static int nf_sockopt(struct sock *sk, int pf, int val,
308 char *opt, int *len, int get)
310 struct list_head *i;
311 int ret;
313 if (!capable(CAP_NET_ADMIN))
314 return -EPERM;
316 if (down_interruptible(&nf_sockopt_mutex) != 0)
317 return -EINTR;
319 for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
320 struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
321 if (ops->pf == pf) {
322 if (get) {
323 if (val >= ops->get_optmin
324 && val < ops->get_optmax) {
325 ret = ops->get(sk, val, opt, len);
326 goto out;
328 } else {
329 if (val >= ops->set_optmin
330 && val < ops->set_optmax) {
331 ret = ops->set(sk, val, opt, *len);
332 goto out;
337 ret = -ENOPROTOOPT;
338 out:
339 up(&nf_sockopt_mutex);
340 return ret;
/* setsockopt entry point: len is passed by value, forwarded by
 * address to the common dispatcher. */
int nf_setsockopt(struct sock *sk, int pf, int val, char *opt,
		  int len)
{
	return nf_sockopt(sk, pf, val, opt, &len, 0);
}
/* getsockopt entry point: len is in/out, so it is already a pointer. */
int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len)
{
	return nf_sockopt(sk, pf, val, opt, len, 1);
}
354 static unsigned int nf_iterate(struct list_head *head,
355 struct sk_buff **skb,
356 int hook,
357 const struct net_device *indev,
358 const struct net_device *outdev,
359 struct list_head **i)
361 for (*i = (*i)->next; *i != head; *i = (*i)->next) {
362 struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
363 switch (elem->hook(hook, skb, indev, outdev)) {
364 case NF_QUEUE:
365 NFDEBUG("nf_iterate: NF_QUEUE for %p.\n", *skb);
366 return NF_QUEUE;
368 case NF_STOLEN:
369 NFDEBUG("nf_iterate: NF_STOLEN for %p.\n", *skb);
370 return NF_STOLEN;
372 case NF_DROP:
373 NFDEBUG("nf_iterate: NF_DROP for %p.\n", *skb);
374 return NF_DROP;
376 #ifdef CONFIG_NETFILTER_DEBUG
377 case NF_ACCEPT:
378 break;
380 default:
381 NFDEBUG("Evil return from %p(%u).\n",
382 elem->hook, hook);
383 #endif
386 return NF_ACCEPT;
389 static void nf_queue(struct sk_buff *skb,
390 struct list_head *elem,
391 int pf, unsigned int hook,
392 struct net_device *indev,
393 struct net_device *outdev,
394 int (*okfn)(struct sk_buff *))
396 struct list_head *i;
398 struct nf_info *info = kmalloc(sizeof(*info), GFP_ATOMIC);
399 if (!info) {
400 NFDEBUG("nf_hook: OOM.\n");
401 kfree_skb(skb);
402 return;
405 /* Can't do struct assignments with arrays in them. Damn. */
406 info->elem = (struct nf_hook_ops *)elem;
407 info->mark = skb->nfmark;
408 info->pf = pf;
409 info->hook = hook;
410 info->okfn = okfn;
411 info->indev = indev;
412 info->outdev = outdev;
413 skb->nfmark = (unsigned long)info;
415 /* Bump dev refs so they don't vanish while packet is out */
416 if (indev) dev_hold(indev);
417 if (outdev) dev_hold(outdev);
419 for (i = nf_interested.next; i != &nf_interested; i = i->next) {
420 struct nf_interest *recip = (struct nf_interest *)i;
422 if ((recip->hookmask & (1 << info->hook))
423 && info->pf == recip->pf
424 && (!recip->mark || info->mark == recip->mark)
425 && (!recip->reason || skb->nfreason == recip->reason)) {
426 /* FIXME: Andi says: use netlink. Hmmm... --RR */
427 if (skb_queue_len(&recip->wake->skbq) >= 100) {
428 NFDEBUG("nf_hook: queue to long.\n");
429 goto free_discard;
431 /* Hand it to userspace for collection */
432 skb_queue_tail(&recip->wake->skbq, skb);
433 NFDEBUG("Waking up pf=%i hook=%u mark=%lu reason=%u\n",
434 pf, hook, skb->nfmark, skb->nfreason);
435 wake_up_interruptible(&recip->wake->sleep);
437 return;
440 NFDEBUG("nf_hook: noone wants the packet.\n");
442 free_discard:
443 if (indev) dev_put(indev);
444 if (outdev) dev_put(outdev);
446 kfree_s(info, sizeof(*info));
447 kfree_skb(skb);
450 /* nf_hook() doesn't have lock, so may give false positive. */
451 int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
452 struct net_device *indev,
453 struct net_device *outdev,
454 int (*okfn)(struct sk_buff *))
456 struct list_head *elem;
457 unsigned int verdict;
458 int ret = 0;
460 #ifdef CONFIG_NETFILTER_DEBUG
461 if (pf < 0 || pf >= NPROTO || hook >= NF_MAX_HOOKS) {
462 NFDEBUG("nf_hook: bad vals: pf=%i, hook=%u.\n",
463 pf, hook);
464 kfree_skb(skb);
465 return -EINVAL; /* -ECODERFUCKEDUP ?*/
468 if (skb->nf_debug & (1 << hook)) {
469 NFDEBUG("nf_hook: hook %i already set.\n", hook);
470 nf_dump_skb(pf, skb);
472 skb->nf_debug |= (1 << hook);
473 #endif
474 read_lock_bh(&nf_lock);
475 elem = &nf_hooks[pf][hook];
476 verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
477 outdev, &elem);
478 if (verdict == NF_QUEUE) {
479 NFDEBUG("nf_hook: Verdict = QUEUE.\n");
480 nf_queue(skb, elem, pf, hook, indev, outdev, okfn);
482 read_unlock_bh(&nf_lock);
484 switch (verdict) {
485 case NF_ACCEPT:
486 ret = okfn(skb);
487 break;
489 case NF_DROP:
490 kfree_skb(skb);
491 ret = -EPERM;
492 break;
495 return ret;
/* Verdict/owner pair for a task waiting on a queued packet
 * (used by the netfilter device). */
struct nf_waitinfo {
	unsigned int verdict;
	struct task_struct *owner;
};
503 /* For netfilter device. */
504 void nf_register_interest(struct nf_interest *interest)
506 /* First in, best dressed. */
507 write_lock_bh(&nf_lock);
508 list_add(&interest->list, &nf_interested);
509 write_unlock_bh(&nf_lock);
512 void nf_unregister_interest(struct nf_interest *interest)
514 struct sk_buff *skb;
516 write_lock_bh(&nf_lock);
517 list_del(&interest->list);
518 write_unlock_bh(&nf_lock);
520 /* Blow away any queued skbs; this is overzealous. */
521 while ((skb = skb_dequeue(&interest->wake->skbq)) != NULL)
522 nf_reinject(skb, 0, NF_DROP);
525 void nf_getinfo(const struct sk_buff *skb,
526 struct net_device **indev,
527 struct net_device **outdev,
528 unsigned long *mark)
530 const struct nf_info *info = (const struct nf_info *)skb->nfmark;
532 *indev = info->indev;
533 *outdev = info->outdev;
534 *mark = info->mark;
537 void nf_reinject(struct sk_buff *skb, unsigned long mark, unsigned int verdict)
539 struct nf_info *info = (struct nf_info *)skb->nfmark;
540 struct list_head *elem = &info->elem->list;
541 struct list_head *i;
543 read_lock_bh(&nf_lock);
545 for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) {
546 if (i == &nf_hooks[info->pf][info->hook]) {
547 /* The module which sent it to userspace is gone. */
548 verdict = NF_DROP;
549 break;
553 /* Continue traversal iff userspace said ok, and devices still
554 exist... */
555 if (verdict == NF_ACCEPT) {
556 skb->nfmark = mark;
557 verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
558 &skb, info->hook,
559 info->indev, info->outdev, &elem);
562 if (verdict == NF_QUEUE) {
563 nf_queue(skb, elem, info->pf, info->hook,
564 info->indev, info->outdev, info->okfn);
566 read_unlock_bh(&nf_lock);
568 switch (verdict) {
569 case NF_ACCEPT:
570 local_bh_disable();
571 info->okfn(skb);
572 local_bh_enable();
573 break;
575 case NF_DROP:
576 kfree_skb(skb);
577 break;
580 /* Release those devices we held, or Alexey will kill me. */
581 if (info->indev) dev_put(info->indev);
582 if (info->outdev) dev_put(info->outdev);
584 kfree_s(info, sizeof(*info));
585 return;
/* FIXME: Before cache is ever used, this must be implemented for real. */
void nf_invalidate_cache(int pf)
{
}
593 #ifdef CONFIG_NETFILTER_DEBUG
595 void debug_print_hooks_ip(unsigned int nf_debug)
597 if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
598 printk("PRE_ROUTING ");
599 nf_debug ^= (1 << NF_IP_PRE_ROUTING);
601 if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
602 printk("LOCAL_IN ");
603 nf_debug ^= (1 << NF_IP_LOCAL_IN);
605 if (nf_debug & (1 << NF_IP_FORWARD)) {
606 printk("FORWARD ");
607 nf_debug ^= (1 << NF_IP_FORWARD);
609 if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
610 printk("LOCAL_OUT ");
611 nf_debug ^= (1 << NF_IP_LOCAL_OUT);
613 if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
614 printk("POST_ROUTING ");
615 nf_debug ^= (1 << NF_IP_POST_ROUTING);
617 if (nf_debug)
618 printk("Crap bits: 0x%04X", nf_debug);
619 printk("\n");
621 #endif /* CONFIG_NETFILTER_DEBUG */
623 void __init netfilter_init(void)
625 int i, h;
627 for (i = 0; i < NPROTO; i++)
628 for (h = 0; h < NF_MAX_HOOKS; h++)
629 INIT_LIST_HEAD(&nf_hooks[i][h]);