/* netfilter.c: look after the filters for various protocols.
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 *
 * Rusty Russell (C)2000 -- This code is GPL.
 *
 * February 2000: Modified by James Morris to have 1 queue per protocol.
 * 15-Mar-2000:   Added NF_REPEAT --RR.
 * 08-May-2003:   Internal logging interface added by Jozsef Kadlecsik.
 */
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <net/protocol.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <net/sock.h>
#include <net/route.h>
#include <linux/ip.h>

/* In this code, we can be waiting indefinitely for userspace to
 * service a packet if a hook returns NF_QUEUE.  We could keep a count
 * of skbuffs queued for userspace, and not deregister a hook unless
 * this is zero, but that sucks.  Now, we simply check when the
 * packets come back: if the hook is gone, the packet is discarded. */
#ifdef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...)  printk(format , ## args)
#else
#define NFDEBUG(format, args...)
#endif

/* Sockopts only registered and called from user context, so
   net locking would be overkill.  Also, [gs]etsockopt calls may
   sleep. */
static DECLARE_MUTEX(nf_sockopt_mutex);

struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
static LIST_HEAD(nf_sockopts);
static DEFINE_SPINLOCK(nf_hook_lock);

/*
 * A queue handler may be registered for each protocol.  Each is protected by
 * a long-term mutex.  The handler must provide an outfn() to accept packets
 * for queueing and must reinject all packets it receives, no matter what.
 */
static struct nf_queue_handler_t {
        nf_queue_outfn_t outfn;
        void *data;
} queue_handler[NPROTO];
static DEFINE_RWLOCK(queue_handler_lock);

int nf_register_hook(struct nf_hook_ops *reg)
{
        struct list_head *i;

        spin_lock_bh(&nf_hook_lock);
        list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
                if (reg->priority < ((struct nf_hook_ops *)i)->priority)
                        break;
        }
        list_add_rcu(&reg->list, i->prev);
        spin_unlock_bh(&nf_hook_lock);

        synchronize_net();
        return 0;
}

void nf_unregister_hook(struct nf_hook_ops *reg)
{
        spin_lock_bh(&nf_hook_lock);
        list_del_rcu(&reg->list);
        spin_unlock_bh(&nf_hook_lock);

        synchronize_net();
}

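/* Usage sketch (illustrative, not part of this file): a do-nothing hook
 * registered on the IPv4 PRE_ROUTING chain.  The example_* names are
 * invented; the nf_hookfn signature and NF_IP_* constants come from
 * linux/netfilter.h and linux/netfilter_ipv4.h. */
#if 0
static unsigned int example_hook(unsigned int hooknum,
                                 struct sk_buff **pskb,
                                 const struct net_device *in,
                                 const struct net_device *out,
                                 int (*okfn)(struct sk_buff *))
{
        return NF_ACCEPT;       /* let the packet continue down the chain */
}

static struct nf_hook_ops example_ops = {
        .hook     = example_hook,
        .owner    = THIS_MODULE,
        .pf       = PF_INET,
        .hooknum  = NF_IP_PRE_ROUTING,
        .priority = NF_IP_PRI_FILTER,
};

/* module init */  nf_register_hook(&example_ops);   /* inserted in .priority order */
/* module exit */  nf_unregister_hook(&example_ops);
#endif
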
/* Do exclusive ranges overlap? */
static inline int overlap(int min1, int max1, int min2, int max2)
{
        return max1 > min2 && min1 < max2;
}
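/* E.g. the exclusive ranges [0,64) and [64,128) do not overlap:
 * overlap(0, 64, 64, 128) == 0, but overlap(0, 64, 63, 128) == 1. */
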
/* Functions to register sockopt ranges (exclusive). */
int nf_register_sockopt(struct nf_sockopt_ops *reg)
{
        struct list_head *i;
        int ret = 0;

        if (down_interruptible(&nf_sockopt_mutex) != 0)
                return -EINTR;

        list_for_each(i, &nf_sockopts) {
                struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
                if (ops->pf == reg->pf
                    && (overlap(ops->set_optmin, ops->set_optmax,
                                reg->set_optmin, reg->set_optmax)
                        || overlap(ops->get_optmin, ops->get_optmax,
                                   reg->get_optmin, reg->get_optmax))) {
                        NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
                                ops->set_optmin, ops->set_optmax,
                                ops->get_optmin, ops->get_optmax,
                                reg->set_optmin, reg->set_optmax,
                                reg->get_optmin, reg->get_optmax);
                        ret = -EBUSY;
                        goto out;
                }
        }

        list_add(&reg->list, &nf_sockopts);
out:
        up(&nf_sockopt_mutex);
        return ret;
}

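/* Usage sketch (illustrative): claiming an exclusive sockopt range for
 * PF_INET, the way ip_tables does for its IPT_SO_* options.  The
 * example_* names and option numbers are invented; the field names and
 * handler prototypes are assumed to follow struct nf_sockopt_ops in
 * linux/netfilter.h. */
#if 0
static int example_set(struct sock *sk, int optval, void __user *user,
                       unsigned int len)
{
        return 0;
}

static int example_get(struct sock *sk, int optval, void __user *user,
                       int *len)
{
        return 0;
}

static struct nf_sockopt_ops example_sockopts = {
        .pf         = PF_INET,
        .set_optmin = 96,       /* handles setsockopt optnames [96,100) */
        .set_optmax = 100,
        .set        = example_set,
        .get_optmin = 96,       /* handles getsockopt optnames [96,100) */
        .get_optmax = 100,
        .get        = example_get,
};

/* returns -EBUSY if either range overlaps an existing registration */
nf_register_sockopt(&example_sockopts);
#endif
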
void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
{
        /* No point being interruptible: we're probably in cleanup_module() */
restart:
        down(&nf_sockopt_mutex);
        if (reg->use != 0) {
                /* To be woken by nf_sockopt call... */
                /* FIXME: Stuart Young's name appears gratuitously. */
                set_current_state(TASK_UNINTERRUPTIBLE);
                reg->cleanup_task = current;
                up(&nf_sockopt_mutex);
                schedule();
                goto restart;
        }
        list_del(&reg->list);
        up(&nf_sockopt_mutex);
}

/* Call get/setsockopt() */
static int nf_sockopt(struct sock *sk, int pf, int val,
                      char __user *opt, int *len, int get)
{
        struct list_head *i;
        struct nf_sockopt_ops *ops;
        int ret;

        if (down_interruptible(&nf_sockopt_mutex) != 0)
                return -EINTR;

        list_for_each(i, &nf_sockopts) {
                ops = (struct nf_sockopt_ops *)i;
                if (ops->pf == pf) {
                        if (get) {
                                if (val >= ops->get_optmin
                                    && val < ops->get_optmax) {
                                        ops->use++;
                                        up(&nf_sockopt_mutex);
                                        ret = ops->get(sk, val, opt, len);
                                        goto out;
                                }
                        } else {
                                if (val >= ops->set_optmin
                                    && val < ops->set_optmax) {
                                        ops->use++;
                                        up(&nf_sockopt_mutex);
                                        ret = ops->set(sk, val, opt, *len);
                                        goto out;
                                }
                        }
                }
        }
        up(&nf_sockopt_mutex);
        return -ENOPROTOOPT;

out:
        down(&nf_sockopt_mutex);
        ops->use--;
        if (ops->cleanup_task)
                wake_up_process(ops->cleanup_task);
        up(&nf_sockopt_mutex);
        return ret;
}

int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
                  int len)
{
        return nf_sockopt(sk, pf, val, opt, &len, 0);
}

int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
{
        return nf_sockopt(sk, pf, val, opt, len, 1);
}
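/* Call path sketch (assumed, per the protocol handlers' fallthrough to
 * netfilter): a userspace setsockopt() with an optname the IP code does
 * not itself handle, but which lies inside a registered range, lands here:
 *
 *   setsockopt(fd, IPPROTO_IP, MY_SO_SET_OPT, &arg, sizeof(arg))
 *     -> ip_setsockopt() does not recognise MY_SO_SET_OPT (hypothetical)
 *     -> nf_setsockopt(sk, PF_INET, MY_SO_SET_OPT, optval, optlen)
 *     -> matching nf_sockopt_ops->set()
 */
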
static unsigned int nf_iterate(struct list_head *head,
                               struct sk_buff **skb,
                               int hook,
                               const struct net_device *indev,
                               const struct net_device *outdev,
                               struct list_head **i,
                               int (*okfn)(struct sk_buff *),
                               int hook_thresh)
{
        unsigned int verdict;

        /*
         * The caller must not block between calls to this
         * function because of risk of continuing from deleted element.
         */
        list_for_each_continue_rcu(*i, head) {
                struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;

                if (hook_thresh > elem->priority)
                        continue;

                /* Optimization: we don't need to hold module
                   reference here, since function can't sleep. --RR */
                verdict = elem->hook(hook, skb, indev, outdev, okfn);
                if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
                        if (unlikely(verdict > NF_MAX_VERDICT)) {
                                NFDEBUG("Evil return from %p(%u).\n",
                                        elem->hook, hook);
                                continue;
                        }
#endif
                        if (verdict != NF_REPEAT)
                                return verdict;
                        *i = (*i)->prev;
                }
        }
        return NF_ACCEPT;
}

int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
{
        int ret;

        write_lock_bh(&queue_handler_lock);
        if (queue_handler[pf].outfn)
                ret = -EBUSY;
        else {
                queue_handler[pf].outfn = outfn;
                queue_handler[pf].data = data;
                ret = 0;
        }
        write_unlock_bh(&queue_handler_lock);

        return ret;
}

/* The caller must flush their queue before this */
int nf_unregister_queue_handler(int pf)
{
        write_lock_bh(&queue_handler_lock);
        queue_handler[pf].outfn = NULL;
        queue_handler[pf].data = NULL;
        write_unlock_bh(&queue_handler_lock);

        return 0;
}
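/* Usage sketch (illustrative): how a userspace-queueing module such as
 * ip_queue plugs in.  example_outfn is invented; per the comment above
 * nf_queue_handler_t, an outfn must eventually reinject every packet it
 * accepted, no matter what. */
#if 0
static int example_outfn(struct sk_buff *skb, struct nf_info *info,
                         void *data)
{
        /* A real handler would hand skb to userspace and reinject later
         * with the verdict it gets back; here we accept immediately. */
        nf_reinject(skb, info, NF_ACCEPT);
        return 0;
}

/* module init */  nf_register_queue_handler(PF_INET, example_outfn, NULL);
/* module exit */  nf_unregister_queue_handler(PF_INET);  /* flush queue first */
#endif
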
/*
 * Any packet that leaves via this function must come back
 * through nf_reinject().
 */
static int nf_queue(struct sk_buff *skb,
                    struct list_head *elem,
                    int pf, unsigned int hook,
                    struct net_device *indev,
                    struct net_device *outdev,
                    int (*okfn)(struct sk_buff *))
{
        int status;
        struct nf_info *info;
#ifdef CONFIG_BRIDGE_NETFILTER
        struct net_device *physindev = NULL;
        struct net_device *physoutdev = NULL;
#endif

        /* QUEUE == DROP if no one is waiting, to be safe. */
        read_lock(&queue_handler_lock);
        if (!queue_handler[pf].outfn) {
                read_unlock(&queue_handler_lock);
                kfree_skb(skb);
                return 1;
        }

        info = kmalloc(sizeof(*info), GFP_ATOMIC);
        if (!info) {
                if (net_ratelimit())
                        printk(KERN_ERR "OOM queueing packet %p\n",
                               skb);
                read_unlock(&queue_handler_lock);
                kfree_skb(skb);
                return 1;
        }

        *info = (struct nf_info) {
                (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };

        /* If it's going away, ignore hook. */
        if (!try_module_get(info->elem->owner)) {
                read_unlock(&queue_handler_lock);
                kfree(info);
                return 0;
        }

        /* Bump dev refs so they don't vanish while packet is out */
        if (indev) dev_hold(indev);
        if (outdev) dev_hold(outdev);

#ifdef CONFIG_BRIDGE_NETFILTER
        if (skb->nf_bridge) {
                physindev = skb->nf_bridge->physindev;
                if (physindev) dev_hold(physindev);
                physoutdev = skb->nf_bridge->physoutdev;
                if (physoutdev) dev_hold(physoutdev);
        }
#endif

        status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
        read_unlock(&queue_handler_lock);

        if (status < 0) {
                /* James M doesn't say fuck enough. */
                if (indev) dev_put(indev);
                if (outdev) dev_put(outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
                if (physindev) dev_put(physindev);
                if (physoutdev) dev_put(physoutdev);
#endif
                module_put(info->elem->owner);
                kfree(info);
                kfree_skb(skb);
                return 1;
        }
        return 1;
}

/* Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise. */
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
                 struct net_device *indev,
                 struct net_device *outdev,
                 int (*okfn)(struct sk_buff *),
                 int hook_thresh)
{
        struct list_head *elem;
        unsigned int verdict;
        int ret = 0;

        /* We may already have this, but read-locks nest anyway */
        rcu_read_lock();

        elem = &nf_hooks[pf][hook];
next_hook:
        verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
                             outdev, &elem, okfn, hook_thresh);
        if (verdict == NF_ACCEPT || verdict == NF_STOP) {
                ret = 1;
                goto unlock;
        } else if (verdict == NF_DROP) {
                kfree_skb(*pskb);
                ret = -EPERM;
        } else if (verdict == NF_QUEUE) {
                NFDEBUG("nf_hook: Verdict = QUEUE.\n");
                if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn))
                        goto next_hook;
        }
unlock:
        rcu_read_unlock();
        return ret;
}
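/* Usage sketch: nf_hook_slow() is normally reached via the NF_HOOK()
 * macro from linux/netfilter.h rather than called directly; e.g. IPv4
 * input (net/ipv4/ip_input.c) runs its PRE_ROUTING chain like this: */
#if 0
        return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
                       ip_rcv_finish);
        /* The macro arranges for the okfn (ip_rcv_finish) to run when the
         * chain accepts the packet; on NF_DROP the skb is already freed
         * and -EPERM propagates back. */
#endif
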
void nf_reinject(struct sk_buff *skb, struct nf_info *info,
                 unsigned int verdict)
{
        struct list_head *elem = &info->elem->list;
        struct list_head *i;

        rcu_read_lock();

        /* Release those devices we held, or Alexey will kill me. */
        if (info->indev) dev_put(info->indev);
        if (info->outdev) dev_put(info->outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
        if (skb->nf_bridge) {
                if (skb->nf_bridge->physindev)
                        dev_put(skb->nf_bridge->physindev);
                if (skb->nf_bridge->physoutdev)
                        dev_put(skb->nf_bridge->physoutdev);
        }
#endif

        /* Drop reference to owner of hook which queued us. */
        module_put(info->elem->owner);

        list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
                if (i == elem)
                        break;
        }

        if (i == &nf_hooks[info->pf][info->hook]) {
                /* The module which sent it to userspace is gone. */
                NFDEBUG("%s: module disappeared, dropping packet.\n",
                        __FUNCTION__);
                verdict = NF_DROP;
        }

        /* Continue traversal iff userspace said ok... */
        if (verdict == NF_REPEAT) {
                elem = elem->prev;
                verdict = NF_ACCEPT;
        }

        if (verdict == NF_ACCEPT) {
        next_hook:
                verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
                                     &skb, info->hook,
                                     info->indev, info->outdev, &elem,
                                     info->okfn, INT_MIN);
        }

        switch (verdict) {
        case NF_ACCEPT:
                info->okfn(skb);
                break;

        case NF_QUEUE:
                if (!nf_queue(skb, elem, info->pf, info->hook,
                              info->indev, info->outdev, info->okfn))
                        goto next_hook;
                break;
        }
        rcu_read_unlock();

        if (verdict == NF_DROP)
                kfree_skb(skb);

        kfree(info);
        return;
}

#ifdef CONFIG_INET
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct sk_buff **pskb)
{
        struct iphdr *iph = (*pskb)->nh.iph;
        struct rtable *rt;
        struct flowi fl = {};
        struct dst_entry *odst;
        unsigned int hh_len;

        /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
         * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
         */
        if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
                fl.nl_u.ip4_u.daddr = iph->daddr;
                fl.nl_u.ip4_u.saddr = iph->saddr;
                fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
                fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
#ifdef CONFIG_IP_ROUTE_FWMARK
                fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
#endif
                fl.proto = iph->protocol;
                if (ip_route_output_key(&rt, &fl) != 0)
                        return -1;

                /* Drop old route. */
                dst_release((*pskb)->dst);
                (*pskb)->dst = &rt->u.dst;
        } else {
                /* non-local src, find valid iif to satisfy
                 * rp-filter when calling ip_route_input. */
                fl.nl_u.ip4_u.daddr = iph->saddr;
                if (ip_route_output_key(&rt, &fl) != 0)
                        return -1;

                odst = (*pskb)->dst;
                if (ip_route_input(*pskb, iph->daddr, iph->saddr,
                                   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
                        dst_release(&rt->u.dst);
                        return -1;
                }
                dst_release(&rt->u.dst);
                dst_release(odst);
        }

        if ((*pskb)->dst->error)
                return -1;

        /* Change in oif may mean change in hh_len. */
        hh_len = (*pskb)->dst->dev->hard_header_len;
        if (skb_headroom(*pskb) < hh_len) {
                struct sk_buff *nskb;

                nskb = skb_realloc_headroom(*pskb, hh_len);
                if (!nskb)
                        return -1;
                if ((*pskb)->sk)
                        skb_set_owner_w(nskb, (*pskb)->sk);
                kfree_skb(*pskb);
                *pskb = nskb;
        }

        return 0;
}
EXPORT_SYMBOL(ip_route_me_harder);
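/* Usage sketch (illustrative): NAT-ish code re-routes after rewriting
 * addresses, so the skb's cached route matches its new header.
 * new_daddr is hypothetical. */
#if 0
        iph->daddr = new_daddr;
        ip_send_check(iph);
        if (ip_route_me_harder(pskb) != 0)
                return NF_DROP;        /* no route for the rewritten header */
#endif
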
int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
{
        struct sk_buff *nskb;

        if (writable_len > (*pskb)->len)
                return 0;

        /* Not exclusive use of packet?  Must copy. */
        if (skb_shared(*pskb) || skb_cloned(*pskb))
                goto copy_skb;

        return pskb_may_pull(*pskb, writable_len);

copy_skb:
        nskb = skb_copy(*pskb, GFP_ATOMIC);
        if (!nskb)
                return 0;
        BUG_ON(skb_is_nonlinear(nskb));

        /* Rest of kernel will get very unhappy if we pass it a
           suddenly-orphaned skbuff */
        if ((*pskb)->sk)
                skb_set_owner_w(nskb, (*pskb)->sk);
        kfree_skb(*pskb);
        *pskb = nskb;
        return 1;
}
EXPORT_SYMBOL(skb_ip_make_writable);
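/* Usage sketch (illustrative, modelled on the TOS-mangling targets): get
 * a private, linear copy of at least the IP header before writing to it.
 * new_tos is hypothetical. */
#if 0
        if (!skb_ip_make_writable(pskb, sizeof(struct iphdr)))
                return NF_DROP;
        iph = (*pskb)->nh.iph;   /* re-read: *pskb may have been replaced */
        iph->tos = (iph->tos & ~IPTOS_TOS_MASK) | new_tos;
        ip_send_check(iph);
#endif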
#endif /*CONFIG_INET*/

/* Internal logging interface, which relies on the real
   LOG target modules */

#define NF_LOG_PREFIXLEN 128

static nf_logfn *nf_logging[NPROTO]; /* = NULL */
static int reported = 0;
static DEFINE_SPINLOCK(nf_log_lock);

int nf_log_register(int pf, nf_logfn *logfn)
{
        int ret = -EBUSY;

        /* Any setup of logging members must be done before
         * substituting pointer. */
        spin_lock(&nf_log_lock);
        if (!nf_logging[pf]) {
                rcu_assign_pointer(nf_logging[pf], logfn);
                ret = 0;
        }
        spin_unlock(&nf_log_lock);
        return ret;
}

void nf_log_unregister(int pf, nf_logfn *logfn)
{
        spin_lock(&nf_log_lock);
        if (nf_logging[pf] == logfn)
                nf_logging[pf] = NULL;
        spin_unlock(&nf_log_lock);

        /* Give time to concurrent readers. */
        synchronize_net();
}

void nf_log_packet(int pf,
                   unsigned int hooknum,
                   const struct sk_buff *skb,
                   const struct net_device *in,
                   const struct net_device *out,
                   const char *fmt, ...)
{
        va_list args;
        char prefix[NF_LOG_PREFIXLEN];
        nf_logfn *logfn;

        rcu_read_lock();
        logfn = rcu_dereference(nf_logging[pf]);
        if (logfn) {
                va_start(args, fmt);
                vsnprintf(prefix, sizeof(prefix), fmt, args);
                va_end(args);
                /* We must read logging before nf_logfn[pf] */
                logfn(hooknum, skb, in, out, prefix);
        } else if (!reported) {
                printk(KERN_WARNING "nf_log_packet: can't log yet, "
                       "no backend logging module loaded in!\n");
                reported++;
        }
        rcu_read_unlock();
}

EXPORT_SYMBOL(nf_log_register);
EXPORT_SYMBOL(nf_log_unregister);
EXPORT_SYMBOL(nf_log_packet);
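/* Usage sketch: a hook or target logs through whatever backend is
 * registered for its protocol family, printf-style; the formatted prefix
 * is truncated to NF_LOG_PREFIXLEN bytes: */
#if 0
        nf_log_packet(PF_INET, hooknum, skb, in, out,
                      "example-prefix: dropping ");
#endif
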
/* This does not belong here, but locally generated errors need it if connection
   tracking in use: without this, connection may not be in hash table, and hence
   manufactured ICMP or RST packets will not be associated with it. */
void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);

void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
{
        void (*attach)(struct sk_buff *, struct sk_buff *);

        if (skb->nfct && (attach = ip_ct_attach) != NULL) {
                mb(); /* Just to be sure: must be read before executing this */
                attach(new, skb);
        }
}
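/* Usage sketch: code that fabricates a reply packet (e.g. ipt_REJECT's
 * TCP RST) ties the new skb to the original's conntrack entry: */
#if 0
        nf_ct_attach(nskb, oldskb);     /* nskb, oldskb hypothetical */
#endif
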
void __init netfilter_init(void)
{
        int i, h;

        for (i = 0; i < NPROTO; i++) {
                for (h = 0; h < NF_MAX_HOOKS; h++)
                        INIT_LIST_HEAD(&nf_hooks[i][h]);
        }
}

EXPORT_SYMBOL(ip_ct_attach);
EXPORT_SYMBOL(nf_ct_attach);
EXPORT_SYMBOL(nf_getsockopt);
EXPORT_SYMBOL(nf_hook_slow);
EXPORT_SYMBOL(nf_hooks);
EXPORT_SYMBOL(nf_register_hook);
EXPORT_SYMBOL(nf_register_queue_handler);
EXPORT_SYMBOL(nf_register_sockopt);
EXPORT_SYMBOL(nf_reinject);
EXPORT_SYMBOL(nf_setsockopt);
EXPORT_SYMBOL(nf_unregister_hook);
EXPORT_SYMBOL(nf_unregister_queue_handler);
EXPORT_SYMBOL(nf_unregister_sockopt);