dm thin: wake worker when discard is prepared
[linux-2.6.git] / net / netfilter / nf_queue.c
blob8d2cf9ec37a850951648640728ab1ad6d8f53e6d
1 #include <linux/kernel.h>
2 #include <linux/slab.h>
3 #include <linux/init.h>
4 #include <linux/module.h>
5 #include <linux/proc_fs.h>
6 #include <linux/skbuff.h>
7 #include <linux/netfilter.h>
8 #include <linux/seq_file.h>
9 #include <linux/rcupdate.h>
10 #include <net/protocol.h>
11 #include <net/netfilter/nf_queue.h>
12 #include <net/dst.h>
14 #include "nf_internals.h"
17 * A queue handler may be registered for each protocol. Each is protected by
18 * a long term mutex. The handler must provide an outfn() to accept packets
19 * for queueing and must reinject all packets it receives, no matter what.
/* Table of registered queue handlers, one slot per protocol family (indexed
 * by pf). Slots are read with rcu_dereference() and only modified while
 * queue_handler_mutex is held. */
21 static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly;
/* Serialises registration and unregistration of queue handlers. */
23 static DEFINE_MUTEX(queue_handler_mutex);
25 /* return EBUSY when somebody else is registered, return EEXIST if the
26 * same handler is registered, return 0 in case of success. */
27 int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
29 int ret;
30 const struct nf_queue_handler *old;
32 if (pf >= ARRAY_SIZE(queue_handler))
33 return -EINVAL;
35 mutex_lock(&queue_handler_mutex);
36 old = rcu_dereference_protected(queue_handler[pf],
37 lockdep_is_held(&queue_handler_mutex));
38 if (old == qh)
39 ret = -EEXIST;
40 else if (old)
41 ret = -EBUSY;
42 else {
43 rcu_assign_pointer(queue_handler[pf], qh);
44 ret = 0;
46 mutex_unlock(&queue_handler_mutex);
48 return ret;
50 EXPORT_SYMBOL(nf_register_queue_handler);
52 /* The caller must flush their queue before this */
53 int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
55 const struct nf_queue_handler *old;
57 if (pf >= ARRAY_SIZE(queue_handler))
58 return -EINVAL;
60 mutex_lock(&queue_handler_mutex);
61 old = rcu_dereference_protected(queue_handler[pf],
62 lockdep_is_held(&queue_handler_mutex));
63 if (old && old != qh) {
64 mutex_unlock(&queue_handler_mutex);
65 return -EINVAL;
68 RCU_INIT_POINTER(queue_handler[pf], NULL);
69 mutex_unlock(&queue_handler_mutex);
71 synchronize_rcu();
73 return 0;
75 EXPORT_SYMBOL(nf_unregister_queue_handler);
77 void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
79 u_int8_t pf;
81 mutex_lock(&queue_handler_mutex);
82 for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) {
83 if (rcu_dereference_protected(
84 queue_handler[pf],
85 lockdep_is_held(&queue_handler_mutex)
86 ) == qh)
87 RCU_INIT_POINTER(queue_handler[pf], NULL);
89 mutex_unlock(&queue_handler_mutex);
91 synchronize_rcu();
93 EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
95 static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
97 /* Release those devices we held, or Alexey will kill me. */
98 if (entry->indev)
99 dev_put(entry->indev);
100 if (entry->outdev)
101 dev_put(entry->outdev);
102 #ifdef CONFIG_BRIDGE_NETFILTER
103 if (entry->skb->nf_bridge) {
104 struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
106 if (nf_bridge->physindev)
107 dev_put(nf_bridge->physindev);
108 if (nf_bridge->physoutdev)
109 dev_put(nf_bridge->physoutdev);
111 #endif
112 /* Drop reference to owner of hook which queued us. */
113 module_put(entry->elem->owner);
117 * Any packet that leaves via this function must come back
118 * through nf_reinject().
120 static int __nf_queue(struct sk_buff *skb,
121 struct nf_hook_ops *elem,
122 u_int8_t pf, unsigned int hook,
123 struct net_device *indev,
124 struct net_device *outdev,
125 int (*okfn)(struct sk_buff *),
126 unsigned int queuenum)
128 int status = -ENOENT;
129 struct nf_queue_entry *entry = NULL;
130 #ifdef CONFIG_BRIDGE_NETFILTER
131 struct net_device *physindev;
132 struct net_device *physoutdev;
133 #endif
134 const struct nf_afinfo *afinfo;
135 const struct nf_queue_handler *qh;
137 /* QUEUE == DROP if no one is waiting, to be safe. */
138 rcu_read_lock();
140 qh = rcu_dereference(queue_handler[pf]);
141 if (!qh) {
142 status = -ESRCH;
143 goto err_unlock;
146 afinfo = nf_get_afinfo(pf);
147 if (!afinfo)
148 goto err_unlock;
150 entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
151 if (!entry) {
152 status = -ENOMEM;
153 goto err_unlock;
156 *entry = (struct nf_queue_entry) {
157 .skb = skb,
158 .elem = elem,
159 .pf = pf,
160 .hook = hook,
161 .indev = indev,
162 .outdev = outdev,
163 .okfn = okfn,
166 /* If it's going away, ignore hook. */
167 if (!try_module_get(entry->elem->owner)) {
168 status = -ECANCELED;
169 goto err_unlock;
171 /* Bump dev refs so they don't vanish while packet is out */
172 if (indev)
173 dev_hold(indev);
174 if (outdev)
175 dev_hold(outdev);
176 #ifdef CONFIG_BRIDGE_NETFILTER
177 if (skb->nf_bridge) {
178 physindev = skb->nf_bridge->physindev;
179 if (physindev)
180 dev_hold(physindev);
181 physoutdev = skb->nf_bridge->physoutdev;
182 if (physoutdev)
183 dev_hold(physoutdev);
185 #endif
186 skb_dst_force(skb);
187 afinfo->saveroute(skb, entry);
188 status = qh->outfn(entry, queuenum);
190 rcu_read_unlock();
192 if (status < 0) {
193 nf_queue_entry_release_refs(entry);
194 goto err;
197 return 0;
199 err_unlock:
200 rcu_read_unlock();
201 err:
202 kfree(entry);
203 return status;
#ifdef CONFIG_BRIDGE_NETFILTER
/*
 * When called from bridge netfilter, skb->data must point to the MAC header
 * before calling skb_gso_segment(). Otherwise the original MAC header is
 * lost and the segmented skbs will be sent to the wrong destination.
 */
static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
{
	if (!skb->nf_bridge)
		return;

	__skb_push(skb, skb->network_header - skb->mac_header);
}

/* Reverse of nf_bridge_adjust_skb_data(): pull the data pointer back by the
 * distance between the MAC and network headers for bridged skbs. */
static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
{
	if (!skb->nf_bridge)
		return;

	__skb_pull(skb, skb->network_header - skb->mac_header);
}
#else
/* Without bridge netfilter there is nothing to adjust. */
#define nf_bridge_adjust_skb_data(s) do {} while (0)
#define nf_bridge_adjust_segmented_data(s) do {} while (0)
#endif
227 int nf_queue(struct sk_buff *skb,
228 struct nf_hook_ops *elem,
229 u_int8_t pf, unsigned int hook,
230 struct net_device *indev,
231 struct net_device *outdev,
232 int (*okfn)(struct sk_buff *),
233 unsigned int queuenum)
235 struct sk_buff *segs;
236 int err = -EINVAL;
237 unsigned int queued;
239 if (!skb_is_gso(skb))
240 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
241 queuenum);
243 switch (pf) {
244 case NFPROTO_IPV4:
245 skb->protocol = htons(ETH_P_IP);
246 break;
247 case NFPROTO_IPV6:
248 skb->protocol = htons(ETH_P_IPV6);
249 break;
252 nf_bridge_adjust_skb_data(skb);
253 segs = skb_gso_segment(skb, 0);
254 /* Does not use PTR_ERR to limit the number of error codes that can be
255 * returned by nf_queue. For instance, callers rely on -ECANCELED to mean
256 * 'ignore this hook'.
258 if (IS_ERR(segs))
259 goto out_err;
260 queued = 0;
261 err = 0;
262 do {
263 struct sk_buff *nskb = segs->next;
265 segs->next = NULL;
266 if (err == 0) {
267 nf_bridge_adjust_segmented_data(segs);
268 err = __nf_queue(segs, elem, pf, hook, indev,
269 outdev, okfn, queuenum);
271 if (err == 0)
272 queued++;
273 else
274 kfree_skb(segs);
275 segs = nskb;
276 } while (segs);
278 if (queued) {
279 kfree_skb(skb);
280 return 0;
282 out_err:
283 nf_bridge_adjust_segmented_data(skb);
284 return err;
287 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
289 struct sk_buff *skb = entry->skb;
290 struct nf_hook_ops *elem = entry->elem;
291 const struct nf_afinfo *afinfo;
292 int err;
294 rcu_read_lock();
296 nf_queue_entry_release_refs(entry);
298 /* Continue traversal iff userspace said ok... */
299 if (verdict == NF_REPEAT) {
300 elem = list_entry(elem->list.prev, struct nf_hook_ops, list);
301 verdict = NF_ACCEPT;
304 if (verdict == NF_ACCEPT) {
305 afinfo = nf_get_afinfo(entry->pf);
306 if (!afinfo || afinfo->reroute(skb, entry) < 0)
307 verdict = NF_DROP;
310 if (verdict == NF_ACCEPT) {
311 next_hook:
312 verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook],
313 skb, entry->hook,
314 entry->indev, entry->outdev, &elem,
315 entry->okfn, INT_MIN);
318 switch (verdict & NF_VERDICT_MASK) {
319 case NF_ACCEPT:
320 case NF_STOP:
321 local_bh_disable();
322 entry->okfn(skb);
323 local_bh_enable();
324 break;
325 case NF_QUEUE:
326 err = __nf_queue(skb, elem, entry->pf, entry->hook,
327 entry->indev, entry->outdev, entry->okfn,
328 verdict >> NF_VERDICT_QBITS);
329 if (err < 0) {
330 if (err == -ECANCELED)
331 goto next_hook;
332 if (err == -ESRCH &&
333 (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
334 goto next_hook;
335 kfree_skb(skb);
337 break;
338 case NF_STOLEN:
339 break;
340 default:
341 kfree_skb(skb);
343 rcu_read_unlock();
344 kfree(entry);
346 EXPORT_SYMBOL(nf_reinject);
348 #ifdef CONFIG_PROC_FS
349 static void *seq_start(struct seq_file *seq, loff_t *pos)
351 if (*pos >= ARRAY_SIZE(queue_handler))
352 return NULL;
354 return pos;
357 static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
359 (*pos)++;
361 if (*pos >= ARRAY_SIZE(queue_handler))
362 return NULL;
364 return pos;
/* seq_file stop: intentionally empty. */
static void seq_stop(struct seq_file *s, void *v)
{
	/* Nothing to undo: seq_start() takes no lock and allocates nothing. */
}
372 static int seq_show(struct seq_file *s, void *v)
374 int ret;
375 loff_t *pos = v;
376 const struct nf_queue_handler *qh;
378 rcu_read_lock();
379 qh = rcu_dereference(queue_handler[*pos]);
380 if (!qh)
381 ret = seq_printf(s, "%2lld NONE\n", *pos);
382 else
383 ret = seq_printf(s, "%2lld %s\n", *pos, qh->name);
384 rcu_read_unlock();
386 return ret;
389 static const struct seq_operations nfqueue_seq_ops = {
390 .start = seq_start,
391 .next = seq_next,
392 .stop = seq_stop,
393 .show = seq_show,
396 static int nfqueue_open(struct inode *inode, struct file *file)
398 return seq_open(file, &nfqueue_seq_ops);
401 static const struct file_operations nfqueue_file_ops = {
402 .owner = THIS_MODULE,
403 .open = nfqueue_open,
404 .read = seq_read,
405 .llseek = seq_lseek,
406 .release = seq_release,
408 #endif /* PROC_FS */
411 int __init netfilter_queue_init(void)
413 #ifdef CONFIG_PROC_FS
414 if (!proc_create("nf_queue", S_IRUGO,
415 proc_net_netfilter, &nfqueue_file_ops))
416 return -1;
417 #endif
418 return 0;