dm thin: wake worker when discard is prepared
[linux-2.6.git] / net / netfilter / xt_recent.c
blob4635c9b0045981d862cb3b31a34ac9ab735eb85e
1 /*
2 * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
3 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
9 * This is a replacement of the old ipt_recent module, which carried the
10 * following copyright notice:
12 * Author: Stephen Frost <sfrost@snowman.net>
13 * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org
15 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
16 #include <linux/init.h>
17 #include <linux/ip.h>
18 #include <linux/ipv6.h>
19 #include <linux/module.h>
20 #include <linux/moduleparam.h>
21 #include <linux/proc_fs.h>
22 #include <linux/seq_file.h>
23 #include <linux/string.h>
24 #include <linux/ctype.h>
25 #include <linux/list.h>
26 #include <linux/random.h>
27 #include <linux/jhash.h>
28 #include <linux/bitops.h>
29 #include <linux/skbuff.h>
30 #include <linux/inet.h>
31 #include <linux/slab.h>
32 #include <net/net_namespace.h>
33 #include <net/netns/generic.h>
35 #include <linux/netfilter/x_tables.h>
36 #include <linux/netfilter/xt_recent.h>
38 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
39 MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
40 MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching");
41 MODULE_LICENSE("GPL");
42 MODULE_ALIAS("ipt_recent");
43 MODULE_ALIAS("ip6t_recent");
45 static unsigned int ip_list_tot = 100;
46 static unsigned int ip_pkt_list_tot = 20;
47 static unsigned int ip_list_hash_size = 0;
48 static unsigned int ip_list_perms = 0644;
49 static unsigned int ip_list_uid = 0;
50 static unsigned int ip_list_gid = 0;
51 module_param(ip_list_tot, uint, 0400);
52 module_param(ip_pkt_list_tot, uint, 0400);
53 module_param(ip_list_hash_size, uint, 0400);
54 module_param(ip_list_perms, uint, 0400);
55 module_param(ip_list_uid, uint, S_IRUGO | S_IWUSR);
56 module_param(ip_list_gid, uint, S_IRUGO | S_IWUSR);
57 MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
58 MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)");
59 MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
60 MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files");
61 MODULE_PARM_DESC(ip_list_uid, "default owner of /proc/net/xt_recent/* files");
62 MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* files");
64 struct recent_entry {
65 struct list_head list;
66 struct list_head lru_list;
67 union nf_inet_addr addr;
68 u_int16_t family;
69 u_int8_t ttl;
70 u_int8_t index;
71 u_int16_t nstamps;
72 unsigned long stamps[0];
75 struct recent_table {
76 struct list_head list;
77 char name[XT_RECENT_NAME_LEN];
78 union nf_inet_addr mask;
79 unsigned int refcnt;
80 unsigned int entries;
81 struct list_head lru_list;
82 struct list_head iphash[0];
85 struct recent_net {
86 struct list_head tables;
87 #ifdef CONFIG_PROC_FS
88 struct proc_dir_entry *xt_recent;
89 #endif
/* Slot id filled in through recent_net_ops.id. */
static int recent_net_id;

/* Return the recent_net instance attached to @net. */
static inline struct recent_net *recent_pernet(struct net *net)
{
	struct recent_net *pernet = net_generic(net, recent_net_id);

	return pernet;
}
98 static DEFINE_SPINLOCK(recent_lock);
99 static DEFINE_MUTEX(recent_mutex);
101 #ifdef CONFIG_PROC_FS
102 static const struct file_operations recent_old_fops, recent_mt_fops;
103 #endif
105 static u_int32_t hash_rnd __read_mostly;
106 static bool hash_rnd_inited __read_mostly;
108 static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr)
110 return jhash_1word((__force u32)addr->ip, hash_rnd) &
111 (ip_list_hash_size - 1);
114 static inline unsigned int recent_entry_hash6(const union nf_inet_addr *addr)
116 return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), hash_rnd) &
117 (ip_list_hash_size - 1);
120 static struct recent_entry *
121 recent_entry_lookup(const struct recent_table *table,
122 const union nf_inet_addr *addrp, u_int16_t family,
123 u_int8_t ttl)
125 struct recent_entry *e;
126 unsigned int h;
128 if (family == NFPROTO_IPV4)
129 h = recent_entry_hash4(addrp);
130 else
131 h = recent_entry_hash6(addrp);
133 list_for_each_entry(e, &table->iphash[h], list)
134 if (e->family == family &&
135 memcmp(&e->addr, addrp, sizeof(e->addr)) == 0 &&
136 (ttl == e->ttl || ttl == 0 || e->ttl == 0))
137 return e;
138 return NULL;
141 static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
143 list_del(&e->list);
144 list_del(&e->lru_list);
145 kfree(e);
146 t->entries--;
150 * Drop entries with timestamps older then 'time'.
152 static void recent_entry_reap(struct recent_table *t, unsigned long time)
154 struct recent_entry *e;
157 * The head of the LRU list is always the oldest entry.
159 e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
162 * The last time stamp is the most recent.
164 if (time_after(time, e->stamps[e->index-1]))
165 recent_entry_remove(t, e);
168 static struct recent_entry *
169 recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr,
170 u_int16_t family, u_int8_t ttl)
172 struct recent_entry *e;
174 if (t->entries >= ip_list_tot) {
175 e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
176 recent_entry_remove(t, e);
178 e = kmalloc(sizeof(*e) + sizeof(e->stamps[0]) * ip_pkt_list_tot,
179 GFP_ATOMIC);
180 if (e == NULL)
181 return NULL;
182 memcpy(&e->addr, addr, sizeof(e->addr));
183 e->ttl = ttl;
184 e->stamps[0] = jiffies;
185 e->nstamps = 1;
186 e->index = 1;
187 e->family = family;
188 if (family == NFPROTO_IPV4)
189 list_add_tail(&e->list, &t->iphash[recent_entry_hash4(addr)]);
190 else
191 list_add_tail(&e->list, &t->iphash[recent_entry_hash6(addr)]);
192 list_add_tail(&e->lru_list, &t->lru_list);
193 t->entries++;
194 return e;
197 static void recent_entry_update(struct recent_table *t, struct recent_entry *e)
199 e->index %= ip_pkt_list_tot;
200 e->stamps[e->index++] = jiffies;
201 if (e->index > e->nstamps)
202 e->nstamps = e->index;
203 list_move_tail(&e->lru_list, &t->lru_list);
206 static struct recent_table *recent_table_lookup(struct recent_net *recent_net,
207 const char *name)
209 struct recent_table *t;
211 list_for_each_entry(t, &recent_net->tables, list)
212 if (!strcmp(t->name, name))
213 return t;
214 return NULL;
217 static void recent_table_flush(struct recent_table *t)
219 struct recent_entry *e, *next;
220 unsigned int i;
222 for (i = 0; i < ip_list_hash_size; i++)
223 list_for_each_entry_safe(e, next, &t->iphash[i], list)
224 recent_entry_remove(t, e);
227 static bool
228 recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
230 struct net *net = dev_net(par->in ? par->in : par->out);
231 struct recent_net *recent_net = recent_pernet(net);
232 const struct xt_recent_mtinfo_v1 *info = par->matchinfo;
233 struct recent_table *t;
234 struct recent_entry *e;
235 union nf_inet_addr addr = {}, addr_mask;
236 u_int8_t ttl;
237 bool ret = info->invert;
239 if (par->family == NFPROTO_IPV4) {
240 const struct iphdr *iph = ip_hdr(skb);
242 if (info->side == XT_RECENT_DEST)
243 addr.ip = iph->daddr;
244 else
245 addr.ip = iph->saddr;
247 ttl = iph->ttl;
248 } else {
249 const struct ipv6hdr *iph = ipv6_hdr(skb);
251 if (info->side == XT_RECENT_DEST)
252 memcpy(&addr.in6, &iph->daddr, sizeof(addr.in6));
253 else
254 memcpy(&addr.in6, &iph->saddr, sizeof(addr.in6));
256 ttl = iph->hop_limit;
259 /* use TTL as seen before forwarding */
260 if (par->out != NULL && skb->sk == NULL)
261 ttl++;
263 spin_lock_bh(&recent_lock);
264 t = recent_table_lookup(recent_net, info->name);
266 nf_inet_addr_mask(&addr, &addr_mask, &t->mask);
268 e = recent_entry_lookup(t, &addr_mask, par->family,
269 (info->check_set & XT_RECENT_TTL) ? ttl : 0);
270 if (e == NULL) {
271 if (!(info->check_set & XT_RECENT_SET))
272 goto out;
273 e = recent_entry_init(t, &addr_mask, par->family, ttl);
274 if (e == NULL)
275 par->hotdrop = true;
276 ret = !ret;
277 goto out;
280 if (info->check_set & XT_RECENT_SET)
281 ret = !ret;
282 else if (info->check_set & XT_RECENT_REMOVE) {
283 recent_entry_remove(t, e);
284 ret = !ret;
285 } else if (info->check_set & (XT_RECENT_CHECK | XT_RECENT_UPDATE)) {
286 unsigned long time = jiffies - info->seconds * HZ;
287 unsigned int i, hits = 0;
289 for (i = 0; i < e->nstamps; i++) {
290 if (info->seconds && time_after(time, e->stamps[i]))
291 continue;
292 if (!info->hit_count || ++hits >= info->hit_count) {
293 ret = !ret;
294 break;
298 /* info->seconds must be non-zero */
299 if (info->check_set & XT_RECENT_REAP)
300 recent_entry_reap(t, time);
303 if (info->check_set & XT_RECENT_SET ||
304 (info->check_set & XT_RECENT_UPDATE && ret)) {
305 recent_entry_update(t, e);
306 e->ttl = ttl;
308 out:
309 spin_unlock_bh(&recent_lock);
310 return ret;
313 static int recent_mt_check(const struct xt_mtchk_param *par,
314 const struct xt_recent_mtinfo_v1 *info)
316 struct recent_net *recent_net = recent_pernet(par->net);
317 struct recent_table *t;
318 #ifdef CONFIG_PROC_FS
319 struct proc_dir_entry *pde;
320 kuid_t uid;
321 kgid_t gid;
322 #endif
323 unsigned int i;
324 int ret = -EINVAL;
326 if (unlikely(!hash_rnd_inited)) {
327 get_random_bytes(&hash_rnd, sizeof(hash_rnd));
328 hash_rnd_inited = true;
330 if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
331 pr_info("Unsupported user space flags (%08x)\n",
332 info->check_set);
333 return -EINVAL;
335 if (hweight8(info->check_set &
336 (XT_RECENT_SET | XT_RECENT_REMOVE |
337 XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1)
338 return -EINVAL;
339 if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) &&
340 (info->seconds || info->hit_count ||
341 (info->check_set & XT_RECENT_MODIFIERS)))
342 return -EINVAL;
343 if ((info->check_set & XT_RECENT_REAP) && !info->seconds)
344 return -EINVAL;
345 if (info->hit_count > ip_pkt_list_tot) {
346 pr_info("hitcount (%u) is larger than "
347 "packets to be remembered (%u)\n",
348 info->hit_count, ip_pkt_list_tot);
349 return -EINVAL;
351 if (info->name[0] == '\0' ||
352 strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN)
353 return -EINVAL;
355 mutex_lock(&recent_mutex);
356 t = recent_table_lookup(recent_net, info->name);
357 if (t != NULL) {
358 t->refcnt++;
359 ret = 0;
360 goto out;
363 t = kzalloc(sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size,
364 GFP_KERNEL);
365 if (t == NULL) {
366 ret = -ENOMEM;
367 goto out;
369 t->refcnt = 1;
371 memcpy(&t->mask, &info->mask, sizeof(t->mask));
372 strcpy(t->name, info->name);
373 INIT_LIST_HEAD(&t->lru_list);
374 for (i = 0; i < ip_list_hash_size; i++)
375 INIT_LIST_HEAD(&t->iphash[i]);
376 #ifdef CONFIG_PROC_FS
377 uid = make_kuid(&init_user_ns, ip_list_uid);
378 gid = make_kgid(&init_user_ns, ip_list_gid);
379 if (!uid_valid(uid) || !gid_valid(gid)) {
380 kfree(t);
381 ret = -EINVAL;
382 goto out;
384 pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent,
385 &recent_mt_fops, t);
386 if (pde == NULL) {
387 kfree(t);
388 ret = -ENOMEM;
389 goto out;
391 pde->uid = uid;
392 pde->gid = gid;
393 #endif
394 spin_lock_bh(&recent_lock);
395 list_add_tail(&t->list, &recent_net->tables);
396 spin_unlock_bh(&recent_lock);
397 ret = 0;
398 out:
399 mutex_unlock(&recent_mutex);
400 return ret;
403 static int recent_mt_check_v0(const struct xt_mtchk_param *par)
405 const struct xt_recent_mtinfo_v0 *info_v0 = par->matchinfo;
406 struct xt_recent_mtinfo_v1 info_v1;
408 /* Copy revision 0 structure to revision 1 */
409 memcpy(&info_v1, info_v0, sizeof(struct xt_recent_mtinfo));
410 /* Set default mask to ensure backward compatible behaviour */
411 memset(info_v1.mask.all, 0xFF, sizeof(info_v1.mask.all));
413 return recent_mt_check(par, &info_v1);
416 static int recent_mt_check_v1(const struct xt_mtchk_param *par)
418 return recent_mt_check(par, par->matchinfo);
421 static void recent_mt_destroy(const struct xt_mtdtor_param *par)
423 struct recent_net *recent_net = recent_pernet(par->net);
424 const struct xt_recent_mtinfo_v1 *info = par->matchinfo;
425 struct recent_table *t;
427 mutex_lock(&recent_mutex);
428 t = recent_table_lookup(recent_net, info->name);
429 if (--t->refcnt == 0) {
430 spin_lock_bh(&recent_lock);
431 list_del(&t->list);
432 spin_unlock_bh(&recent_lock);
433 #ifdef CONFIG_PROC_FS
434 remove_proc_entry(t->name, recent_net->xt_recent);
435 #endif
436 recent_table_flush(t);
437 kfree(t);
439 mutex_unlock(&recent_mutex);
442 #ifdef CONFIG_PROC_FS
/* Private seq_file cursor: which table is dumped and the current bucket. */
struct recent_iter_state {
	const struct recent_table	*table;
	unsigned int			bucket;
};
448 static void *recent_seq_start(struct seq_file *seq, loff_t *pos)
449 __acquires(recent_lock)
451 struct recent_iter_state *st = seq->private;
452 const struct recent_table *t = st->table;
453 struct recent_entry *e;
454 loff_t p = *pos;
456 spin_lock_bh(&recent_lock);
458 for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++)
459 list_for_each_entry(e, &t->iphash[st->bucket], list)
460 if (p-- == 0)
461 return e;
462 return NULL;
465 static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
467 struct recent_iter_state *st = seq->private;
468 const struct recent_table *t = st->table;
469 const struct recent_entry *e = v;
470 const struct list_head *head = e->list.next;
472 while (head == &t->iphash[st->bucket]) {
473 if (++st->bucket >= ip_list_hash_size)
474 return NULL;
475 head = t->iphash[st->bucket].next;
477 (*pos)++;
478 return list_entry(head, struct recent_entry, list);
481 static void recent_seq_stop(struct seq_file *s, void *v)
482 __releases(recent_lock)
484 spin_unlock_bh(&recent_lock);
487 static int recent_seq_show(struct seq_file *seq, void *v)
489 const struct recent_entry *e = v;
490 unsigned int i;
492 i = (e->index - 1) % ip_pkt_list_tot;
493 if (e->family == NFPROTO_IPV4)
494 seq_printf(seq, "src=%pI4 ttl: %u last_seen: %lu oldest_pkt: %u",
495 &e->addr.ip, e->ttl, e->stamps[i], e->index);
496 else
497 seq_printf(seq, "src=%pI6 ttl: %u last_seen: %lu oldest_pkt: %u",
498 &e->addr.in6, e->ttl, e->stamps[i], e->index);
499 for (i = 0; i < e->nstamps; i++)
500 seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]);
501 seq_printf(seq, "\n");
502 return 0;
505 static const struct seq_operations recent_seq_ops = {
506 .start = recent_seq_start,
507 .next = recent_seq_next,
508 .stop = recent_seq_stop,
509 .show = recent_seq_show,
512 static int recent_seq_open(struct inode *inode, struct file *file)
514 struct proc_dir_entry *pde = PDE(inode);
515 struct recent_iter_state *st;
517 st = __seq_open_private(file, &recent_seq_ops, sizeof(*st));
518 if (st == NULL)
519 return -ENOMEM;
521 st->table = pde->data;
522 return 0;
525 static ssize_t
526 recent_mt_proc_write(struct file *file, const char __user *input,
527 size_t size, loff_t *loff)
529 const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
530 struct recent_table *t = pde->data;
531 struct recent_entry *e;
532 char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
533 const char *c = buf;
534 union nf_inet_addr addr = {};
535 u_int16_t family;
536 bool add, succ;
538 if (size == 0)
539 return 0;
540 if (size > sizeof(buf))
541 size = sizeof(buf);
542 if (copy_from_user(buf, input, size) != 0)
543 return -EFAULT;
545 /* Strict protocol! */
546 if (*loff != 0)
547 return -ESPIPE;
548 switch (*c) {
549 case '/': /* flush table */
550 spin_lock_bh(&recent_lock);
551 recent_table_flush(t);
552 spin_unlock_bh(&recent_lock);
553 return size;
554 case '-': /* remove address */
555 add = false;
556 break;
557 case '+': /* add address */
558 add = true;
559 break;
560 default:
561 pr_info("Need \"+ip\", \"-ip\" or \"/\"\n");
562 return -EINVAL;
565 ++c;
566 --size;
567 if (strnchr(c, size, ':') != NULL) {
568 family = NFPROTO_IPV6;
569 succ = in6_pton(c, size, (void *)&addr, '\n', NULL);
570 } else {
571 family = NFPROTO_IPV4;
572 succ = in4_pton(c, size, (void *)&addr, '\n', NULL);
575 if (!succ) {
576 pr_info("illegal address written to procfs\n");
577 return -EINVAL;
580 spin_lock_bh(&recent_lock);
581 e = recent_entry_lookup(t, &addr, family, 0);
582 if (e == NULL) {
583 if (add)
584 recent_entry_init(t, &addr, family, 0);
585 } else {
586 if (add)
587 recent_entry_update(t, e);
588 else
589 recent_entry_remove(t, e);
591 spin_unlock_bh(&recent_lock);
592 /* Note we removed one above */
593 *loff += size + 1;
594 return size + 1;
597 static const struct file_operations recent_mt_fops = {
598 .open = recent_seq_open,
599 .read = seq_read,
600 .write = recent_mt_proc_write,
601 .release = seq_release_private,
602 .owner = THIS_MODULE,
603 .llseek = seq_lseek,
606 static int __net_init recent_proc_net_init(struct net *net)
608 struct recent_net *recent_net = recent_pernet(net);
610 recent_net->xt_recent = proc_mkdir("xt_recent", net->proc_net);
611 if (!recent_net->xt_recent)
612 return -ENOMEM;
613 return 0;
616 static void __net_exit recent_proc_net_exit(struct net *net)
618 proc_net_remove(net, "xt_recent");
620 #else
/* Without CONFIG_PROC_FS there is nothing to set up. */
static inline int recent_proc_net_init(struct net *net)
{
	return 0;
}
/* Without CONFIG_PROC_FS there is nothing to tear down. */
static inline void recent_proc_net_exit(struct net *net)
{
}
629 #endif /* CONFIG_PROC_FS */
631 static int __net_init recent_net_init(struct net *net)
633 struct recent_net *recent_net = recent_pernet(net);
635 INIT_LIST_HEAD(&recent_net->tables);
636 return recent_proc_net_init(net);
639 static void __net_exit recent_net_exit(struct net *net)
641 struct recent_net *recent_net = recent_pernet(net);
643 BUG_ON(!list_empty(&recent_net->tables));
644 recent_proc_net_exit(net);
647 static struct pernet_operations recent_net_ops = {
648 .init = recent_net_init,
649 .exit = recent_net_exit,
650 .id = &recent_net_id,
651 .size = sizeof(struct recent_net),
654 static struct xt_match recent_mt_reg[] __read_mostly = {
656 .name = "recent",
657 .revision = 0,
658 .family = NFPROTO_IPV4,
659 .match = recent_mt,
660 .matchsize = sizeof(struct xt_recent_mtinfo),
661 .checkentry = recent_mt_check_v0,
662 .destroy = recent_mt_destroy,
663 .me = THIS_MODULE,
666 .name = "recent",
667 .revision = 0,
668 .family = NFPROTO_IPV6,
669 .match = recent_mt,
670 .matchsize = sizeof(struct xt_recent_mtinfo),
671 .checkentry = recent_mt_check_v0,
672 .destroy = recent_mt_destroy,
673 .me = THIS_MODULE,
676 .name = "recent",
677 .revision = 1,
678 .family = NFPROTO_IPV4,
679 .match = recent_mt,
680 .matchsize = sizeof(struct xt_recent_mtinfo_v1),
681 .checkentry = recent_mt_check_v1,
682 .destroy = recent_mt_destroy,
683 .me = THIS_MODULE,
686 .name = "recent",
687 .revision = 1,
688 .family = NFPROTO_IPV6,
689 .match = recent_mt,
690 .matchsize = sizeof(struct xt_recent_mtinfo_v1),
691 .checkentry = recent_mt_check_v1,
692 .destroy = recent_mt_destroy,
693 .me = THIS_MODULE,
697 static int __init recent_mt_init(void)
699 int err;
701 if (!ip_list_tot || !ip_pkt_list_tot || ip_pkt_list_tot > 255)
702 return -EINVAL;
703 ip_list_hash_size = 1 << fls(ip_list_tot);
705 err = register_pernet_subsys(&recent_net_ops);
706 if (err)
707 return err;
708 err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
709 if (err)
710 unregister_pernet_subsys(&recent_net_ops);
711 return err;
714 static void __exit recent_mt_exit(void)
716 xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
717 unregister_pernet_subsys(&recent_net_ops);
720 module_init(recent_mt_init);
721 module_exit(recent_mt_exit);