/*
 *      IP_MASQ_MARKFW masquerading module
 *
 *      Does (reverse-masq) forwarding based on skb->fwmark value
 *
 *      $Id: ip_masq_mfw.c,v 1.5 1999/06/29 12:35:49 davem Exp $
 *
 *      Author: Juan Jose Ciarlante <jjciarla@raiz.uncu.edu.ar>
 *              based on Steven Clarke's portfw
 *
 *      Fixes:
 *      JuanJo Ciarlante:       added u-space sched support
 *      JuanJo Ciarlante:       if rport==0, use packet dest port *grin*
 *      JuanJo Ciarlante:       fixed tcp syn && !ack creation
 *
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <net/ip.h>
#include <linux/ip_fw.h>
#include <linux/ip_masq.h>
#include <net/ip_masq.h>
#include <net/ip_masq_mod.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <asm/softirq.h>
#include <asm/spinlock.h>
#include <asm/atomic.h>

static struct ip_masq_mod *mmod_self = NULL;

#ifdef CONFIG_IP_MASQ_DEBUG
static int debug=0;
MODULE_PARM(debug, "i");
#endif
/*
 *      Lists structure:
 *      There is a "main" linked list of entries hashed
 *      by fwmark value (struct ip_masq_mfw, the "m-entries").
 *
 *      Each of these m-entries holds a doubly linked list
 *      of "forward-to" hosts (struct ip_masq_mfw_host, the "m.host"),
 *      and round-robin scheduling takes place by rotating the m.host
 *      entries "inside" their m-entry.
 */
/*
 *      Each forwarded host (addr:port) is stored here
 */
struct ip_masq_mfw_host {
        struct list_head list;
        __u32 addr;
        __u16 port;
        __u16 pad0;
        __u32 fwmark;
        int pref;
        atomic_t pref_cnt;
};
#define IP_MASQ_MFW_HSIZE       16

/*
 *      These entries are indexed by fwmark;
 *      each holds a list of forwarded addr:port pairs
 */
struct ip_masq_mfw {
        struct ip_masq_mfw *next;       /* linked list */
        __u32 fwmark;                   /* key: firewall mark */
        struct list_head hosts;         /* list of forward-to hosts */
        atomic_t nhosts;                /* number of hosts in the list */
        rwlock_t lock;
};
static DECLARE_MUTEX(mfw_sema);
static rwlock_t mfw_lock = RW_LOCK_UNLOCKED;

static struct ip_masq_mfw *ip_masq_mfw_table[IP_MASQ_MFW_HSIZE];

static __inline__ int mfw_hash_val(int fwmark)
{
        /* low nibble of the mark: mask must match IP_MASQ_MFW_HSIZE == 16 */
        return fwmark & 0x0f;
}
/*
 *      Get m-entry by "fwmark"
 *      Caller must lock tables.
 */
static struct ip_masq_mfw *__mfw_get(int fwmark)
{
        struct ip_masq_mfw *mfw;
        int hash = mfw_hash_val(fwmark);

        for (mfw=ip_masq_mfw_table[hash]; mfw; mfw=mfw->next) {
                if (mfw->fwmark==fwmark) {
                        goto out;
                }
        }
out:
        return mfw;
}
/*
 *      Links m-entry.
 *      Caller should have checked if already present for same fwmark
 *
 *      Caller must lock tables.
 */
static int __mfw_add(struct ip_masq_mfw *mfw)
{
        int fwmark = mfw->fwmark;
        int hash = mfw_hash_val(fwmark);

        mfw->next = ip_masq_mfw_table[hash];
        ip_masq_mfw_table[hash] = mfw;
        ip_masq_mod_inc_nent(mmod_self);

        return 0;
}
/*
 *      Creates a m-entry (doesn't link it)
 */
static struct ip_masq_mfw * mfw_new(int fwmark)
{
        struct ip_masq_mfw *mfw;

        mfw = kmalloc(sizeof(*mfw), GFP_KERNEL);
        if (mfw == NULL)
                goto out;

        MOD_INC_USE_COUNT;
        memset(mfw, 0, sizeof(*mfw));
        mfw->fwmark = fwmark;
        mfw->lock = (rwlock_t) RW_LOCK_UNLOCKED;

        INIT_LIST_HEAD(&mfw->hosts);
out:
        return mfw;
}
static void mfw_host_to_user(struct ip_masq_mfw_host *h, struct ip_mfw_user *mu)
{
        mu->raddr = h->addr;
        mu->rport = h->port;
        mu->fwmark = h->fwmark;
        mu->pref = h->pref;
}
/*
 *      Creates a m.host (doesn't link it in a m-entry)
 */
static struct ip_masq_mfw_host * mfw_host_new(struct ip_mfw_user *mu)
{
        struct ip_masq_mfw_host *mfw_host;

        mfw_host = kmalloc(sizeof(*mfw_host), GFP_KERNEL);
        if (!mfw_host)
                return NULL;

        MOD_INC_USE_COUNT;
        memset(mfw_host, 0, sizeof(*mfw_host));
        mfw_host->addr = mu->raddr;
        mfw_host->port = mu->rport;
        mfw_host->fwmark = mu->fwmark;
        mfw_host->pref = mu->pref;
        atomic_set(&mfw_host->pref_cnt, mu->pref);

        return mfw_host;
}
/*
 *      Create AND link m.host to m-entry.
 *      It locks m.lock.
 */
static int mfw_addhost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu, int attail)
{
        struct ip_masq_mfw_host *mfw_host;

        mfw_host = mfw_host_new(mu);
        if (!mfw_host)
                return -ENOMEM;

        write_lock_bh(&mfw->lock);
        /* attail != 0 appends at tail (ADD); attail == 0 prepends (INSERT) */
        list_add(&mfw_host->list, attail? mfw->hosts.prev : &mfw->hosts);
        atomic_inc(&mfw->nhosts);
        write_unlock_bh(&mfw->lock);

        return 0;
}
/*
 *      Unlink AND destroy m.host(s) from m-entry.
 *      Wildcard (null addr or port) ok.
 *      It uses m.lock.
 */
static int mfw_delhost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu)
{
        struct list_head *l,*e,*next;
        struct ip_masq_mfw_host *h;
        int n_del = 0;
        l = &mfw->hosts;

        write_lock_bh(&mfw->lock);
        /* grab e->next up front: e may be list_del()'d and freed below */
        for (e=l->next; e!=l; e=next)
        {
                next = e->next;
                h = list_entry(e, struct ip_masq_mfw_host, list);
                if ((!mu->raddr || h->addr == mu->raddr) &&
                        (!mu->rport || h->port == mu->rport)) {
                        /* HIT */
                        atomic_dec(&mfw->nhosts);
                        list_del(&h->list);
                        kfree_s(h, sizeof(*h));
                        MOD_DEC_USE_COUNT;
                        n_del++;
                }
        }
        write_unlock_bh(&mfw->lock);
        return n_del? 0 : -ESRCH;
}
/*
 *      Changes m.host parameters
 *      Wildcards ok
 *
 *      Caller must lock tables.
 */
static int __mfw_edithost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu)
{
        struct list_head *l,*e;
        struct ip_masq_mfw_host *h;
        int n_edit = 0;
        l = &mfw->hosts;

        for (e=l->next; e!=l; e=e->next)
        {
                h = list_entry(e, struct ip_masq_mfw_host, list);
                if ((!mu->raddr || h->addr == mu->raddr) &&
                        (!mu->rport || h->port == mu->rport)) {
                        /* HIT */
                        h->pref = mu->pref;
                        atomic_set(&h->pref_cnt, mu->pref);
                        n_edit++;
                }
        }
        return n_edit? 0 : -ESRCH;
}
/*
 *      Destroys m-entry.
 *      Caller must have checked that it doesn't hold any m.host(s)
 */
static void mfw_destroy(struct ip_masq_mfw *mfw)
{
        kfree_s(mfw, sizeof(*mfw));
        MOD_DEC_USE_COUNT;
}
/*
 *      Unlink m-entry.
 *
 *      Caller must lock tables.
 */
static int __mfw_del(struct ip_masq_mfw *mfw)
{
        struct ip_masq_mfw **mfw_p;
        int ret = -EINVAL;

        for(mfw_p=&ip_masq_mfw_table[mfw_hash_val(mfw->fwmark)];
                        *mfw_p;
                        mfw_p = &((*mfw_p)->next))
        {
                if (mfw==(*mfw_p)) {
                        *mfw_p = mfw->next;
                        ip_masq_mod_dec_nent(mmod_self);
                        ret = 0;
                        goto out;
                }
        }
out:
        return ret;
}
/*
 *      Crude m.host scheduler
 *      This interface could be exported to allow playing with
 *      other sched policies.
 *
 *      Caller must lock m-entry.
 */
static struct ip_masq_mfw_host * __mfw_sched(struct ip_masq_mfw *mfw, int force)
{
        struct ip_masq_mfw_host *h = NULL;

        if (atomic_read(&mfw->nhosts) == 0)
                goto out;

        /*
         *      Here resides the actual sched policy:
         *      When pref_cnt touches 0, the entry gets shifted to the tail
         *      and its pref_cnt reloaded from h->pref (the actual value
         *      passed from u-space).
         *
         *      Exception is pref==0: avoid scheduling.
         */
        h = list_entry(mfw->hosts.next, struct ip_masq_mfw_host, list);

        if (atomic_read(&mfw->nhosts) <= 1)
                goto out;

        if ((h->pref && atomic_dec_and_test(&h->pref_cnt)) || force) {
                atomic_set(&h->pref_cnt, h->pref);
                list_del(&h->list);
                list_add(&h->list, mfw->hosts.prev);
        }
out:
        return h;
}
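/*
 *      Worked example (added commentary, not in the original source):
 *      two hosts, A with pref==2 and B with pref==1, nhosts==2.
 *
 *      call 1: head A, pref_cnt 2->1 (not zero)        => return A, no rotation
 *      call 2: head A, pref_cnt 1->0, reload 2, A to tail => return A
 *      call 3: head B, pref_cnt 1->0, reload 1, B to tail => return B
 *
 *      Steady state thus serves A,A,B,A,A,B,... i.e. weighted round-robin
 *      with weights taken from pref.  pref==0 keeps the head pinned
 *      (no rotation) unless "force" is set, as the IP_MASQ_CMD_SET
 *      scheduling path below does.
 */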
/*
 *      Main lookup routine.
 *      HITs fwmark and schedules m.host entries if required
 */
static struct ip_masq_mfw_host * mfw_lookup(int fwmark)
{
        struct ip_masq_mfw *mfw;
        struct ip_masq_mfw_host *h = NULL;

        read_lock(&mfw_lock);
        mfw = __mfw_get(fwmark);

        if (mfw) {
                write_lock(&mfw->lock);
                h = __mfw_sched(mfw, 0);
                write_unlock(&mfw->lock);
        }

        read_unlock(&mfw_lock);
        return h;
}
#ifdef CONFIG_PROC_FS
static int mfw_procinfo(char *buffer, char **start, off_t offset,
                        int length, int dummy)
{
        struct ip_masq_mfw *mfw;
        struct ip_masq_mfw_host *h;
        struct list_head *l,*e;
        off_t pos=0, begin;
        char temp[129];
        int idx = 0;
        int len=0;

        MOD_INC_USE_COUNT;

        IP_MASQ_DEBUG(1-debug, "Entered mfw_info\n");

        if (offset < 64)
        {
                sprintf(temp, "FwMark > RAddr    RPort PrCnt  Pref");
                len = sprintf(buffer, "%-63s\n", temp);
        }
        pos = 64;

        for(idx = 0; idx < IP_MASQ_MFW_HSIZE; idx++)
        {
                read_lock(&mfw_lock);
                for(mfw = ip_masq_mfw_table[idx]; mfw; mfw = mfw->next)
                {
                        read_lock_bh(&mfw->lock);
                        l=&mfw->hosts;

                        for(e=l->next; l!=e; e=e->next) {
                                h = list_entry(e, struct ip_masq_mfw_host, list);
                                pos += 64;
                                if (pos <= offset) {
                                        len = 0;
                                        continue;
                                }

                                sprintf(temp, "0x%x    > %08lX %5u %5d %5d",
                                        h->fwmark,
                                        ntohl(h->addr), ntohs(h->port),
                                        atomic_read(&h->pref_cnt), h->pref);
                                len += sprintf(buffer+len, "%-63s\n", temp);

                                if(len >= length) {
                                        read_unlock_bh(&mfw->lock);
                                        read_unlock(&mfw_lock);
                                        goto done;
                                }
                        }
                        read_unlock_bh(&mfw->lock);
                }
                read_unlock(&mfw_lock);
        }

done:
        if (len) {
                begin = len - (pos - offset);
                *start = buffer + begin;
                len -= begin;
        }
        if(len>length)
                len = length;
        MOD_DEC_USE_COUNT;
        return len;
}
static struct proc_dir_entry mfw_proc_entry = {
                /* low_ino, namelen, name */
                0, 3, "mfw",
                S_IFREG | S_IRUGO, 1, 0, 0,
                0, &proc_net_inode_operations,
                mfw_procinfo
};

#define proc_ent &mfw_proc_entry

#else /* !CONFIG_PROC_FS */

#define proc_ent NULL

#endif
static void mfw_flush(void)
{
        struct ip_masq_mfw *mfw, *local_table[IP_MASQ_MFW_HSIZE];
        struct ip_masq_mfw_host *h;
        struct ip_masq_mfw *mfw_next;
        int idx;
        struct list_head *l,*e;

        write_lock_bh(&mfw_lock);
        memcpy(local_table, ip_masq_mfw_table, sizeof ip_masq_mfw_table);
        memset(ip_masq_mfw_table, 0, sizeof ip_masq_mfw_table);
        write_unlock_bh(&mfw_lock);

        /*
         *      For every hash table row ...
         */
        for(idx=0;idx<IP_MASQ_MFW_HSIZE;idx++) {

                /*
                 *      For every m-entry in row ...
                 */
                for(mfw=local_table[idx];mfw;mfw=mfw_next) {

                        /*
                         *      For every m.host in m-entry ...
                         */
                        l=&mfw->hosts;
                        while((e=l->next) != l) {
                                h = list_entry(e, struct ip_masq_mfw_host, list);
                                atomic_dec(&mfw->nhosts);
                                list_del(&h->list);
                                kfree_s(h, sizeof(*h));
                                MOD_DEC_USE_COUNT;
                        }

                        if (atomic_read(&mfw->nhosts)) {
                                IP_MASQ_ERR("mfw_flush(): after flushing row nhosts=%d\n",
                                        atomic_read(&mfw->nhosts));
                        }
                        mfw_next = mfw->next;
                        kfree_s(mfw, sizeof(*mfw));
                        MOD_DEC_USE_COUNT;
                        ip_masq_mod_dec_nent(mmod_self);
                }
        }
}
/*
 *      User space control entry point
 */
static int mfw_ctl(int optname, struct ip_masq_ctl *mctl, int optlen)
{
        struct ip_mfw_user *mu = &mctl->u.mfw_user;
        struct ip_masq_mfw *mfw;
        int ret = -EINVAL;      /* was EINVAL (missing sign) */
        int arglen = optlen - IP_MASQ_CTL_BSIZE;
        int cmd;

        IP_MASQ_DEBUG(1-debug, "ip_masq_user_ctl(len=%d/%d|%d/%d)\n",
                        arglen,
                        sizeof (*mu),
                        optlen,
                        sizeof (*mctl));

        /*
         *      checks ...
         */
        if (arglen != sizeof(*mu) && optlen != sizeof(*mctl))
                return -EINVAL;

        /*
         *      Don't trust the lusers - plenty of error checking!
         */
        cmd = mctl->m_cmd;
        IP_MASQ_DEBUG(1-debug, "ip_masq_mfw_ctl(cmd=%d, fwmark=%d)\n",
                        cmd, mu->fwmark);
        switch(cmd) {
        case IP_MASQ_CMD_NONE:
                return 0;
        case IP_MASQ_CMD_FLUSH:
                break;
        case IP_MASQ_CMD_ADD:
        case IP_MASQ_CMD_INSERT:
        case IP_MASQ_CMD_SET:
                if (mu->fwmark == 0) {
                        IP_MASQ_DEBUG(1-debug, "invalid fwmark==0\n");
                        return -EINVAL;
                }
                if (mu->pref < 0) {
                        IP_MASQ_DEBUG(1-debug, "invalid pref==%d\n",
                                        mu->pref);
                        return -EINVAL;
                }
                break;
        }

        ret = -EINVAL;
        switch(cmd) {
        case IP_MASQ_CMD_ADD:
        case IP_MASQ_CMD_INSERT:
                if (!mu->raddr) {
                        IP_MASQ_DEBUG(0-debug, "ip_masq_mfw_ctl(ADD): invalid redirect 0x%x:%d\n",
                                        mu->raddr, mu->rport);
                        goto out;
                }

                /*
                 *      Cannot just use mfw_lock because below
                 *      are allocations that can sleep; so
                 *      to assure "new entry" atomic creation
                 *      I use a semaphore.
                 */
                down(&mfw_sema);

                read_lock(&mfw_lock);
                mfw = __mfw_get(mu->fwmark);
                read_unlock(&mfw_lock);

                /*
                 *      If first host, create m-entry
                 */
                if (mfw == NULL) {
                        mfw = mfw_new(mu->fwmark);
                        if (mfw == NULL)
                                ret = -ENOMEM;
                }

                if (mfw) {
                        /*
                         *      Put m.host in m-entry.
                         */
                        ret = mfw_addhost(mfw, mu, cmd == IP_MASQ_CMD_ADD);

                        /*
                         *      If first host, link m-entry to hash table.
                         *      Already protected by global lock.
                         */
                        if (ret == 0 && atomic_read(&mfw->nhosts) == 1) {
                                write_lock_bh(&mfw_lock);
                                __mfw_add(mfw);
                                write_unlock_bh(&mfw_lock);
                        }
                        if (atomic_read(&mfw->nhosts) == 0) {
                                mfw_destroy(mfw);
                        }
                }

                up(&mfw_sema);
                break;
        case IP_MASQ_CMD_DEL:
                down(&mfw_sema);

                read_lock(&mfw_lock);
                mfw = __mfw_get(mu->fwmark);
                read_unlock(&mfw_lock);

                if (mfw) {
                        ret = mfw_delhost(mfw, mu);

                        /*
                         *      Deleting the last host frees the m-entry.
                         *      XXX check logic XXX
                         */
                        if (atomic_read(&mfw->nhosts) == 0) {
                                write_lock_bh(&mfw_lock);
                                __mfw_del(mfw);
                                write_unlock_bh(&mfw_lock);
                                mfw_destroy(mfw);
                        }
                } else
                        ret = -ESRCH;

                up(&mfw_sema);
                break;
        case IP_MASQ_CMD_FLUSH:
                down(&mfw_sema);
                mfw_flush();
                up(&mfw_sema);
                ret = 0;
                break;
        case IP_MASQ_CMD_SET:
                /*
                 *      No need to semaphorize here, main list is not
                 *      modified.
                 */
                read_lock(&mfw_lock);

                mfw = __mfw_get(mu->fwmark);
                if (mfw) {
                        write_lock_bh(&mfw->lock);

                        if (mu->flags & IP_MASQ_MFW_SCHED) {
                                struct ip_masq_mfw_host *h;
                                if ((h=__mfw_sched(mfw, 1))) {
                                        mfw_host_to_user(h, mu);
                                        ret = 0;
                                }
                        } else {
                                ret = __mfw_edithost(mfw, mu);
                        }

                        write_unlock_bh(&mfw->lock);
                }

                read_unlock(&mfw_lock);
                break;
        }
out:
        return ret;
}
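/*
 *      Usage sketch (added commentary, not from this file): mfw_ctl is
 *      reached through the ip_masq user-control interface, normally
 *      driven by the ipmasqadm "mfw" frontend.  Assuming that tool,
 *      a 2:1 weighted forward of marked packets could look like:
 *
 *              # mark inbound web traffic with fwmark 1 (ipchains -m)
 *              ipchains -A input -p tcp -d $EXTIP 80 -m 1
 *              # bind mark 1 to two real servers, prefs 2 and 1
 *              ipmasqadm mfw -A -m 1 -r 192.168.1.2 80 -p 2
 *              ipmasqadm mfw -A -m 1 -r 192.168.1.3 80 -p 1
 *
 *      Exact flags may differ between ipmasqadm versions; this is an
 *      illustration of the ADD path above, not a reference.
 */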
/*
 *      Module stubs called from ip_masq core module
 */

/*
 *      Input rule stub, called very early for each incoming packet,
 *      to see if this module has "interest" in the packet.
 */
static int mfw_in_rule(const struct sk_buff *skb, const struct iphdr *iph)
{
        int val;
        read_lock(&mfw_lock);
        val = ( __mfw_get(skb->fwmark) != 0);
        read_unlock(&mfw_lock);
        return val;
}
/*
 *      Input-create stub, called to allow "custom" masq creation
 */
static struct ip_masq * mfw_in_create(const struct sk_buff *skb, const struct iphdr *iph, __u32 maddr)
{
        union ip_masq_tphdr tph;
        struct ip_masq *ms = NULL;
        struct ip_masq_mfw_host *h = NULL;

        tph.raw = (char*) iph + iph->ihl * 4;

        switch (iph->protocol) {
        case IPPROTO_TCP:
                /*
                 *      Only open a TCP tunnel for SYN && !ACK packets
                 */
                if (!tph.th->syn && tph.th->ack)
                        return NULL;
                /* fall through to the UDP case */
        case IPPROTO_UDP:
                break;
        default:
                return NULL;
        }

        /*
         *      If the packet's fwmark is involved in mark forwarding,
         *      pick (schedule) a destination host and create a new
         *      masq entry for it
         */
        if ((h=mfw_lookup(skb->fwmark))) {
                ms = ip_masq_new(iph->protocol,
                                iph->daddr, tph.portp[1],
                                /* if no redir-port, use packet dest port */
                                h->addr, h->port? h->port : tph.portp[1],
                                iph->saddr, tph.portp[0],
                                0);

                if (ms != NULL)
                        ip_masq_listen(ms);
        }
        return ms;
}
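/*
 *      Resulting packet path (summary added here, not in the original
 *      source): a packet arrives carrying a non-zero skb->fwmark set by
 *      the firewall rules; mfw_in_rule() reports interest if an m-entry
 *      exists for that mark; mfw_in_create() then schedules one m.host
 *      via mfw_lookup() and builds a masq entry that rewrites the
 *      destination to that host's addr:port, so subsequent packets and
 *      replies are translated by the ip_masq core.
 */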
#define mfw_in_update   NULL
#define mfw_out_rule    NULL
#define mfw_out_create  NULL
#define mfw_out_update  NULL

static struct ip_masq_mod mfw_mod = {
        NULL,                   /* next */
        NULL,                   /* next_reg */
        "mfw",                  /* name */
        ATOMIC_INIT(0),         /* nent */
        ATOMIC_INIT(0),         /* refcnt */
        proc_ent,
        mfw_ctl,
        NULL,                   /* masq_mod_init */
        NULL,                   /* masq_mod_done */
        mfw_in_rule,
        mfw_in_update,
        mfw_in_create,
        mfw_out_rule,
        mfw_out_update,
        mfw_out_create,
};
__initfunc(int ip_mfw_init(void))
{
        return register_ip_masq_mod((mmod_self=&mfw_mod));
}

int ip_mfw_done(void)
{
        return unregister_ip_masq_mod(&mfw_mod);
}

#ifdef MODULE
EXPORT_NO_SYMBOLS;

int init_module(void)
{
        if (ip_mfw_init() != 0)
                return -EIO;
        return 0;
}

void cleanup_module(void)
{
        if (ip_mfw_done() != 0)
                printk(KERN_INFO "can't remove module\n");
}

#endif /* MODULE */