/*
 * IP_MASQ_MARKFW masquerading module
 *
 * Does (reverse-masq) forwarding based on skb->fwmark value
 *
 * $Id: ip_masq_mfw.c,v 1.2 1998/12/12 02:40:42 davem Exp $
 *
 * Author: Juan Jose Ciarlante <jjciarla@raiz.uncu.edu.ar>
 *         based on Steven Clarke's portfw
 *
 * Fixes:
 *	JuanJo Ciarlante:	added u-space sched support
 *	JuanJo Ciarlante:	if rport==0, use packet dest port *grin*
 *	JuanJo Ciarlante:	fixed tcp syn&&!ack creation
 *
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <net/ip.h>
#include <linux/ip_fw.h>
#include <linux/ip_masq.h>
#include <net/ip_masq.h>
#include <net/ip_masq_mod.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <asm/softirq.h>
#include <asm/spinlock.h>
#include <asm/atomic.h>
static struct ip_masq_mod *mmod_self = NULL;

#ifdef CONFIG_IP_MASQ_DEBUG
static int debug = 0;
MODULE_PARM(debug, "i");
#endif
/*
 * Lists structure:
 * There is a "main" linked list with entries hashed
 * by fwmark value (struct ip_masq_mfw, the "m-entries").
 *
 * Each of these m-entries holds a doubly linked list
 * of "forward-to" hosts (struct ip_masq_mfw_host, the "m.host");
 * round-robin scheduling takes place by rotating m.host entries
 * "inside" their m-entry.
 */
/*
 * Each forwarded host (addr:port) is stored here
 */
struct ip_masq_mfw_host {
        struct list_head list;
        __u32 addr;
        __u16 port;
        __u16 pad0;
        __u32 fwmark;
        int pref;
        atomic_t pref_cnt;
};

#define IP_MASQ_MFW_HSIZE	16
/*
 * These entries are indexed by fwmark;
 * each holds a list of forwarded addr:port pairs
 */
struct ip_masq_mfw {
        struct ip_masq_mfw *next;	/* linked list */
        __u32 fwmark;			/* key: firewall mark */
        struct list_head hosts;		/* list of forward-to hosts */
        atomic_t nhosts;		/* number of hosts in the list */
#ifdef __SMP__
        rwlock_t lock;
#endif
};
static struct semaphore mfw_sema = MUTEX;
#ifdef __SMP__
static rwlock_t mfw_lock = RW_LOCK_UNLOCKED;
#endif

static struct ip_masq_mfw *ip_masq_mfw_table[IP_MASQ_MFW_HSIZE];
static __inline__ int mfw_hash_val(int fwmark)
{
        /* hash on the low 4 bits of the mark (table has 16 rows) */
        return fwmark & 0x0f;
}
/*
 * Get m-entry by "fwmark"
 * Caller must lock tables.
 */
static struct ip_masq_mfw *__mfw_get(int fwmark)
{
        struct ip_masq_mfw *mfw;
        int hash = mfw_hash_val(fwmark);

        for (mfw = ip_masq_mfw_table[hash]; mfw; mfw = mfw->next) {
                if (mfw->fwmark == fwmark) {
                        goto out;
                }
        }
out:
        return mfw;
}
/*
 * Links m-entry.
 * Caller should have checked if already present for same fwmark.
 * Caller must lock tables.
 */
static int __mfw_add(struct ip_masq_mfw *mfw)
{
        int fwmark = mfw->fwmark;
        int hash = mfw_hash_val(fwmark);

        mfw->next = ip_masq_mfw_table[hash];
        ip_masq_mfw_table[hash] = mfw;
        ip_masq_mod_inc_nent(mmod_self);

        return 0;
}
/*
 * Creates an m-entry (doesn't link it)
 */
static struct ip_masq_mfw * mfw_new(int fwmark)
{
        struct ip_masq_mfw *mfw;

        mfw = kmalloc(sizeof(*mfw), GFP_KERNEL);
        if (mfw == NULL)
                goto out;

        MOD_INC_USE_COUNT;
        memset(mfw, 0, sizeof(*mfw));
        mfw->fwmark = fwmark;
#ifdef __SMP__
        mfw->lock = (rwlock_t) RW_LOCK_UNLOCKED;
#endif

        INIT_LIST_HEAD(&mfw->hosts);
out:
        return mfw;
}
static void mfw_host_to_user(struct ip_masq_mfw_host *h, struct ip_mfw_user *mu)
{
        mu->raddr = h->addr;
        mu->rport = h->port;
        mu->fwmark = h->fwmark;
        mu->pref = h->pref;
}
/*
 * Creates an m.host (doesn't link it into an m-entry)
 */
static struct ip_masq_mfw_host * mfw_host_new(struct ip_mfw_user *mu)
{
        struct ip_masq_mfw_host *mfw_host;

        mfw_host = kmalloc(sizeof(*mfw_host), GFP_KERNEL);
        if (!mfw_host)
                return NULL;

        MOD_INC_USE_COUNT;
        memset(mfw_host, 0, sizeof(*mfw_host));
        mfw_host->addr = mu->raddr;
        mfw_host->port = mu->rport;
        mfw_host->fwmark = mu->fwmark;
        mfw_host->pref = mu->pref;
        atomic_set(&mfw_host->pref_cnt, mu->pref);

        return mfw_host;
}
/*
 * Create AND link m.host to m-entry.
 * It locks m.lock.
 */
static int mfw_addhost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu, int attail)
{
        struct ip_masq_mfw_host *mfw_host;

        mfw_host = mfw_host_new(mu);
        if (!mfw_host)
                return -ENOMEM;

        write_lock_bh(&mfw->lock);
        /* attail!=0: append at tail (ADD); else insert at head (INSERT) */
        list_add(&mfw_host->list, attail ? mfw->hosts.prev : &mfw->hosts);
        atomic_inc(&mfw->nhosts);
        write_unlock_bh(&mfw->lock);

        return 0;
}
/*
 * Unlink AND destroy m.host(s) from m-entry.
 * Wildcard (null addr or port) ok.
 * It uses m.lock.
 */
static int mfw_delhost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu)
{
        struct list_head *l, *e, *next;
        struct ip_masq_mfw_host *h;
        int n_del = 0;

        l = &mfw->hosts;

        write_lock_bh(&mfw->lock);
        for (e = l->next; e != l; e = next)
        {
                next = e->next;	/* save link: "e" may be freed below */
                h = list_entry(e, struct ip_masq_mfw_host, list);
                if ((!mu->raddr || h->addr == mu->raddr) &&
                    (!mu->rport || h->port == mu->rport)) {
                        /* HIT */
                        atomic_dec(&mfw->nhosts);
                        list_del(&h->list);
                        kfree_s(h, sizeof(*h));
                        MOD_DEC_USE_COUNT;
                        n_del++;
                }
        }
        write_unlock_bh(&mfw->lock);
        return n_del ? 0 : -ESRCH;
}
/*
 * Changes m.host parameters
 * Wildcards ok
 *
 * Caller must lock tables.
 */
static int __mfw_edithost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu)
{
        struct list_head *l, *e;
        struct ip_masq_mfw_host *h;
        int n_edit = 0;

        l = &mfw->hosts;

        for (e = l->next; e != l; e = e->next)
        {
                h = list_entry(e, struct ip_masq_mfw_host, list);
                if ((!mu->raddr || h->addr == mu->raddr) &&
                    (!mu->rport || h->port == mu->rport)) {
                        /* HIT */
                        h->pref = mu->pref;
                        atomic_set(&h->pref_cnt, mu->pref);
                        n_edit++;
                }
        }
        return n_edit ? 0 : -ESRCH;
}
/*
 * Destroys m-entry.
 * Caller must have checked that it doesn't hold any m.host(s).
 */
static void mfw_destroy(struct ip_masq_mfw *mfw)
{
        kfree_s(mfw, sizeof(*mfw));
        MOD_DEC_USE_COUNT;
}
/*
 * Unlink m-entry.
 *
 * Caller must lock tables.
 */
static int __mfw_del(struct ip_masq_mfw *mfw)
{
        struct ip_masq_mfw **mfw_p;
        int ret = -EINVAL;

        for (mfw_p = &ip_masq_mfw_table[mfw_hash_val(mfw->fwmark)];
             *mfw_p;
             mfw_p = &((*mfw_p)->next))
        {
                if (mfw == (*mfw_p)) {
                        *mfw_p = mfw->next;
                        ip_masq_mod_dec_nent(mmod_self);
                        ret = 0;
                        goto out;
                }
        }
out:
        return ret;
}
/*
 * Crude m.host scheduler.
 * This interface could be exported to allow playing with
 * other sched policies.
 *
 * Caller must lock m-entry.
 */
static struct ip_masq_mfw_host * __mfw_sched(struct ip_masq_mfw *mfw, int force)
{
        struct ip_masq_mfw_host *h = NULL;

        if (atomic_read(&mfw->nhosts) == 0)
                goto out;

        /*
         * Here resides actual sched policy:
         * When pref_cnt touches 0, entry gets shifted to tail and
         * its pref_cnt reloaded from h->pref (actual value
         * passed from u-space).
         *
         * Exception is pref==0: avoid scheduling.
         */
        h = list_entry(mfw->hosts.next, struct ip_masq_mfw_host, list);

        if (atomic_read(&mfw->nhosts) <= 1)
                goto out;

        if ((h->pref && atomic_dec_and_test(&h->pref_cnt)) || force) {
                atomic_set(&h->pref_cnt, h->pref);
                list_del(&h->list);
                list_add(&h->list, mfw->hosts.prev);
        }
out:
        return h;
}
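/*
 * Worked example of the policy above (illustrative, not from the
 * original source): with hosts A(pref=2) and B(pref=1), successive
 * lookups return A, A (A's pref_cnt hits 0, A rotates to tail),
 * then B (rotates), then A, A, B, ... i.e. weighted round-robin 2:1.
 * A host with pref==0 stays at the head and is never rotated.
 */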
/*
 * Main lookup routine.
 * HITs fwmark and schedules m.host entries if required
 */
static struct ip_masq_mfw_host * mfw_lookup(int fwmark)
{
        struct ip_masq_mfw *mfw;
        struct ip_masq_mfw_host *h = NULL;

        read_lock(&mfw_lock);
        mfw = __mfw_get(fwmark);

        if (mfw) {
                write_lock(&mfw->lock);
                h = __mfw_sched(mfw, 0);
                write_unlock(&mfw->lock);
        }

        read_unlock(&mfw_lock);
        return h;
}
#ifdef CONFIG_PROC_FS
static int mfw_procinfo(char *buffer, char **start, off_t offset,
                        int length, int dummy)
{
        struct ip_masq_mfw *mfw;
        struct ip_masq_mfw_host *h;
        struct list_head *l, *e;
        off_t pos = 0, begin;
        char temp[129];
        int idx = 0;
        int len = 0;

        MOD_INC_USE_COUNT;

        IP_MASQ_DEBUG(1-debug, "Entered mfw_info\n");

        if (offset < 64)
        {
                sprintf(temp, "FwMark > RAddr    RPort PrCnt  Pref");
                len = sprintf(buffer, "%-63s\n", temp);
        }
        pos = 64;

        for (idx = 0; idx < IP_MASQ_MFW_HSIZE; idx++)
        {
                read_lock(&mfw_lock);
                for (mfw = ip_masq_mfw_table[idx]; mfw; mfw = mfw->next)
                {
                        read_lock_bh(&mfw->lock);
                        l = &mfw->hosts;

                        for (e = l->next; l != e; e = e->next) {
                                h = list_entry(e, struct ip_masq_mfw_host, list);
                                pos += 64;
                                if (pos <= offset) {
                                        len = 0;
                                        continue;
                                }

                                sprintf(temp, "0x%x > %08lX %5u %5d %5d",
                                        h->fwmark,
                                        ntohl(h->addr), ntohs(h->port),
                                        atomic_read(&h->pref_cnt), h->pref);
                                len += sprintf(buffer+len, "%-63s\n", temp);

                                if (len >= length) {
                                        read_unlock_bh(&mfw->lock);
                                        read_unlock(&mfw_lock);
                                        goto done;
                                }
                        }
                        read_unlock_bh(&mfw->lock);
                }
                read_unlock(&mfw_lock);
        }

done:
        if (len) {
                begin = len - (pos - offset);
                *start = buffer + begin;
                len -= begin;
        }
        if (len > length)
                len = length;
        MOD_DEC_USE_COUNT;
        return len;
}
static struct proc_dir_entry mfw_proc_entry = {
        /* 0, 0, NULL, */
        0, 3, "mfw",
        S_IFREG | S_IRUGO, 1, 0, 0,
        0, &proc_net_inode_operations,
        mfw_procinfo
};

#define proc_ent &mfw_proc_entry

#else /* !CONFIG_PROC_FS */

#define proc_ent NULL
#endif
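/*
 * Sample output (illustrative sketch; the entry is registered through
 * the ip_masq_mod core, typically as /proc/net/ip_masq/mfw), following
 * the "0x%x > %08lX %5u %5d %5d" format used above:
 *
 *	FwMark > RAddr    RPort PrCnt  Pref
 *	0x1 > C0A80102    80     2     2
 *	0x1 > C0A80103    80     1     1
 */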
static void mfw_flush(void)
{
        struct ip_masq_mfw *mfw, *local_table[IP_MASQ_MFW_HSIZE];
        struct ip_masq_mfw_host *h;
        struct ip_masq_mfw *mfw_next;
        int idx;
        struct list_head *l, *e;

        write_lock_bh(&mfw_lock);
        memcpy(local_table, ip_masq_mfw_table, sizeof ip_masq_mfw_table);
        memset(ip_masq_mfw_table, 0, sizeof ip_masq_mfw_table);
        write_unlock_bh(&mfw_lock);

        /*
         * For every hash table row ...
         */
        for (idx = 0; idx < IP_MASQ_MFW_HSIZE; idx++) {

                /*
                 * For every m-entry in row ...
                 */
                for (mfw = local_table[idx]; mfw; mfw = mfw_next) {

                        /*
                         * For every m.host in m-entry ...
                         */
                        l = &mfw->hosts;
                        while ((e = l->next) != l) {
                                h = list_entry(e, struct ip_masq_mfw_host, list);
                                atomic_dec(&mfw->nhosts);
                                list_del(&h->list);
                                kfree_s(h, sizeof(*h));
                                MOD_DEC_USE_COUNT;
                        }

                        if (atomic_read(&mfw->nhosts)) {
                                IP_MASQ_ERR("mfw_flush(): after flushing row nhosts=%d\n",
                                        atomic_read(&mfw->nhosts));
                        }
                        mfw_next = mfw->next;
                        kfree_s(mfw, sizeof(*mfw));
                        MOD_DEC_USE_COUNT;
                        ip_masq_mod_dec_nent(mmod_self);
                }
        }
}
/*
 * User space control entry point
 */
static int mfw_ctl(int optname, struct ip_masq_ctl *mctl, int optlen)
{
        struct ip_mfw_user *mu = &mctl->u.mfw_user;
        struct ip_masq_mfw *mfw;
        int ret = -EINVAL;
        int arglen = optlen - IP_MASQ_CTL_BSIZE;
        int cmd;

        IP_MASQ_DEBUG(1-debug, "ip_masq_user_ctl(len=%d/%d|%d/%d)\n",
                        arglen,
                        sizeof(*mu),
                        optlen,
                        sizeof(*mctl));

        /*
         * checks ...
         */
        if (arglen != sizeof(*mu) && optlen != sizeof(*mctl))
                return -EINVAL;

        /*
         * Don't trust the lusers - plenty of error checking!
         */
        cmd = mctl->m_cmd;
        IP_MASQ_DEBUG(1-debug, "ip_masq_mfw_ctl(cmd=%d, fwmark=%d)\n",
                        cmd, mu->fwmark);
        switch (cmd) {
        case IP_MASQ_CMD_NONE:
                return 0;
        case IP_MASQ_CMD_FLUSH:
                break;
        case IP_MASQ_CMD_ADD:
        case IP_MASQ_CMD_INSERT:
        case IP_MASQ_CMD_SET:
                if (mu->fwmark == 0) {
                        IP_MASQ_DEBUG(1-debug, "invalid fwmark==0\n");
                        return -EINVAL;
                }
                if (mu->pref < 0) {
                        IP_MASQ_DEBUG(1-debug, "invalid pref==%d\n",
                                mu->pref);
                        return -EINVAL;
                }
                break;
        }

        ret = -EINVAL;
        switch (cmd) {
        case IP_MASQ_CMD_ADD:
        case IP_MASQ_CMD_INSERT:
                if (!mu->raddr) {
                        IP_MASQ_DEBUG(0-debug, "ip_masq_mfw_ctl(ADD): invalid redirect 0x%x:%d\n",
                                mu->raddr, mu->rport);
                        goto out;
                }

                /*
                 * Cannot just use mfw_lock because below
                 * are allocations that can sleep; so
                 * to assure "new entry" atomic creation
                 * I use a semaphore.
                 */
                down(&mfw_sema);

                read_lock(&mfw_lock);
                mfw = __mfw_get(mu->fwmark);
                read_unlock(&mfw_lock);

                /*
                 * If first host, create m-entry.
                 */
                if (mfw == NULL) {
                        mfw = mfw_new(mu->fwmark);
                        if (mfw == NULL)
                                ret = -ENOMEM;
                }

                if (mfw) {
                        /*
                         * Put m.host in m-entry.
                         */
                        ret = mfw_addhost(mfw, mu, cmd == IP_MASQ_CMD_ADD);

                        /*
                         * If first host, link m-entry to hash table.
                         * Already protected by global lock.
                         */
                        if (ret == 0 && atomic_read(&mfw->nhosts) == 1) {
                                write_lock_bh(&mfw_lock);
                                __mfw_add(mfw);
                                write_unlock_bh(&mfw_lock);
                        }
                        if (atomic_read(&mfw->nhosts) == 0) {
                                mfw_destroy(mfw);
                        }
                }
                up(&mfw_sema);

                break;
        case IP_MASQ_CMD_DEL:
                down(&mfw_sema);

                read_lock(&mfw_lock);
                mfw = __mfw_get(mu->fwmark);
                read_unlock(&mfw_lock);

                if (mfw) {
                        ret = mfw_delhost(mfw, mu);

                        /*
                         * Last lease will free.
                         * XXX check logic XXX
                         */
                        if (atomic_read(&mfw->nhosts) == 0) {
                                write_lock_bh(&mfw_lock);
                                __mfw_del(mfw);
                                write_unlock_bh(&mfw_lock);
                                mfw_destroy(mfw);
                        }
                } else
                        ret = -ESRCH;

                up(&mfw_sema);
                break;
        case IP_MASQ_CMD_FLUSH:
                down(&mfw_sema);
                mfw_flush();
                up(&mfw_sema);
                ret = 0;
                break;
        case IP_MASQ_CMD_SET:
                /*
                 * No need to semaphorize here, main list is not
                 * modified.
                 */
                read_lock(&mfw_lock);

                mfw = __mfw_get(mu->fwmark);
                if (mfw) {
                        write_lock_bh(&mfw->lock);

                        if (mu->flags & IP_MASQ_MFW_SCHED) {
                                struct ip_masq_mfw_host *h;
                                if ((h = __mfw_sched(mfw, 1))) {
                                        mfw_host_to_user(h, mu);
                                        ret = 0;
                                }
                        } else {
                                ret = __mfw_edithost(mfw, mu);
                        }

                        write_unlock_bh(&mfw->lock);
                }

                read_unlock(&mfw_lock);
                break;
        }
out:
        return ret;
}
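/*
 * Typical user-space driving sequence (illustrative sketch only; the
 * exact option spelling belongs to the ipmasqadm tool, see its manual):
 *
 *	# mark web traffic to $EXTIP with fwmark 1 (ipchains sets fwmark)
 *	ipchains -I input -p tcp -d $EXTIP 80 -m 1
 *	# forward fwmark 1 to two real servers, weighted 2:1
 *	ipmasqadm mfw -A -m 1 -r 192.168.1.2 80 -p 2
 *	ipmasqadm mfw -A -m 1 -r 192.168.1.3 80 -p 1
 *
 * Each such command reaches this module through mfw_ctl() as an
 * IP_MASQ_CMD_* request carrying a struct ip_mfw_user payload.
 */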
/*
 * Module stubs called from ip_masq core module
 */

/*
 * Input rule stub, called very early for each incoming packet,
 * to see if this module has "interest" in packet.
 */
static int mfw_in_rule(const struct sk_buff *skb, const struct iphdr *iph)
{
        int val;

        read_lock(&mfw_lock);
        val = (__mfw_get(skb->fwmark) != 0);
        read_unlock(&mfw_lock);

        return val;
}
/*
 * Input-create stub, called to allow "custom" masq creation
 */
static struct ip_masq * mfw_in_create(const struct sk_buff *skb, const struct iphdr *iph, __u32 maddr)
{
        union ip_masq_tphdr tph;
        struct ip_masq *ms = NULL;
        struct ip_masq_mfw_host *h = NULL;

        tph.raw = (char *) iph + iph->ihl * 4;

        switch (iph->protocol) {
        case IPPROTO_TCP:
                /*
                 * Only open TCP tunnel if SYN+!ACK packet
                 */
                if (!tph.th->syn && tph.th->ack)
                        return NULL;
                /* fall through */
        case IPPROTO_UDP:
                break;
        default:
                return NULL;
        }

        /*
         * If no entry exists in the masquerading table
         * and the port is involved
         * in port forwarding, create a new masq entry
         */
        if ((h = mfw_lookup(skb->fwmark))) {
                ms = ip_masq_new(iph->protocol,
                                iph->daddr, tph.portp[1],
                                /* if no redir-port, use packet dest port */
                                h->addr, h->port ? h->port : tph.portp[1],
                                iph->saddr, tph.portp[0],
                                0);

                if (ms != NULL)
                        ip_masq_listen(ms);
        }
        return ms;
}
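/*
 * Example of the rport==0 fallback above (illustrative): a host added
 * with rport 0 mirrors whatever port the client asked for, so fwmark'd
 * packets to $EXTIP:80 get forwarded to h->addr:80 and packets to
 * $EXTIP:443 to h->addr:443, all through a single mfw rule.
 */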
#define mfw_in_update	NULL
#define mfw_out_rule	NULL
#define mfw_out_create	NULL
#define mfw_out_update	NULL

static struct ip_masq_mod mfw_mod = {
        NULL,			/* next */
        NULL,			/* next_reg */
        "mfw",			/* name */
        ATOMIC_INIT(0),		/* nent */
        ATOMIC_INIT(0),		/* refcnt */
        proc_ent,
        mfw_ctl,
        NULL,			/* masq_mod_init */
        NULL,			/* masq_mod_done */
        mfw_in_rule,
        mfw_in_update,
        mfw_in_create,
        mfw_out_rule,
        mfw_out_update,
        mfw_out_create,
};
__initfunc(int ip_mfw_init(void))
{
        return register_ip_masq_mod((mmod_self = &mfw_mod));
}

int ip_mfw_done(void)
{
        return unregister_ip_masq_mod(&mfw_mod);
}

#ifdef MODULE
EXPORT_NO_SYMBOLS;

int init_module(void)
{
        if (ip_mfw_init() != 0)
                return -EIO;
        return 0;
}

void cleanup_module(void)
{
        if (ip_mfw_done() != 0)
                printk(KERN_INFO "can't remove module\n");
}

#endif /* MODULE */