/*
 *	IP_MASQ_MARKFW masquerading module
 *
 *	Does (reverse-masq) forwarding based on skb->fwmark value
 *
 *	$Id: ip_masq_mfw.c,v 1.3 1999/01/26 05:33:47 davem Exp $
 *
 * Author:	Juan Jose Ciarlante <jjciarla@raiz.uncu.edu.ar>
 *		based on Steven Clarke's portfw
 *
 * Fixes:
 *	JuanJo Ciarlante:	added u-space sched support
 *	JuanJo Ciarlante:	if rport==0, use packet dest port *grin*
 *	JuanJo Ciarlante:	fixed tcp syn&&!ack creation
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/ip.h>
#include <linux/ip_fw.h>
#include <linux/ip_masq.h>
#include <net/ip_masq.h>
#include <net/ip_masq_mod.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <asm/softirq.h>
#include <asm/spinlock.h>
#include <asm/atomic.h>
static struct ip_masq_mod *mmod_self = NULL;
#ifdef CONFIG_IP_MASQ_DEBUG
static int debug = 0;
MODULE_PARM(debug, "i");
#endif
/*
 *	Lists structure:
 *	There is a "main" linked list with entries hashed
 *	by fwmark value (struct ip_masq_mfw, the "m-entries").
 *
 *	Each m-entry holds a doubly linked list
 *	of "forward-to" hosts (struct ip_masq_mfw_host, the "m.host");
 *	round-robin scheduling takes place by rotating m.host entries
 *	"inside" their m-entry.
 */
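/*
 *	Illustrative layout (mark, addresses and prefs are made up, not
 *	taken from this file):
 *
 *	ip_masq_mfw_table[hash(1)] -> m-entry (fwmark=1)
 *	    hosts: 192.168.1.10:80 (pref 2) <-> 192.168.1.11:80 (pref 1)
 *
 *	A hit on fwmark 1 returns the current list head; __mfw_sched()
 *	below rotates the head to the tail once its pref_cnt runs out.
 */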
/*
 *	Each forwarded host (addr:port) is stored here
 */
struct ip_masq_mfw_host {
	struct list_head list;
	__u32 addr;		/* forward-to address */
	__u16 port;		/* forward-to port (0: reuse packet dest port) */
	__u32 fwmark;		/* firewall mark that selected this host */
	int pref;		/* scheduling weight, passed from u-space */
	atomic_t pref_cnt;	/* countdown, reloaded from pref */
};

#define IP_MASQ_MFW_HSIZE	16
/*
 *	These entries are indexed by fwmark,
 *	they hold a list of forwarded addr:port
 */
struct ip_masq_mfw {
	struct ip_masq_mfw *next;	/* linked list */
	__u32 fwmark;			/* key: firewall mark */
	struct list_head hosts;		/* list of forward-to hosts */
	atomic_t nhosts;		/* number of "" */
	rwlock_t lock;			/* protects the hosts list */
};
static DECLARE_MUTEX(mfw_sema);
static rwlock_t mfw_lock = RW_LOCK_UNLOCKED;

static struct ip_masq_mfw *ip_masq_mfw_table[IP_MASQ_MFW_HSIZE];
static __inline__ int mfw_hash_val(int fwmark)
{
	return fwmark & 0x0f;
}
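/*
 *	E.g. (illustrative): with IP_MASQ_MFW_HSIZE == 16 only the low 4
 *	bits select the row, so fwmark 0x12 and fwmark 0x02 share row 2.
 */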
/*
 *	Get m-entry by "fwmark"
 *	Caller must lock tables.
 */
static struct ip_masq_mfw *__mfw_get(int fwmark)
{
	struct ip_masq_mfw *mfw;
	int hash = mfw_hash_val(fwmark);

	for (mfw = ip_masq_mfw_table[hash]; mfw; mfw = mfw->next) {
		if (mfw->fwmark == fwmark)
			goto out;
	}
out:
	/* mfw is NULL if the loop ran off the end of the row */
	return mfw;
}
/*
 *	Links m-entry.
 *	Caller should have checked if already present for same fwmark
 *
 *	Caller must lock tables.
 */
static int __mfw_add(struct ip_masq_mfw *mfw)
{
	int fwmark = mfw->fwmark;
	int hash = mfw_hash_val(fwmark);

	mfw->next = ip_masq_mfw_table[hash];
	ip_masq_mfw_table[hash] = mfw;
	ip_masq_mod_inc_nent(mmod_self);

	return 0;
}
/*
 *	Creates a m-entry (doesn't link it)
 */
static struct ip_masq_mfw *mfw_new(int fwmark)
{
	struct ip_masq_mfw *mfw;

	mfw = kmalloc(sizeof(*mfw), GFP_KERNEL);
	if (mfw == NULL)
		goto end;

	memset(mfw, 0, sizeof(*mfw));
	mfw->fwmark = fwmark;
	mfw->lock = (rwlock_t) RW_LOCK_UNLOCKED;

	INIT_LIST_HEAD(&mfw->hosts);
end:
	return mfw;
}
static void mfw_host_to_user(struct ip_masq_mfw_host *h, struct ip_mfw_user *mu)
{
	mu->raddr = h->addr;
	mu->rport = h->port;
	mu->fwmark = h->fwmark;
	mu->pref = h->pref;
}
/*
 *	Creates a m.host (doesn't link it in a m-entry)
 */
static struct ip_masq_mfw_host *mfw_host_new(struct ip_mfw_user *mu)
{
	struct ip_masq_mfw_host *mfw_host;

	mfw_host = kmalloc(sizeof(*mfw_host), GFP_KERNEL);
	if (!mfw_host)
		return NULL;

	memset(mfw_host, 0, sizeof(*mfw_host));
	mfw_host->addr = mu->raddr;
	mfw_host->port = mu->rport;
	mfw_host->fwmark = mu->fwmark;
	mfw_host->pref = mu->pref;
	atomic_set(&mfw_host->pref_cnt, mu->pref);

	return mfw_host;
}
/*
 *	Create AND link m.host to m-entry.
 *	It locks m.lock.
 */
static int mfw_addhost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu, int attail)
{
	struct ip_masq_mfw_host *mfw_host;

	mfw_host = mfw_host_new(mu);
	if (!mfw_host)
		return -ENOMEM;

	write_lock_bh(&mfw->lock);
	list_add(&mfw_host->list, attail ? mfw->hosts.prev : &mfw->hosts);
	atomic_inc(&mfw->nhosts);
	write_unlock_bh(&mfw->lock);

	return 0;
}
/*
 *	Unlink AND destroy m.host(s) from m-entry.
 *	Wildcard (nul addr or port) ok.
 *	It locks m.lock.
 */
static int mfw_delhost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu)
{
	struct list_head *l, *e, *next;
	struct ip_masq_mfw_host *h;
	int n_del = 0;

	l = &mfw->hosts;

	write_lock_bh(&mfw->lock);
	for (e = l->next; e != l; e = next) {
		/* grab the next link now: a HIT below frees this node */
		next = e->next;
		h = list_entry(e, struct ip_masq_mfw_host, list);
		if ((!mu->raddr || h->addr == mu->raddr) &&
		    (!mu->rport || h->port == mu->rport)) {
			/* HIT */
			atomic_dec(&mfw->nhosts);
			list_del(&h->list);
			kfree_s(h, sizeof(*h));
			n_del++;
		}
	}
	write_unlock_bh(&mfw->lock);
	return n_del ? 0 : -ESRCH;
}
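/*
 *	Wildcard example (illustrative): mu->raddr==0 with mu->rport==80
 *	matches every host forwarding to port 80; both zero matches all
 *	hosts of the m-entry. The same matching rule is used by
 *	__mfw_edithost() below.
 */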
/*
 *	Changes m.host parameters
 *	Wildcards ok
 *
 *	Caller must lock tables.
 */
static int __mfw_edithost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu)
{
	struct list_head *l, *e;
	struct ip_masq_mfw_host *h;
	int n_edit = 0;

	l = &mfw->hosts;

	for (e = l->next; e != l; e = e->next) {
		h = list_entry(e, struct ip_masq_mfw_host, list);
		if ((!mu->raddr || h->addr == mu->raddr) &&
		    (!mu->rport || h->port == mu->rport)) {
			/* HIT */
			h->pref = mu->pref;
			atomic_set(&h->pref_cnt, mu->pref);
			n_edit++;
		}
	}
	return n_edit ? 0 : -ESRCH;
}
/*
 *	Destroys m-entry.
 *	Caller must have checked that it doesn't hold any m.host(s)
 */
static void mfw_destroy(struct ip_masq_mfw *mfw)
{
	kfree_s(mfw, sizeof(*mfw));
}
/*
 *	Unlinks m-entry.
 *
 *	Caller must lock tables.
 */
static int __mfw_del(struct ip_masq_mfw *mfw)
{
	struct ip_masq_mfw **mfw_p;
	int ret = -EINVAL;

	for (mfw_p = &ip_masq_mfw_table[mfw_hash_val(mfw->fwmark)];
	     *mfw_p;
	     mfw_p = &((*mfw_p)->next)) {
		if (mfw == (*mfw_p)) {
			*mfw_p = mfw->next;
			ip_masq_mod_dec_nent(mmod_self);
			ret = 0;
			goto out;
		}
	}
out:
	return ret;
}
/*
 *	Crude m.host scheduler
 *	This interface could be exported to allow playing with
 *	other sched policies.
 *
 *	Caller must lock m-entry.
 */
static struct ip_masq_mfw_host *__mfw_sched(struct ip_masq_mfw *mfw, int force)
{
	struct ip_masq_mfw_host *h = NULL;

	if (atomic_read(&mfw->nhosts) == 0)
		goto out;

	/*
	 *	Here resides actual sched policy:
	 *	When pref_cnt touches 0, entry gets shifted to tail and
	 *	its pref_cnt reloaded from h->pref (actual value
	 *	passed from u-space).
	 *
	 *	Exception is pref==0: avoid scheduling.
	 */
	h = list_entry(mfw->hosts.next, struct ip_masq_mfw_host, list);

	if (atomic_read(&mfw->nhosts) <= 1)
		goto out;

	if ((h->pref && atomic_dec_and_test(&h->pref_cnt)) || force) {
		atomic_set(&h->pref_cnt, h->pref);
		list_del(&h->list);
		list_add(&h->list, mfw->hosts.prev);
	}
out:
	return h;
}
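/*
 *	Illustrative schedule (made-up prefs): with hosts A(pref=2) and
 *	B(pref=1), A at head, successive lookups return A, A (pref_cnt
 *	hits 0, A rotates to tail), then B, then A, A, B again ...
 *	i.e. a 2:1 weighted round-robin.
 */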
/*
 *	Main lookup routine.
 *	HITs fwmark and schedules m.host entries if required
 */
static struct ip_masq_mfw_host *mfw_lookup(int fwmark)
{
	struct ip_masq_mfw *mfw;
	struct ip_masq_mfw_host *h = NULL;

	read_lock(&mfw_lock);
	mfw = __mfw_get(fwmark);

	if (mfw) {
		write_lock(&mfw->lock);
		h = __mfw_sched(mfw, 0);
		write_unlock(&mfw->lock);
	}

	read_unlock(&mfw_lock);
	return h;
}
#ifdef CONFIG_PROC_FS
static int mfw_procinfo(char *buffer, char **start, off_t offset,
			int length, int dummy)
{
	struct ip_masq_mfw *mfw;
	struct ip_masq_mfw_host *h;
	struct list_head *l, *e;
	off_t pos = 0, begin;
	char temp[129];
	int idx;
	int len = 0;

	IP_MASQ_DEBUG(1-debug, "Entered mfw_info\n");

	if (offset < 64) {
		sprintf(temp, "FwMark > RAddr RPort PrCnt Pref");
		len = sprintf(buffer, "%-63s\n", temp);
	}
	pos = 64;

	for (idx = 0; idx < IP_MASQ_MFW_HSIZE; idx++) {
		read_lock(&mfw_lock);
		for (mfw = ip_masq_mfw_table[idx]; mfw; mfw = mfw->next) {
			read_lock_bh(&mfw->lock);
			l = &mfw->hosts;

			for (e = l->next; l != e; e = e->next) {
				h = list_entry(e, struct ip_masq_mfw_host, list);
				pos += 64;
				if (pos <= offset) {
					len = 0;
					continue;
				}

				sprintf(temp, "0x%x > %08lX %5u %5d %5d",
					h->fwmark,
					ntohl(h->addr), ntohs(h->port),
					atomic_read(&h->pref_cnt), h->pref);
				len += sprintf(buffer+len, "%-63s\n", temp);

				if (len >= length) {
					read_unlock_bh(&mfw->lock);
					read_unlock(&mfw_lock);
					goto done;
				}
			}
			read_unlock_bh(&mfw->lock);
		}
		read_unlock(&mfw_lock);
	}

done:
	begin = len - (pos - offset);
	*start = buffer + begin;
	len -= begin;
	if (len > length)
		len = length;
	return len;
}
static struct proc_dir_entry mfw_proc_entry = {
	0, 3, "mfw",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	mfw_procinfo
};

#define proc_ent &mfw_proc_entry
#else /* !CONFIG_PROC_FS */

#define proc_ent NULL
#endif
static void mfw_flush(void)
{
	struct ip_masq_mfw *mfw, *local_table[IP_MASQ_MFW_HSIZE];
	struct ip_masq_mfw_host *h;
	struct ip_masq_mfw *mfw_next;
	int idx;
	struct list_head *l, *e;

	write_lock_bh(&mfw_lock);
	memcpy(local_table, ip_masq_mfw_table, sizeof ip_masq_mfw_table);
	memset(ip_masq_mfw_table, 0, sizeof ip_masq_mfw_table);
	write_unlock_bh(&mfw_lock);

	/*
	 *	For every hash table row ...
	 */
	for (idx = 0; idx < IP_MASQ_MFW_HSIZE; idx++) {
		/*
		 *	For every m-entry in row ...
		 */
		for (mfw = local_table[idx]; mfw; mfw = mfw_next) {
			/*
			 *	For every m.host in m-entry ...
			 */
			l = &mfw->hosts;
			while ((e = l->next) != l) {
				h = list_entry(e, struct ip_masq_mfw_host, list);
				atomic_dec(&mfw->nhosts);
				list_del(&h->list);
				kfree_s(h, sizeof(*h));
			}

			if (atomic_read(&mfw->nhosts)) {
				IP_MASQ_ERR("mfw_flush(): after flushing row nhosts=%d\n",
					    atomic_read(&mfw->nhosts));
			}
			mfw_next = mfw->next;
			kfree_s(mfw, sizeof(*mfw));
			ip_masq_mod_dec_nent(mmod_self);
		}
	}
}
/*
 *	User space control entry point
 */
static int mfw_ctl(int optname, struct ip_masq_ctl *mctl, int optlen)
{
	struct ip_mfw_user *mu = &mctl->u.mfw_user;
	struct ip_masq_mfw *mfw;
	int ret = -EINVAL;
	int arglen = optlen - IP_MASQ_CTL_BSIZE;
	int cmd;

	IP_MASQ_DEBUG(1-debug, "ip_masq_user_ctl(len=%d/%d|%d/%d)\n",
			arglen, sizeof(*mu), optlen, sizeof(*mctl));

	/*
	 *	checks ...
	 */
	if (arglen != sizeof(*mu) && optlen != sizeof(*mctl))
		return -EINVAL;

	/*
	 *	Don't trust the lusers - plenty of error checking!
	 */
	cmd = mctl->m_cmd;
	IP_MASQ_DEBUG(1-debug, "ip_masq_mfw_ctl(cmd=%d, fwmark=%d)\n",
			cmd, mu->fwmark);

	switch (cmd) {
		case IP_MASQ_CMD_NONE:
			return 0;
		case IP_MASQ_CMD_FLUSH:
			break;
		case IP_MASQ_CMD_ADD:
		case IP_MASQ_CMD_INSERT:
		case IP_MASQ_CMD_SET:
			if (mu->fwmark == 0) {
				IP_MASQ_DEBUG(1-debug, "invalid fwmark==0\n");
				return -EINVAL;
			}
			if (mu->pref < 0) {
				IP_MASQ_DEBUG(1-debug, "invalid pref==%d\n",
						mu->pref);
				return -EINVAL;
			}
			break;
	}

	switch (cmd) {
		case IP_MASQ_CMD_ADD:
		case IP_MASQ_CMD_INSERT:
			if (!mu->raddr) {
				IP_MASQ_DEBUG(0-debug, "ip_masq_mfw_ctl(ADD): invalid redirect 0x%x:%d\n",
						mu->raddr, mu->rport);
				goto out;
			}

			/*
			 *	Cannot just use mfw_lock because below
			 *	are allocations that can sleep; so
			 *	to assure "new entry" atomic creation
			 *	the mfw_sema semaphore is held instead.
			 */
			down(&mfw_sema);

			read_lock(&mfw_lock);
			mfw = __mfw_get(mu->fwmark);
			read_unlock(&mfw_lock);

			/*
			 *	If first host, create m-entry
			 */
			if (mfw == NULL)
				mfw = mfw_new(mu->fwmark);

			if (mfw == NULL) {
				ret = -ENOMEM;
			} else {
				/*
				 *	Put m.host in m-entry.
				 */
				ret = mfw_addhost(mfw, mu, cmd == IP_MASQ_CMD_ADD);

				/*
				 *	If first host, link m-entry to hash table.
				 *	Already protected by global lock.
				 */
				if (ret == 0 && atomic_read(&mfw->nhosts) == 1) {
					write_lock_bh(&mfw_lock);
					__mfw_add(mfw);
					write_unlock_bh(&mfw_lock);
				}
				if (atomic_read(&mfw->nhosts) == 0)
					mfw_destroy(mfw);
			}

			up(&mfw_sema);
			break;

		case IP_MASQ_CMD_DEL:
			down(&mfw_sema);

			read_lock(&mfw_lock);
			mfw = __mfw_get(mu->fwmark);
			read_unlock(&mfw_lock);

			if (mfw) {
				ret = mfw_delhost(mfw, mu);

				/*
				 *	Last lease will free
				 *	XXX check logic XXX
				 */
				if (atomic_read(&mfw->nhosts) == 0) {
					write_lock_bh(&mfw_lock);
					__mfw_del(mfw);
					write_unlock_bh(&mfw_lock);
					mfw_destroy(mfw);
				}
			} else
				ret = -ESRCH;

			up(&mfw_sema);
			break;

		case IP_MASQ_CMD_FLUSH:
			down(&mfw_sema);
			mfw_flush();
			up(&mfw_sema);
			ret = 0;
			break;

		case IP_MASQ_CMD_SET:
			/*
			 *	No need to semaphorize here, main list is not
			 *	modified.
			 */
			read_lock(&mfw_lock);

			mfw = __mfw_get(mu->fwmark);
			if (mfw) {
				write_lock_bh(&mfw->lock);

				if (mu->flags & IP_MASQ_MFW_SCHED) {
					struct ip_masq_mfw_host *h;
					if ((h = __mfw_sched(mfw, 1))) {
						mfw_host_to_user(h, mu);
						ret = 0;
					}
				} else {
					ret = __mfw_edithost(mfw, mu);
				}

				write_unlock_bh(&mfw->lock);
			}

			read_unlock(&mfw_lock);
			break;
	}
out:
	return ret;
}
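/*
 *	Rough userspace sketch (assumptions, not part of this file: a
 *	privileged socket, the IP_FW_MASQ_CTL sockopt from <linux/ip_fw.h>,
 *	and the struct ip_masq_ctl layout from <net/ip_masq.h>).
 *	ipmasqadm's "mfw" target drives this entry point roughly like:
 *
 *		struct ip_masq_ctl ctl;
 *		memset(&ctl, 0, sizeof(ctl));
 *		strcpy(ctl.m_tname, "mfw");
 *		ctl.m_target = IP_MASQ_TARGET_MOD;
 *		ctl.m_cmd = IP_MASQ_CMD_ADD;
 *		ctl.u.mfw_user.fwmark = 1;
 *		ctl.u.mfw_user.raddr = inet_addr("192.168.1.10");
 *		ctl.u.mfw_user.rport = htons(80);
 *		ctl.u.mfw_user.pref = 1;
 *		setsockopt(sock, IPPROTO_IP, IP_FW_MASQ_CTL,
 *			   &ctl, sizeof(ctl));
 */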
/*
 *	Module stubs called from ip_masq core module
 */

/*
 *	Input rule stub, called very early for each incoming packet,
 *	to see if this module has "interest" in packet.
 */
static int mfw_in_rule(const struct sk_buff *skb, const struct iphdr *iph)
{
	int val;

	read_lock(&mfw_lock);
	val = (__mfw_get(skb->fwmark) != 0);
	read_unlock(&mfw_lock);
	return val;
}
/*
 *	Input-create stub, called to allow "custom" masq creation
 */
static struct ip_masq *mfw_in_create(const struct sk_buff *skb, const struct iphdr *iph, __u32 maddr)
{
	union ip_masq_tphdr tph;
	struct ip_masq *ms = NULL;
	struct ip_masq_mfw_host *h = NULL;

	tph.raw = (char *) iph + iph->ihl * 4;

	switch (iph->protocol) {
		case IPPROTO_TCP:
			/*
			 *	Only open TCP tunnel if SYN+!ACK packet
			 */
			if (!tph.th->syn && tph.th->ack)
				return NULL;
			/* fall through to common case */
		case IPPROTO_UDP:
			break;
		default:
			return NULL;
	}

	/*
	 *	If no entry exists in the masquerading table
	 *	and the port is involved
	 *	in port forwarding, create a new masq entry
	 */
	if ((h = mfw_lookup(skb->fwmark))) {
		ms = ip_masq_new(iph->protocol,
				iph->daddr, tph.portp[1],
				/* if no redir-port, use packet dest port */
				h->addr, h->port ? h->port : tph.portp[1],
				iph->saddr, tph.portp[0],
				0);
	}
	return ms;
}
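/*
 *	Illustrative flow (made-up numbers): a packet to 10.0.0.1:80 that
 *	the firewall marked with fwmark 1 picks host 192.168.1.10:80 from
 *	the fwmark-1 m-entry, so the new masq entry rewrites the
 *	destination to 192.168.1.10:80 (or keeps the packet's dest port
 *	when the host's rport is 0).
 */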
#define mfw_in_update	NULL
#define mfw_out_rule	NULL
#define mfw_out_create	NULL
#define mfw_out_update	NULL
static struct ip_masq_mod mfw_mod = {
	NULL,			/* next */
	NULL,			/* next_reg */
	"mfw",			/* name */
	ATOMIC_INIT(0),		/* nent */
	ATOMIC_INIT(0),		/* refcnt */
	proc_ent,
	mfw_ctl,
	NULL,			/* masq_mod_init */
	NULL,			/* masq_mod_done */
	mfw_in_rule,
	mfw_in_update,
	mfw_in_create,
	mfw_out_rule,
	mfw_out_update,
	mfw_out_create,
};
__initfunc(int ip_mfw_init(void))
{
	return register_ip_masq_mod((mmod_self = &mfw_mod));
}

int ip_mfw_done(void)
{
	return unregister_ip_masq_mod(&mfw_mod);
}
#ifdef MODULE
EXPORT_NO_SYMBOLS;

int init_module(void)
{
	if (ip_mfw_init() != 0)
		return -EIO;
	return 0;
}

void cleanup_module(void)
{
	if (ip_mfw_done() != 0)
		printk(KERN_INFO "can't remove module");
}

#endif /* MODULE */