initial commit with v2.6.9
[linux-2.6.9-moxart.git] / net / ipv4 / netfilter / ipchains_core.c
blob8fd8c1a5cefcdd04c5b0b3b755ccd3427aecbd2f
1 #warning ipchains is obsolete, and will be removed soon.
3 /* Minor modifications to fit on compatibility framework:
4 Rusty.Russell@rustcorp.com.au
5 */
7 /*
8 * This code is heavily based on the code on the old ip_fw.c code; see below for
9 * copyrights and attributions of the old code. This code is basically GPL.
11 * 15-Aug-1997: Major changes to allow graphs for firewall rules.
12 * Paul Russell <Paul.Russell@rustcorp.com.au> and
13 * Michael Neuling <Michael.Neuling@rustcorp.com.au>
14 * 24-Aug-1997: Generalised protocol handling (not just TCP/UDP/ICMP).
15 * Added explicit RETURN from chains.
16 * Removed TOS mangling (done in ipchains 1.0.1).
17 * Fixed read & reset bug by reworking proc handling.
18 * Paul Russell <Paul.Russell@rustcorp.com.au>
19 * 28-Sep-1997: Added packet marking for net sched code.
20 * Removed fw_via comparisons: all done on device name now,
21 * similar to changes in ip_fw.c in DaveM's CVS970924 tree.
22 * Paul Russell <Paul.Russell@rustcorp.com.au>
23 * 2-Nov-1997: Moved types across to __u16, etc.
24 * Added inverse flags.
25 * Fixed fragment bug (in args to port_match).
26 * Changed mark to only one flag (MARKABS).
27 * 21-Nov-1997: Added ability to test ICMP code.
28 * 19-Jan-1998: Added wildcard interfaces.
29 * 6-Feb-1998: Merged 2.0 and 2.1 versions.
30 * Initialised ip_masq for 2.0.x version.
31 * Added explicit NETLINK option for 2.1.x version.
32 * Added packet and byte counters for policy matches.
33 * 26-Feb-1998: Fixed race conditions, added SMP support.
34 * 18-Mar-1998: Fix SMP, fix race condition fix.
35 * 1-May-1998: Remove caching of device pointer.
36 * 12-May-1998: Allow tiny fragment case for TCP/UDP.
37 * 15-May-1998: Treat short packets as fragments, don't just block.
38 * 3-Jan-1999: Fixed serious procfs security hole -- users should never
39 * be allowed to view the chains!
40 * Marc Santoro <ultima@snicker.emoti.com>
41 * 29-Jan-1999: Locally generated bogus IPs dealt with, rather than crash
42 * during dump_packet. --RR.
43 * 19-May-1999: Star Wars: The Phantom Menace opened. Rule num
44 * printed in log (modified from Michael Hasenstein's patch).
45 * Added SYN in log message. --RR
46 * 23-Jul-1999: Fixed small fragment security exposure opened on 15-May-1998.
47 * John McDonald <jm@dataprotect.com>
48 * Thomas Lopatic <tl@dataprotect.com>
53 * The original Linux port was done by Alan Cox, with changes/fixes from
54 * Pauline Middlelink, Jos Vos, Thomas Quinot, Wouter Gadeyne, Juan
55 * Jose Ciarlante, Bernd Eckenfels, Keith Owens and others.
57 * Copyright from the original FreeBSD version follows:
59 * Copyright (c) 1993 Daniel Boulet
60 * Copyright (c) 1994 Ugen J.S.Antsilevich
62 * Redistribution and use in source forms, with and without modification,
63 * are permitted provided that this entire comment appears intact.
65 * Redistribution in binary form may occur without any restrictions.
66 * Obviously, it would be nice if you gave credit where credit is due
67 * but requiring it would be too onerous.
69 * This software is provided ``AS IS'' without any warranties of any kind. */
71 #include <linux/config.h>
73 #include <asm/uaccess.h>
74 #include <asm/system.h>
75 #include <linux/types.h>
76 #include <linux/sched.h>
77 #include <linux/string.h>
78 #include <linux/errno.h>
79 #include <linux/module.h>
81 #include <linux/socket.h>
82 #include <linux/sockios.h>
83 #include <linux/in.h>
84 #include <linux/inet.h>
85 #include <linux/netdevice.h>
86 #include <linux/icmp.h>
87 #include <linux/udp.h>
88 #include <net/ip.h>
89 #include <net/protocol.h>
90 #include <net/route.h>
91 #include <net/tcp.h>
92 #include <net/udp.h>
93 #include <net/sock.h>
94 #include <net/icmp.h>
95 #include <linux/netlink.h>
96 #include <linux/netfilter.h>
97 #include <linux/netfilter_ipv4/compat_firewall.h>
98 #include <linux/netfilter_ipv4/ipchains_core.h>
99 #include <linux/netfilter_ipv4/ip_nat_core.h>
101 #include <net/checksum.h>
102 #include <linux/proc_fs.h>
103 #include <linux/stat.h>
105 MODULE_LICENSE("Dual BSD/GPL");
106 MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
107 MODULE_DESCRIPTION("ipchains backwards compatibility layer");
109 /* Understanding locking in this code: (thanks to Alan Cox for using
110 * little words to explain this to me). -- PR
112 * In UP, there can be two packets traversing the chains:
113 * 1) A packet from the current userspace context
114 * 2) A packet off the bh handlers (timer or net).
116 * For SMP (kernel v2.1+), multiply this by # CPUs.
118 * [Note that this is not correct for 2.2 - because the socket code always
119 * uses lock_kernel() to serialize, and bottom halves (timers and net_bhs)
120 * only run on one CPU at a time. This will probably change for 2.3.
121 * It is still good to use spinlocks because that avoids the global cli()
122 * for updating the tables, which is rather costly in SMP kernels -AK]
124 * This means counters and backchains can get corrupted if no precautions
125 * are taken.
127 * To actually alter a chain on UP, we need only do a cli(), as this will
128 * stop a bh handler firing, as we are in the current userspace context
129 * (coming from a setsockopt()).
131 * On SMP, we need a write_lock_irqsave(), which is a simple cli() in
132 * UP.
134 * For backchains and counters, we use an array, indexed by
135 * [smp_processor_id()*2 + !in_interrupt()]; the array is of
136 * size [NR_CPUS*2]. For v2.0, NR_CPUS is effectively 1. So,
137 * confident of uniqueness, we modify counters even though we only
138 * have a read lock (to read the counters, you need a write lock,
139 * though). */
141 /* Why I didn't use straight locking... -- PR
143 * The backchains can be separated out of the ip_chains structure, and
144 * allocated as needed inside ip_fw_check().
146 * The counters, however, can't. Trying to lock these means blocking
147 * interrupts every time we want to access them. This would suck HARD
148 * performance-wise. Not locking them leads to possible corruption,
149 * made worse on 32-bit machines (counters are 64-bit). */
151 /*#define DEBUG_IP_FIREWALL*/
152 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
153 /*#define DEBUG_IP_FIREWALL_USER*/
154 /*#define DEBUG_IP_FIREWALL_LOCKING*/
#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
/* Socket handed to netlink_broadcast() for rules flagged IP_FW_F_NETLINK. */
static struct sock *ipfwsk;
#endif

/*
 * Index into the per-context arrays (backchain records and counters).
 * Each CPU owns two consecutive slots; the second one is used when the
 * code is not running in interrupt context.
 */
#ifndef CONFIG_SMP
#define SLOT_NUMBER() (!in_interrupt())
#else /* SMP */
#define SLOT_NUMBER() (smp_processor_id()*2 + !in_interrupt())
#endif /* !CONFIG_SMP */
#define NUM_SLOTS (NR_CPUS*2)

/* Allocation sizes: the fixed structure plus one trailing entry per slot. */
#define SIZEOF_STRUCT_IP_CHAIN \
	(sizeof(struct ip_chain) + NUM_SLOTS*sizeof(struct ip_reent))
#define SIZEOF_STRUCT_IP_FW_KERNEL \
	(sizeof(struct ip_fwkernel) + NUM_SLOTS*sizeof(struct ip_counters))
/*
 * Lock bookkeeping for debugging.  With DEBUG_IP_FIREWALL_LOCKING defined,
 * two bitmasks (one bit per slot) record which slots believe they hold the
 * read or the write lock, and a message is printed whenever a lock is taken
 * twice or released/asserted without being held.  Without it every check
 * expands to an empty statement.
 */
#ifndef DEBUG_IP_FIREWALL_LOCKING

#define FWC_DEBUG_LOCK(mask) do { } while(0)
#define FWC_DEBUG_UNLOCK(mask) do { } while(0)
#define FWC_DONT_HAVE_LOCK(mask) do { } while(0)
#define FWC_HAVE_LOCK(mask) do { } while(0)

#else /* DEBUG_IP_FIREWALL_LOCKING */

static unsigned int fwc_rlocks, fwc_wlocks;

/* Complain unless the current slot's bit is set in mask. */
#define FWC_HAVE_LOCK(mask)					\
do {								\
	if (!((mask) & (1 << SLOT_NUMBER())))			\
		printk("%s:%i:No lock on %i!\n",		\
		       __FILE__, __LINE__, SLOT_NUMBER());	\
} while (0)

/* Complain if the current slot's bit is already set in mask. */
#define FWC_DONT_HAVE_LOCK(mask)				\
do {								\
	if ((mask) & (1 << SLOT_NUMBER()))			\
		printk("%s:%i: Got lock on %i already!\n",	\
		       __FILE__, __LINE__, SLOT_NUMBER());	\
} while(0)

/* Record that the current slot now holds the lock tracked by mask. */
#define FWC_DEBUG_LOCK(mask)					\
do {								\
	FWC_DONT_HAVE_LOCK(mask);				\
	mask |= (1 << SLOT_NUMBER());				\
} while (0)

/* Record that the current slot dropped the lock tracked by mask. */
#define FWC_DEBUG_UNLOCK(mask)					\
do {								\
	FWC_HAVE_LOCK(mask);					\
	mask &= ~(1 << SLOT_NUMBER());				\
} while (0)

#endif /* DEBUG_IP_FIREWALL_LOCKING */

/* Lock wrappers: update the debug bookkeeping, then take the lock. */
#define FWC_READ_LOCK(lock) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock(lock); } while (0)
#define FWC_READ_LOCK_IRQ(lock,flags) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock_irqsave(lock,flags); } while (0)
#define FWC_WRITE_LOCK(lock) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock(lock); } while (0)
#define FWC_WRITE_LOCK_IRQ(lock,flags) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock_irqsave(lock,flags); } while (0)

/* Unlock wrappers: update the debug bookkeeping, then release the lock. */
#define FWC_READ_UNLOCK(lock) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock(lock); } while (0)
#define FWC_READ_UNLOCK_IRQ(lock,flags) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock_irqrestore(lock,flags); } while (0)
#define FWC_WRITE_UNLOCK(lock) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock(lock); } while (0)
#define FWC_WRITE_UNLOCK_IRQ(lock,flags) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock_irqrestore(lock,flags); } while (0)
struct ip_chain;

/* A packet/byte tally; rules and chains each keep one of these per slot. */
struct ip_counters
{
	__u64 pcnt;	/* Packet counter */
	__u64 bcnt;	/* Byte counter */
};
223 struct ip_fwkernel
225 struct ip_fw ipfw;
226 struct ip_fwkernel *next; /* where to go next if current
227 * rule doesn't match */
228 struct ip_chain *branch; /* which branch to jump to if
229 * current rule matches */
230 int simplebranch; /* Use this if branch == NULL */
231 struct ip_counters counters[0]; /* Actually several of these */
234 struct ip_reent
236 struct ip_chain *prevchain; /* Pointer to referencing chain */
237 struct ip_fwkernel *prevrule; /* Pointer to referencing rule */
238 struct ip_counters counters;
241 struct ip_chain
243 ip_chainlabel label; /* Defines the label for each block */
244 struct ip_chain *next; /* Pointer to next block */
245 struct ip_fwkernel *chain; /* Pointer to first rule in block */
246 __u32 refcount; /* Number of refernces to block */
247 int policy; /* Default rule for chain. Only *
248 * used in built in chains */
249 struct ip_reent reent[0]; /* Actually several of these */
/*
 *	Implement IP packet firewall
 */

/* Packet-path tracing: goes to printk() only with DEBUG_IP_FIREWALL. */
#ifndef DEBUG_IP_FIREWALL
#define dprintf(format, args...)
#else
#define dprintf(format, args...)  printk(format , ## args)
#endif

/* Rule-management tracing: goes to printk() only with DEBUG_IP_FIREWALL_USER. */
#ifndef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...)
#else
#define duprintf(format, args...) printk(format , ## args)
#endif
268 /* Lock around ip_fw_chains linked list structure */
269 rwlock_t ip_fw_lock = RW_LOCK_UNLOCKED;
271 /* Head of linked list of fw rules */
272 static struct ip_chain *ip_fw_chains;
274 #define IP_FW_INPUT_CHAIN ip_fw_chains
275 #define IP_FW_FORWARD_CHAIN (ip_fw_chains->next)
276 #define IP_FW_OUTPUT_CHAIN (ip_fw_chains->next->next)
/*
 * Port range test.
 *
 * For a non-fragment the result is 1 when min <= port <= max, flipped when
 * invert is set.  A fragment fails every port test: it only "matches" the
 * full range 0..0xFFFF, and invert is ignored for it.
 */
extern inline int port_match(__u16 min, __u16 max, __u16 port,
			     int frag, int invert)
{
	int in_range;

	if (frag)
		return min == 0 && max == 0xFFFF;

	in_range = (min <= port && port <= max);
	return in_range ^ invert;
}
287 /* Returns whether matches rule or not. */
288 static int ip_rule_match(struct ip_fwkernel *f,
289 const char *ifname,
290 struct sk_buff **pskb,
291 char tcpsyn,
292 __u16 src_port, __u16 dst_port,
293 char isfrag)
295 struct iphdr *ip = (*pskb)->nh.iph;
297 #define FWINV(bool,invflg) ((bool) ^ !!(f->ipfw.fw_invflg & invflg))
299 * This is a bit simpler as we don't have to walk
300 * an interface chain as you do in BSD - same logic
301 * however.
304 if (FWINV((ip->saddr&f->ipfw.fw_smsk.s_addr) != f->ipfw.fw_src.s_addr,
305 IP_FW_INV_SRCIP)
306 || FWINV((ip->daddr&f->ipfw.fw_dmsk.s_addr)!=f->ipfw.fw_dst.s_addr,
307 IP_FW_INV_DSTIP)) {
308 dprintf("Source or dest mismatch.\n");
310 dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
311 f->ipfw.fw_smsk.s_addr, f->ipfw.fw_src.s_addr,
312 f->ipfw.fw_invflg & IP_FW_INV_SRCIP ? " (INV)" : "");
313 dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
314 f->ipfw.fw_dmsk.s_addr, f->ipfw.fw_dst.s_addr,
315 f->ipfw.fw_invflg & IP_FW_INV_DSTIP ? " (INV)" : "");
316 return 0;
320 * Look for a VIA device match
322 if (f->ipfw.fw_flg & IP_FW_F_WILDIF) {
323 if (FWINV(strncmp(ifname, f->ipfw.fw_vianame,
324 strlen(f->ipfw.fw_vianame)) != 0,
325 IP_FW_INV_VIA)) {
326 dprintf("Wildcard interface mismatch.%s\n",
327 f->ipfw.fw_invflg & IP_FW_INV_VIA ? " (INV)" : "");
328 return 0; /* Mismatch */
331 else if (FWINV(strcmp(ifname, f->ipfw.fw_vianame) != 0,
332 IP_FW_INV_VIA)) {
333 dprintf("Interface name does not match.%s\n",
334 f->ipfw.fw_invflg & IP_FW_INV_VIA
335 ? " (INV)" : "");
336 return 0; /* Mismatch */
340 * Ok the chain addresses match.
343 /* If we have a fragment rule but the packet is not a fragment
344 * the we return zero */
345 if (FWINV((f->ipfw.fw_flg&IP_FW_F_FRAG) && !isfrag, IP_FW_INV_FRAG)) {
346 dprintf("Fragment rule but not fragment.%s\n",
347 f->ipfw.fw_invflg & IP_FW_INV_FRAG ? " (INV)" : "");
348 return 0;
351 /* Fragment NEVER passes a SYN test, even an inverted one. */
352 if (FWINV((f->ipfw.fw_flg&IP_FW_F_TCPSYN) && !tcpsyn, IP_FW_INV_SYN)
353 || (isfrag && (f->ipfw.fw_flg&IP_FW_F_TCPSYN))) {
354 dprintf("Rule requires SYN and packet has no SYN.%s\n",
355 f->ipfw.fw_invflg & IP_FW_INV_SYN ? " (INV)" : "");
356 return 0;
359 if (f->ipfw.fw_proto) {
361 * Specific firewall - packet's protocol
362 * must match firewall's.
365 if (FWINV(ip->protocol!=f->ipfw.fw_proto, IP_FW_INV_PROTO)) {
366 dprintf("Packet protocol %hi does not match %hi.%s\n",
367 ip->protocol, f->ipfw.fw_proto,
368 f->ipfw.fw_invflg&IP_FW_INV_PROTO ? " (INV)":"");
369 return 0;
372 /* For non TCP/UDP/ICMP, port range is max anyway. */
373 if (!port_match(f->ipfw.fw_spts[0],
374 f->ipfw.fw_spts[1],
375 src_port, isfrag,
376 !!(f->ipfw.fw_invflg&IP_FW_INV_SRCPT))
377 || !port_match(f->ipfw.fw_dpts[0],
378 f->ipfw.fw_dpts[1],
379 dst_port, isfrag,
380 !!(f->ipfw.fw_invflg
381 &IP_FW_INV_DSTPT))) {
382 dprintf("Port match failed.\n");
383 return 0;
387 dprintf("Match succeeded.\n");
388 return 1;
391 static const char *branchname(struct ip_chain *branch,int simplebranch)
393 if (branch)
394 return branch->label;
395 switch (simplebranch)
397 case FW_BLOCK: return IP_FW_LABEL_BLOCK;
398 case FW_ACCEPT: return IP_FW_LABEL_ACCEPT;
399 case FW_REJECT: return IP_FW_LABEL_REJECT;
400 case FW_REDIRECT: return IP_FW_LABEL_REDIRECT;
401 case FW_MASQUERADE: return IP_FW_LABEL_MASQUERADE;
402 case FW_SKIP: return "-";
403 case FW_SKIP+1: return IP_FW_LABEL_RETURN;
404 default:
405 return "UNKNOWN";
410 * VERY ugly piece of code which actually
411 * makes kernel printf for matching packets...
413 static void dump_packet(struct sk_buff **pskb,
414 const char *ifname,
415 struct ip_fwkernel *f,
416 const ip_chainlabel chainlabel,
417 __u16 src_port,
418 __u16 dst_port,
419 unsigned int count,
420 int syn)
422 __u32 *opt = (__u32 *) ((*pskb)->nh.iph + 1);
423 int opti;
425 if (f) {
426 printk(KERN_INFO "Packet log: %s ",chainlabel);
427 printk("%s ",branchname(f->branch,f->simplebranch));
428 if (f->simplebranch==FW_REDIRECT)
429 printk("%d ",f->ipfw.fw_redirpt);
432 printk("%s PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
433 " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
434 ifname, (*pskb)->nh.iph->protocol,
435 NIPQUAD((*pskb)->nh.iph->saddr),
436 src_port,
437 NIPQUAD((*pskb)->nh.iph->daddr),
438 dst_port,
439 ntohs((*pskb)->nh.iph->tot_len),
440 (*pskb)->nh.iph->tos,
441 ntohs((*pskb)->nh.iph->id),
442 ntohs((*pskb)->nh.iph->frag_off),
443 (*pskb)->nh.iph->ttl);
445 for (opti = 0; opti < ((*pskb)->nh.iph->ihl - sizeof(struct iphdr) / 4); opti++)
446 printk(" O=0x%8.8X", *opt++);
447 printk(" %s(#%d)\n", syn ? "SYN " : /* "PENANCE" */ "", count);
450 /* function for checking chain labels for user space. */
451 static int check_label(ip_chainlabel label)
453 unsigned int i;
454 /* strlen must be < IP_FW_MAX_LABEL_LENGTH. */
455 for (i = 0; i < IP_FW_MAX_LABEL_LENGTH + 1; i++)
456 if (label[i] == '\0') return 1;
458 return 0;
461 /* This function returns a pointer to the first chain with a label
462 * that matches the one given. */
463 static struct ip_chain *find_label(ip_chainlabel label)
465 struct ip_chain *tmp;
466 FWC_HAVE_LOCK(fwc_rlocks | fwc_wlocks);
467 for (tmp = ip_fw_chains; tmp; tmp = tmp->next)
468 if (strcmp(tmp->label,label) == 0)
469 break;
470 return tmp;
473 /* This function returns a boolean which when true sets answer to one
474 of the FW_*. */
475 static int find_special(ip_chainlabel label, int *answer)
477 if (label[0] == '\0') {
478 *answer = FW_SKIP; /* => pass-through rule */
479 return 1;
480 } else if (strcmp(label,IP_FW_LABEL_ACCEPT) == 0) {
481 *answer = FW_ACCEPT;
482 return 1;
483 } else if (strcmp(label,IP_FW_LABEL_BLOCK) == 0) {
484 *answer = FW_BLOCK;
485 return 1;
486 } else if (strcmp(label,IP_FW_LABEL_REJECT) == 0) {
487 *answer = FW_REJECT;
488 return 1;
489 } else if (strcmp(label,IP_FW_LABEL_REDIRECT) == 0) {
490 *answer = FW_REDIRECT;
491 return 1;
492 } else if (strcmp(label,IP_FW_LABEL_MASQUERADE) == 0) {
493 *answer = FW_MASQUERADE;
494 return 1;
495 } else if (strcmp(label, IP_FW_LABEL_RETURN) == 0) {
496 *answer = FW_SKIP+1;
497 return 1;
498 } else {
499 return 0;
503 /* This function cleans up the prevchain and prevrule. If the verbose
504 * flag is set then he names of the chains will be printed as it
505 * cleans up. */
506 static void cleanup(struct ip_chain *chain,
507 const int verbose,
508 unsigned int slot)
510 struct ip_chain *tmpchain = chain->reent[slot].prevchain;
511 if (verbose)
512 printk(KERN_ERR "Chain backtrace: ");
513 while (tmpchain) {
514 if (verbose)
515 printk("%s<-",chain->label);
516 chain->reent[slot].prevchain = NULL;
517 chain = tmpchain;
518 tmpchain = chain->reent[slot].prevchain;
520 if (verbose)
521 printk("%s\n",chain->label);
524 static inline int
525 ip_fw_domatch(struct ip_fwkernel *f,
526 const char *rif,
527 const ip_chainlabel label,
528 struct sk_buff **pskb,
529 unsigned int slot,
530 __u16 src_port, __u16 dst_port,
531 unsigned int count,
532 int tcpsyn,
533 unsigned char *tos)
535 f->counters[slot].bcnt+=ntohs((*pskb)->nh.iph->tot_len);
536 f->counters[slot].pcnt++;
537 if (f->ipfw.fw_flg & IP_FW_F_PRN) {
538 dump_packet(pskb,rif,f,label,src_port,dst_port,count,tcpsyn);
541 *tos = (*tos & f->ipfw.fw_tosand) ^ f->ipfw.fw_tosxor;
543 /* This functionality is useless in stock 2.0.x series, but we don't
544 * discard the mark thing altogether, to avoid breaking ipchains (and,
545 * more importantly, the ipfwadm wrapper) --PR */
546 if (f->ipfw.fw_flg & IP_FW_F_MARKABS) {
547 (*pskb)->nfmark = f->ipfw.fw_mark;
548 } else {
549 (*pskb)->nfmark += f->ipfw.fw_mark;
551 if (f->ipfw.fw_flg & IP_FW_F_NETLINK) {
552 #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
553 size_t len = min_t(unsigned int, f->ipfw.fw_outputsize, ntohs((*pskb)->nh.iph->tot_len))
554 + sizeof(__u32) + sizeof((*pskb)->nfmark) + IFNAMSIZ;
555 struct sk_buff *outskb=alloc_skb(len, GFP_ATOMIC);
557 duprintf("Sending packet out NETLINK (length = %u).\n",
558 (unsigned int)len);
559 if (outskb) {
560 /* Prepend length, mark & interface */
561 skb_put(outskb, len);
562 *((__u32 *)outskb->data) = (__u32)len;
563 *((__u32 *)(outskb->data+sizeof(__u32))) =
564 (*pskb)->nfmark;
565 strcpy(outskb->data+sizeof(__u32)*2, rif);
566 skb_copy_bits(*pskb,
567 ((char *)(*pskb)->nh.iph - (char *)(*pskb)->data),
568 outskb->data+sizeof(__u32)*2+IFNAMSIZ,
569 len-(sizeof(__u32)*2+IFNAMSIZ));
570 netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_ATOMIC);
572 else {
573 #endif
574 if (net_ratelimit())
575 printk(KERN_WARNING "ip_fw: packet drop due to "
576 "netlink failure\n");
577 return 0;
578 #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
580 #endif
582 return 1;
586 * Returns one of the generic firewall policies, like FW_ACCEPT.
588 * The testing is either false for normal firewall mode or true for
589 * user checking mode (counters are not updated, TOS & mark not done).
591 static int
592 ip_fw_check(const char *rif,
593 __u16 *redirport,
594 struct ip_chain *chain,
595 struct sk_buff **pskb,
596 unsigned int slot,
597 int testing)
599 __u32 src, dst;
600 __u16 src_port = 0xFFFF, dst_port = 0xFFFF;
601 char tcpsyn=0;
602 __u16 offset;
603 unsigned char tos;
604 struct ip_fwkernel *f;
605 int ret = FW_SKIP+2;
606 unsigned int count;
608 /* We handle fragments by dealing with the first fragment as
609 * if it was a normal packet. All other fragments are treated
610 * normally, except that they will NEVER match rules that ask
611 * things we don't know, ie. tcp syn flag or ports). If the
612 * rule is also a fragment-specific rule, non-fragments won't
613 * match it. */
615 offset = ntohs((*pskb)->nh.iph->frag_off) & IP_OFFSET;
618 * Don't allow a fragment of TCP 8 bytes in. Nobody
619 * normal causes this. Its a cracker trying to break
620 * in by doing a flag overwrite to pass the direction
621 * checks.
623 if (offset == 1 && (*pskb)->nh.iph->protocol == IPPROTO_TCP) {
624 if (!testing && net_ratelimit()) {
625 printk("Suspect TCP fragment.\n");
626 dump_packet(pskb,rif,NULL,NULL,0,0,0,0);
628 return FW_BLOCK;
631 /* If we can't investigate ports, treat as fragment. It's
632 * either a trucated whole packet, or a truncated first
633 * fragment, or a TCP first fragment of length 8-15, in which
634 * case the above rule stops reassembly.
636 if (offset == 0) {
637 unsigned int size_req;
638 switch ((*pskb)->nh.iph->protocol) {
639 case IPPROTO_TCP:
640 /* Don't care about things past flags word */
641 size_req = 16;
642 break;
644 case IPPROTO_UDP:
645 case IPPROTO_ICMP:
646 size_req = 8;
647 break;
649 default:
650 size_req = 0;
653 /* If it is a truncated first fragment then it can be
654 * used to rewrite port information, and thus should
655 * be blocked.
657 if (ntohs((*pskb)->nh.iph->tot_len) <
658 ((*pskb)->nh.iph->ihl<<2)+size_req) {
659 if (!testing && net_ratelimit()) {
660 printk("Suspect short first fragment.\n");
661 dump_packet(pskb,rif,NULL,NULL,0,0,0,0);
663 return FW_BLOCK;
667 src = (*pskb)->nh.iph->saddr;
668 dst = (*pskb)->nh.iph->daddr;
669 tos = (*pskb)->nh.iph->tos;
672 * If we got interface from which packet came
673 * we can use the address directly. Linux 2.1 now uses address
674 * chains per device too, but unlike BSD we first check if the
675 * incoming packet matches a device address and the routing
676 * table before calling the firewall.
679 dprintf("Packet ");
680 switch ((*pskb)->nh.iph->protocol) {
681 case IPPROTO_TCP:
682 dprintf("TCP ");
683 if (!offset) {
684 struct tcphdr _tcph, *th;
686 th = skb_header_pointer(*pskb,
687 (*pskb)->nh.iph->ihl*4,
688 sizeof(_tcph), &_tcph);
689 if (th == NULL)
690 return FW_BLOCK;
692 src_port = ntohs(th->source);
693 dst_port = ntohs(th->dest);
695 /* Connection initilisation can only
696 * be made when the syn bit is set and
697 * neither of the ack or reset is
698 * set. */
699 if (th->syn && !(th->ack || th->rst))
700 tcpsyn = 1;
702 break;
703 case IPPROTO_UDP:
704 dprintf("UDP ");
705 if (!offset) {
706 struct udphdr _udph, *uh;
708 uh = skb_header_pointer(*pskb,
709 (*pskb)->nh.iph->ihl*4,
710 sizeof(_udph), &_udph);
711 if (uh == NULL)
712 return FW_BLOCK;
714 src_port = ntohs(uh->source);
715 dst_port = ntohs(uh->dest);
717 break;
718 case IPPROTO_ICMP:
719 if (!offset) {
720 struct icmphdr _icmph, *ic;
722 ic = skb_header_pointer(*pskb,
723 (*pskb)->nh.iph->ihl*4,
724 sizeof(_icmph),
725 &_icmph);
726 if (ic == NULL)
727 return FW_BLOCK;
729 src_port = (__u16) ic->type;
730 dst_port = (__u16) ic->code;
732 dprintf("ICMP ");
733 break;
734 default:
735 dprintf("p=%d ", (*pskb)->nh.iph->protocol);
736 break;
738 #ifdef DEBUG_IP_FIREWALL
739 print_ip((*pskb)->nh.iph->saddr);
741 if (offset)
742 dprintf(":fragment (%i) ", ((int)offset)<<2);
743 else if ((*pskb)->nh.iph->protocol == IPPROTO_TCP ||
744 (*pskb)->nh.iph->protocol == IPPROTO_UDP ||
745 (*pskb)->nh.iph->protocol == IPPROTO_ICMP)
746 dprintf(":%hu:%hu", src_port, dst_port);
747 dprintf("\n");
748 #endif
750 if (!testing) FWC_READ_LOCK(&ip_fw_lock);
751 else FWC_HAVE_LOCK(fwc_rlocks);
753 f = chain->chain;
754 do {
755 count = 0;
756 for (; f; f = f->next) {
757 count++;
758 if (ip_rule_match(f, rif, pskb,
759 tcpsyn, src_port, dst_port,
760 offset)) {
761 if (!testing
762 && !ip_fw_domatch(f, rif, chain->label,
763 pskb, slot,
764 src_port, dst_port,
765 count, tcpsyn, &tos)) {
766 ret = FW_BLOCK;
767 cleanup(chain, 0, slot);
768 goto out;
770 break;
773 if (f) {
774 if (f->branch) {
775 /* Do sanity check to see if we have
776 * already set prevchain and if so we
777 * must be in a loop */
778 if (f->branch->reent[slot].prevchain) {
779 if (!testing) {
780 printk(KERN_ERR
781 "IP firewall: "
782 "Loop detected "
783 "at `%s'.\n",
784 f->branch->label);
785 cleanup(chain, 1, slot);
786 ret = FW_BLOCK;
787 } else {
788 cleanup(chain, 0, slot);
789 ret = FW_SKIP+1;
792 else {
793 f->branch->reent[slot].prevchain
794 = chain;
795 f->branch->reent[slot].prevrule
796 = f->next;
797 chain = f->branch;
798 f = chain->chain;
801 else if (f->simplebranch == FW_SKIP)
802 f = f->next;
803 else if (f->simplebranch == FW_SKIP+1) {
804 /* Just like falling off the chain */
805 goto fall_off_chain;
806 } else {
807 cleanup(chain, 0, slot);
808 ret = f->simplebranch;
810 } /* f == NULL */
811 else {
812 fall_off_chain:
813 if (chain->reent[slot].prevchain) {
814 struct ip_chain *tmp = chain;
815 f = chain->reent[slot].prevrule;
816 chain = chain->reent[slot].prevchain;
817 tmp->reent[slot].prevchain = NULL;
819 else {
820 ret = chain->policy;
821 if (!testing) {
822 chain->reent[slot].counters.pcnt++;
823 chain->reent[slot].counters.bcnt
824 += ntohs((*pskb)->nh.iph->tot_len);
828 } while (ret == FW_SKIP+2);
830 out:
831 if (!testing) FWC_READ_UNLOCK(&ip_fw_lock);
833 /* Recalculate checksum if not going to reject, and TOS changed. */
834 if ((*pskb)->nh.iph->tos != tos
835 && ret != FW_REJECT && ret != FW_BLOCK
836 && !testing) {
837 if (!skb_ip_make_writable(pskb, offsetof(struct iphdr, tos)+1))
838 ret = FW_BLOCK;
839 else {
840 (*pskb)->nh.iph->tos = tos;
841 ip_send_check((*pskb)->nh.iph);
845 if (ret == FW_REDIRECT && redirport) {
846 if ((*redirport = htons(f->ipfw.fw_redirpt)) == 0) {
847 /* Wildcard redirection.
848 * Note that redirport will become
849 * 0xFFFF for non-TCP/UDP packets.
851 *redirport = htons(dst_port);
855 #ifdef DEBUG_ALLOW_ALL
856 return (testing ? ret : FW_ACCEPT);
857 #else
858 return ret;
859 #endif
862 /* Must have write lock & interrupts off for any of these */
864 /* This function sets all the byte counters in a chain to zero. The
865 * input is a pointer to the chain required for zeroing */
866 static int zero_fw_chain(struct ip_chain *chainptr)
868 struct ip_fwkernel *i;
870 FWC_HAVE_LOCK(fwc_wlocks);
871 for (i = chainptr->chain; i; i = i->next)
872 memset(i->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
873 return 0;
876 static int clear_fw_chain(struct ip_chain *chainptr)
878 struct ip_fwkernel *i= chainptr->chain;
880 FWC_HAVE_LOCK(fwc_wlocks);
881 chainptr->chain=NULL;
883 while (i) {
884 struct ip_fwkernel *tmp = i->next;
885 if (i->branch)
886 i->branch->refcount--;
887 kfree(i);
888 i = tmp;
889 /* We will block in cleanup's unregister sockopt if unloaded,
890 so this is safe. */
891 module_put(THIS_MODULE);
893 return 0;
896 static int replace_in_chain(struct ip_chain *chainptr,
897 struct ip_fwkernel *frwl,
898 __u32 position)
900 struct ip_fwkernel *f = chainptr->chain;
902 FWC_HAVE_LOCK(fwc_wlocks);
904 while (--position && f != NULL) f = f->next;
905 if (f == NULL)
906 return EINVAL;
908 if (f->branch) f->branch->refcount--;
909 if (frwl->branch) frwl->branch->refcount++;
911 frwl->next = f->next;
912 memcpy(f,frwl,sizeof(struct ip_fwkernel));
913 kfree(frwl);
914 return 0;
917 static int append_to_chain(struct ip_chain *chainptr, struct ip_fwkernel *rule)
919 struct ip_fwkernel *i;
921 FWC_HAVE_LOCK(fwc_wlocks);
923 /* Are we unloading now? We will block on nf_unregister_sockopt */
924 if (!try_module_get(THIS_MODULE))
925 return ENOPROTOOPT;
927 /* Special case if no rules already present */
928 if (chainptr->chain == NULL) {
930 /* If pointer writes are atomic then turning off
931 * interrupts is not necessary. */
932 chainptr->chain = rule;
933 if (rule->branch) rule->branch->refcount++;
934 goto append_successful;
937 /* Find the rule before the end of the chain */
938 for (i = chainptr->chain; i->next; i = i->next);
939 i->next = rule;
940 if (rule->branch) rule->branch->refcount++;
942 append_successful:
943 return 0;
946 /* This function inserts a rule at the position of position in the
947 * chain refenced by chainptr. If position is 1 then this rule will
948 * become the new rule one. */
949 static int insert_in_chain(struct ip_chain *chainptr,
950 struct ip_fwkernel *frwl,
951 __u32 position)
953 struct ip_fwkernel *f = chainptr->chain;
955 FWC_HAVE_LOCK(fwc_wlocks);
957 /* Are we unloading now? We will block on nf_unregister_sockopt */
958 if (!try_module_get(THIS_MODULE))
959 return ENOPROTOOPT;
961 /* special case if the position is number 1 */
962 if (position == 1) {
963 frwl->next = chainptr->chain;
964 if (frwl->branch) frwl->branch->refcount++;
965 chainptr->chain = frwl;
966 goto insert_successful;
968 position--;
969 while (--position && f != NULL) f = f->next;
970 if (f == NULL)
971 return EINVAL;
972 if (frwl->branch) frwl->branch->refcount++;
973 frwl->next = f->next;
975 f->next = frwl;
977 insert_successful:
978 return 0;
981 /* This function deletes the a rule from a given rulenum and chain.
982 * With rulenum = 1 is the first rule is deleted. */
984 static int del_num_from_chain(struct ip_chain *chainptr, __u32 rulenum)
986 struct ip_fwkernel *i=chainptr->chain,*tmp;
988 FWC_HAVE_LOCK(fwc_wlocks);
990 if (!chainptr->chain)
991 return ENOENT;
993 /* Need a special case for the first rule */
994 if (rulenum == 1) {
995 /* store temp to allow for freeing up of memory */
996 tmp = chainptr->chain;
997 if (chainptr->chain->branch) chainptr->chain->branch->refcount--;
998 chainptr->chain = chainptr->chain->next;
999 kfree(tmp); /* free memory that is now unused */
1000 } else {
1001 rulenum--;
1002 while (--rulenum && i->next ) i = i->next;
1003 if (!i->next)
1004 return ENOENT;
1005 tmp = i->next;
1006 if (i->next->branch)
1007 i->next->branch->refcount--;
1008 i->next = i->next->next;
1009 kfree(tmp);
1012 /* We will block in cleanup's unregister sockopt if unloaded,
1013 so this is safe. */
1014 module_put(THIS_MODULE);
1015 return 0;
1019 /* This function deletes the a rule from a given rule and chain.
1020 * The rule that is deleted is the first occursance of that rule. */
1021 static int del_rule_from_chain(struct ip_chain *chainptr,
1022 struct ip_fwkernel *frwl)
1024 struct ip_fwkernel *ltmp,*ftmp = chainptr->chain ;
1025 int was_found;
1027 FWC_HAVE_LOCK(fwc_wlocks);
1029 /* Sure, we should compare marks, but since the `ipfwadm'
1030 * script uses it for an unholy hack... well, life is easier
1031 * this way. We also mask it out of the flags word. --PR */
1032 for (ltmp=NULL, was_found=0;
1033 !was_found && ftmp != NULL;
1034 ltmp = ftmp,ftmp = ftmp->next) {
1035 if (ftmp->ipfw.fw_src.s_addr!=frwl->ipfw.fw_src.s_addr
1036 || ftmp->ipfw.fw_dst.s_addr!=frwl->ipfw.fw_dst.s_addr
1037 || ftmp->ipfw.fw_smsk.s_addr!=frwl->ipfw.fw_smsk.s_addr
1038 || ftmp->ipfw.fw_dmsk.s_addr!=frwl->ipfw.fw_dmsk.s_addr
1039 #if 0
1040 || ftmp->ipfw.fw_flg!=frwl->ipfw.fw_flg
1041 #else
1042 || ((ftmp->ipfw.fw_flg & ~IP_FW_F_MARKABS)
1043 != (frwl->ipfw.fw_flg & ~IP_FW_F_MARKABS))
1044 #endif
1045 || ftmp->ipfw.fw_invflg!=frwl->ipfw.fw_invflg
1046 || ftmp->ipfw.fw_proto!=frwl->ipfw.fw_proto
1047 #if 0
1048 || ftmp->ipfw.fw_mark!=frwl->ipfw.fw_mark
1049 #endif
1050 || ftmp->ipfw.fw_redirpt!=frwl->ipfw.fw_redirpt
1051 || ftmp->ipfw.fw_spts[0]!=frwl->ipfw.fw_spts[0]
1052 || ftmp->ipfw.fw_spts[1]!=frwl->ipfw.fw_spts[1]
1053 || ftmp->ipfw.fw_dpts[0]!=frwl->ipfw.fw_dpts[0]
1054 || ftmp->ipfw.fw_dpts[1]!=frwl->ipfw.fw_dpts[1]
1055 || ftmp->ipfw.fw_outputsize!=frwl->ipfw.fw_outputsize) {
1056 duprintf("del_rule_from_chain: mismatch:"
1057 "src:%u/%u dst:%u/%u smsk:%u/%u dmsk:%u/%u "
1058 "flg:%hX/%hX invflg:%hX/%hX proto:%u/%u "
1059 "mark:%u/%u "
1060 "ports:%hu-%hu/%hu-%hu %hu-%hu/%hu-%hu "
1061 "outputsize:%hu-%hu\n",
1062 ftmp->ipfw.fw_src.s_addr,
1063 frwl->ipfw.fw_src.s_addr,
1064 ftmp->ipfw.fw_dst.s_addr,
1065 frwl->ipfw.fw_dst.s_addr,
1066 ftmp->ipfw.fw_smsk.s_addr,
1067 frwl->ipfw.fw_smsk.s_addr,
1068 ftmp->ipfw.fw_dmsk.s_addr,
1069 frwl->ipfw.fw_dmsk.s_addr,
1070 ftmp->ipfw.fw_flg,
1071 frwl->ipfw.fw_flg,
1072 ftmp->ipfw.fw_invflg,
1073 frwl->ipfw.fw_invflg,
1074 ftmp->ipfw.fw_proto,
1075 frwl->ipfw.fw_proto,
1076 ftmp->ipfw.fw_mark,
1077 frwl->ipfw.fw_mark,
1078 ftmp->ipfw.fw_spts[0],
1079 frwl->ipfw.fw_spts[0],
1080 ftmp->ipfw.fw_spts[1],
1081 frwl->ipfw.fw_spts[1],
1082 ftmp->ipfw.fw_dpts[0],
1083 frwl->ipfw.fw_dpts[0],
1084 ftmp->ipfw.fw_dpts[1],
1085 frwl->ipfw.fw_dpts[1],
1086 ftmp->ipfw.fw_outputsize,
1087 frwl->ipfw.fw_outputsize);
1088 continue;
1091 if (strncmp(ftmp->ipfw.fw_vianame,
1092 frwl->ipfw.fw_vianame,
1093 IFNAMSIZ)) {
1094 duprintf("del_rule_from_chain: if mismatch: %s/%s\n",
1095 ftmp->ipfw.fw_vianame,
1096 frwl->ipfw.fw_vianame);
1097 continue;
1099 if (ftmp->branch != frwl->branch) {
1100 duprintf("del_rule_from_chain: branch mismatch: "
1101 "%s/%s\n",
1102 ftmp->branch?ftmp->branch->label:"(null)",
1103 frwl->branch?frwl->branch->label:"(null)");
1104 continue;
1106 if (ftmp->branch == NULL
1107 && ftmp->simplebranch != frwl->simplebranch) {
1108 duprintf("del_rule_from_chain: simplebranch mismatch: "
1109 "%i/%i\n",
1110 ftmp->simplebranch, frwl->simplebranch);
1111 continue;
1113 was_found = 1;
1114 if (ftmp->branch)
1115 ftmp->branch->refcount--;
1116 if (ltmp)
1117 ltmp->next = ftmp->next;
1118 else
1119 chainptr->chain = ftmp->next;
1120 kfree(ftmp);
1121 /* We will block in cleanup's unregister sockopt if unloaded,
1122 so this is safe. */
1123 module_put(THIS_MODULE);
1124 break;
1127 if (was_found)
1128 return 0;
1129 else {
1130 duprintf("del_rule_from_chain: no matching rule found\n");
1131 return EINVAL;
1135 /* This function takes the label of a chain and deletes the first
1136 * chain with that name. No special cases required for the built in
1137 * chains as they have their refcount initilised to 1 so that they are
1138 * never deleted. */
1139 static int del_chain(ip_chainlabel label)
1141 struct ip_chain *tmp,*tmp2;
1143 FWC_HAVE_LOCK(fwc_wlocks);
1144 /* Corner case: return EBUSY not ENOENT for first elem ("input") */
1145 if (strcmp(label, ip_fw_chains->label) == 0)
1146 return EBUSY;
1148 for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
1149 if(strcmp(tmp->next->label,label) == 0)
1150 break;
1152 tmp2 = tmp->next;
1153 if (!tmp2)
1154 return ENOENT;
1156 if (tmp2->refcount)
1157 return EBUSY;
1159 if (tmp2->chain)
1160 return ENOTEMPTY;
1162 tmp->next = tmp2->next;
1163 kfree(tmp2);
1165 /* We will block in cleanup's unregister sockopt if unloaded,
1166 so this is safe. */
1167 module_put(THIS_MODULE);
1168 return 0;
1171 /* This is a function to initilise a chain. Built in rules start with
1172 * refcount = 1 so that they cannot be deleted. User defined rules
1173 * start with refcount = 0 so they can be deleted. */
1174 static struct ip_chain *ip_init_chain(ip_chainlabel name,
1175 __u32 ref,
1176 int policy)
1178 unsigned int i;
1179 struct ip_chain *label
1180 = kmalloc(SIZEOF_STRUCT_IP_CHAIN, GFP_KERNEL);
1181 if (label == NULL)
1182 panic("Can't kmalloc for firewall chains.\n");
1183 strcpy(label->label,name);
1184 label->next = NULL;
1185 label->chain = NULL;
1186 label->refcount = ref;
1187 label->policy = policy;
1188 for (i = 0; i < NUM_SLOTS; i++) {
1189 label->reent[i].counters.pcnt = label->reent[i].counters.bcnt
1190 = 0;
1191 label->reent[i].prevchain = NULL;
1192 label->reent[i].prevrule = NULL;
1195 return label;
1198 /* This is a function for reating a new chain. The chains is not
1199 * created if a chain of the same name already exists */
1200 static int create_chain(ip_chainlabel label)
1202 struct ip_chain *tmp;
1204 if (!check_label(label))
1205 return EINVAL;
1207 FWC_HAVE_LOCK(fwc_wlocks);
1208 for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
1209 if (strcmp(tmp->label,label) == 0)
1210 return EEXIST;
1212 if (strcmp(tmp->label,label) == 0)
1213 return EEXIST;
1215 /* Are we unloading now? We will block on nf_unregister_sockopt */
1216 if (!try_module_get(THIS_MODULE))
1217 return ENOPROTOOPT;
1219 tmp->next = ip_init_chain(label, 0, FW_SKIP); /* refcount is
1220 * zero since this is a
1221 * user defined chain *
1222 * and therefore can be
1223 * deleted */
1224 return 0;
1227 /* This function simply changes the policy on one of the built in
1228 * chains. checking must be done before this is call to ensure that
1229 * chainptr is pointing to one of the three possible chains */
1230 static int change_policy(struct ip_chain *chainptr, int policy)
1232 FWC_HAVE_LOCK(fwc_wlocks);
1233 chainptr->policy = policy;
1234 return 0;
1237 /* This function takes an ip_fwuser and converts it to a ip_fwkernel. It also
1238 * performs some checks in the structure. */
1239 static struct ip_fwkernel *convert_ipfw(struct ip_fwuser *fwuser, int *errno)
1241 struct ip_fwkernel *fwkern;
1243 if ( (fwuser->ipfw.fw_flg & ~IP_FW_F_MASK) != 0 ) {
1244 duprintf("convert_ipfw: undefined flag bits set (flags=%x)\n",
1245 fwuser->ipfw.fw_flg);
1246 *errno = EINVAL;
1247 return NULL;
1250 #ifdef DEBUG_IP_FIREWALL_USER
1251 /* These are sanity checks that don't really matter.
1252 * We can get rid of these once testing is complete.
1254 if ((fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
1255 && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
1256 || fwuser->ipfw.fw_proto != IPPROTO_TCP)) {
1257 duprintf("convert_ipfw: TCP SYN flag set but proto != TCP!\n");
1258 *errno = EINVAL;
1259 return NULL;
1262 if (strcmp(fwuser->label, IP_FW_LABEL_REDIRECT) != 0
1263 && fwuser->ipfw.fw_redirpt != 0) {
1264 duprintf("convert_ipfw: Target not REDIR but redirpt != 0!\n");
1265 *errno = EINVAL;
1266 return NULL;
1269 if ((!(fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
1270 && (fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG))
1271 || (!(fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
1272 && (fwuser->ipfw.fw_invflg & IP_FW_INV_SYN))) {
1273 duprintf("convert_ipfw: Can't have INV flag if flag unset!\n");
1274 *errno = EINVAL;
1275 return NULL;
1278 if (((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCPT)
1279 && fwuser->ipfw.fw_spts[0] == 0
1280 && fwuser->ipfw.fw_spts[1] == 0xFFFF)
1281 || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTPT)
1282 && fwuser->ipfw.fw_dpts[0] == 0
1283 && fwuser->ipfw.fw_dpts[1] == 0xFFFF)
1284 || ((fwuser->ipfw.fw_invflg & IP_FW_INV_VIA)
1285 && (fwuser->ipfw.fw_vianame)[0] == '\0')
1286 || ((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCIP)
1287 && fwuser->ipfw.fw_smsk.s_addr == 0)
1288 || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTIP)
1289 && fwuser->ipfw.fw_dmsk.s_addr == 0)) {
1290 duprintf("convert_ipfw: INV flag makes rule unmatchable!\n");
1291 *errno = EINVAL;
1292 return NULL;
1295 if ((fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
1296 && !(fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG)
1297 && (fwuser->ipfw.fw_spts[0] != 0
1298 || fwuser->ipfw.fw_spts[1] != 0xFFFF
1299 || fwuser->ipfw.fw_dpts[0] != 0
1300 || fwuser->ipfw.fw_dpts[1] != 0xFFFF
1301 || (fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN))) {
1302 duprintf("convert_ipfw: Can't test ports or SYN with frag!\n");
1303 *errno = EINVAL;
1304 return NULL;
1306 #endif
1308 if ((fwuser->ipfw.fw_spts[0] != 0
1309 || fwuser->ipfw.fw_spts[1] != 0xFFFF
1310 || fwuser->ipfw.fw_dpts[0] != 0
1311 || fwuser->ipfw.fw_dpts[1] != 0xFFFF)
1312 && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
1313 || (fwuser->ipfw.fw_proto != IPPROTO_TCP
1314 && fwuser->ipfw.fw_proto != IPPROTO_UDP
1315 && fwuser->ipfw.fw_proto != IPPROTO_ICMP))) {
1316 duprintf("convert_ipfw: Can only test ports for TCP/UDP/ICMP!\n");
1317 *errno = EINVAL;
1318 return NULL;
1321 fwkern = kmalloc(SIZEOF_STRUCT_IP_FW_KERNEL, GFP_ATOMIC);
1322 if (!fwkern) {
1323 duprintf("convert_ipfw: kmalloc failed!\n");
1324 *errno = ENOMEM;
1325 return NULL;
1327 memcpy(&fwkern->ipfw,&fwuser->ipfw,sizeof(struct ip_fw));
1329 if (!find_special(fwuser->label, &fwkern->simplebranch)) {
1330 fwkern->branch = find_label(fwuser->label);
1331 if (!fwkern->branch) {
1332 duprintf("convert_ipfw: chain doesn't exist `%s'.\n",
1333 fwuser->label);
1334 kfree(fwkern);
1335 *errno = ENOENT;
1336 return NULL;
1337 } else if (fwkern->branch == IP_FW_INPUT_CHAIN
1338 || fwkern->branch == IP_FW_FORWARD_CHAIN
1339 || fwkern->branch == IP_FW_OUTPUT_CHAIN) {
1340 duprintf("convert_ipfw: Can't branch to builtin chain `%s'.\n",
1341 fwuser->label);
1342 kfree(fwkern);
1343 *errno = ENOENT;
1344 return NULL;
1346 } else
1347 fwkern->branch = NULL;
1348 memset(fwkern->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
1350 /* Handle empty vianame by making it a wildcard */
1351 if ((fwkern->ipfw.fw_vianame)[0] == '\0')
1352 fwkern->ipfw.fw_flg |= IP_FW_F_WILDIF;
1354 fwkern->next = NULL;
1355 return fwkern;
1358 int ip_fw_ctl(int cmd, void *m, int len)
1360 int ret;
1361 struct ip_chain *chain;
1362 unsigned long flags;
1364 FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1366 switch (cmd) {
1367 case IP_FW_FLUSH:
1368 if (len != sizeof(ip_chainlabel) || !check_label(m))
1369 ret = EINVAL;
1370 else if ((chain = find_label(m)) == NULL)
1371 ret = ENOENT;
1372 else ret = clear_fw_chain(chain);
1373 break;
1375 case IP_FW_ZERO:
1376 if (len != sizeof(ip_chainlabel) || !check_label(m))
1377 ret = EINVAL;
1378 else if ((chain = find_label(m)) == NULL)
1379 ret = ENOENT;
1380 else ret = zero_fw_chain(chain);
1381 break;
1383 case IP_FW_CHECK: {
1384 struct ip_fwtest *new = m;
1385 struct iphdr *ip;
1387 /* Don't need write lock. */
1388 FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1390 if (len != sizeof(struct ip_fwtest) || !check_label(m))
1391 return EINVAL;
1393 /* Need readlock to do find_label */
1394 FWC_READ_LOCK(&ip_fw_lock);
1396 if ((chain = find_label(new->fwt_label)) == NULL)
1397 ret = ENOENT;
1398 else {
1399 struct sk_buff *tmp_skb;
1400 int hdrlen;
1402 hdrlen = sizeof(struct ip_fwpkt) -
1403 sizeof(struct in_addr) -
1404 IFNAMSIZ;
1406 ip = &(new->fwt_packet.fwp_iph);
1408 /* Fix this one up by hand, who knows how many
1409 * tools will break if we start to barf on this.
1411 if (ntohs(ip->tot_len) > hdrlen)
1412 ip->tot_len = htons(hdrlen);
1414 if (ip->ihl != sizeof(struct iphdr) / sizeof(u32)) {
1415 duprintf("ip_fw_ctl: ip->ihl=%d, want %d\n",
1416 ip->ihl,
1417 sizeof(struct iphdr) / sizeof(u32));
1418 ret = EINVAL;
1419 } else if ((tmp_skb = alloc_skb(hdrlen,
1420 GFP_ATOMIC)) == NULL) {
1421 duprintf("ip_fw_ctl: tmp_skb alloc failure\n");
1422 ret = EFAULT;
1423 } else {
1424 skb_reserve(tmp_skb, hdrlen);
1425 skb_push(tmp_skb, hdrlen);
1426 memcpy(tmp_skb->data, ip, hdrlen);
1427 tmp_skb->nh.raw =
1428 (unsigned char *) tmp_skb->data;
1429 ret = ip_fw_check(new->fwt_packet.fwp_vianame,
1430 NULL, chain,
1431 &tmp_skb, SLOT_NUMBER(), 1);
1432 kfree_skb(tmp_skb);
1433 switch (ret) {
1434 case FW_ACCEPT:
1435 ret = 0; break;
1436 case FW_REDIRECT:
1437 ret = ECONNABORTED; break;
1438 case FW_MASQUERADE:
1439 ret = ECONNRESET; break;
1440 case FW_REJECT:
1441 ret = ECONNREFUSED; break;
1442 /* Hack to help diag; these only get
1443 returned when testing. */
1444 case FW_SKIP+1:
1445 ret = ELOOP; break;
1446 case FW_SKIP:
1447 ret = ENFILE; break;
1448 default: /* FW_BLOCK */
1449 ret = ETIMEDOUT; break;
1453 FWC_READ_UNLOCK(&ip_fw_lock);
1454 return ret;
1457 case IP_FW_MASQ_TIMEOUTS: {
1458 ret = ip_fw_masq_timeouts(m, len);
1460 break;
1462 case IP_FW_REPLACE: {
1463 struct ip_fwkernel *ip_fwkern;
1464 struct ip_fwnew *new = m;
1466 if (len != sizeof(struct ip_fwnew)
1467 || !check_label(new->fwn_label))
1468 ret = EINVAL;
1469 else if ((chain = find_label(new->fwn_label)) == NULL)
1470 ret = ENOENT;
1471 else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
1472 != NULL)
1473 ret = replace_in_chain(chain, ip_fwkern,
1474 new->fwn_rulenum);
1476 break;
1478 case IP_FW_APPEND: {
1479 struct ip_fwchange *new = m;
1480 struct ip_fwkernel *ip_fwkern;
1482 if (len != sizeof(struct ip_fwchange)
1483 || !check_label(new->fwc_label))
1484 ret = EINVAL;
1485 else if ((chain = find_label(new->fwc_label)) == NULL)
1486 ret = ENOENT;
1487 else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
1488 != NULL)
1489 ret = append_to_chain(chain, ip_fwkern);
1491 break;
1493 case IP_FW_INSERT: {
1494 struct ip_fwkernel *ip_fwkern;
1495 struct ip_fwnew *new = m;
1497 if (len != sizeof(struct ip_fwnew)
1498 || !check_label(new->fwn_label))
1499 ret = EINVAL;
1500 else if ((chain = find_label(new->fwn_label)) == NULL)
1501 ret = ENOENT;
1502 else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
1503 != NULL)
1504 ret = insert_in_chain(chain, ip_fwkern,
1505 new->fwn_rulenum);
1507 break;
1509 case IP_FW_DELETE: {
1510 struct ip_fwchange *new = m;
1511 struct ip_fwkernel *ip_fwkern;
1513 if (len != sizeof(struct ip_fwchange)
1514 || !check_label(new->fwc_label))
1515 ret = EINVAL;
1516 else if ((chain = find_label(new->fwc_label)) == NULL)
1517 ret = ENOENT;
1518 else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
1519 != NULL) {
1520 ret = del_rule_from_chain(chain, ip_fwkern);
1521 kfree(ip_fwkern);
1524 break;
1526 case IP_FW_DELETE_NUM: {
1527 struct ip_fwdelnum *new = m;
1529 if (len != sizeof(struct ip_fwdelnum)
1530 || !check_label(new->fwd_label))
1531 ret = EINVAL;
1532 else if ((chain = find_label(new->fwd_label)) == NULL)
1533 ret = ENOENT;
1534 else ret = del_num_from_chain(chain, new->fwd_rulenum);
1536 break;
1538 case IP_FW_CREATECHAIN: {
1539 if (len != sizeof(ip_chainlabel)) {
1540 duprintf("create_chain: bad size %i\n", len);
1541 ret = EINVAL;
1543 else ret = create_chain(m);
1545 break;
1547 case IP_FW_DELETECHAIN: {
1548 if (len != sizeof(ip_chainlabel)) {
1549 duprintf("delete_chain: bad size %i\n", len);
1550 ret = EINVAL;
1552 else ret = del_chain(m);
1554 break;
1556 case IP_FW_POLICY: {
1557 struct ip_fwpolicy *new = m;
1559 if (len != sizeof(struct ip_fwpolicy)
1560 || !check_label(new->fwp_label))
1561 ret = EINVAL;
1562 else if ((chain = find_label(new->fwp_label)) == NULL)
1563 ret = ENOENT;
1564 else if (chain != IP_FW_INPUT_CHAIN
1565 && chain != IP_FW_FORWARD_CHAIN
1566 && chain != IP_FW_OUTPUT_CHAIN) {
1567 duprintf("change_policy: can't change policy on user"
1568 " defined chain.\n");
1569 ret = EINVAL;
1571 else {
1572 int pol = FW_SKIP;
1573 find_special(new->fwp_policy, &pol);
1575 switch(pol) {
1576 case FW_MASQUERADE:
1577 if (chain != IP_FW_FORWARD_CHAIN) {
1578 ret = EINVAL;
1579 break;
1581 /* Fall thru... */
1582 case FW_BLOCK:
1583 case FW_ACCEPT:
1584 case FW_REJECT:
1585 ret = change_policy(chain, pol);
1586 break;
1587 default:
1588 duprintf("change_policy: bad policy `%s'\n",
1589 new->fwp_policy);
1590 ret = EINVAL;
1593 break;
1595 default:
1596 duprintf("ip_fw_ctl: unknown request %d\n",cmd);
1597 ret = ENOPROTOOPT;
1600 FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1601 return ret;
1604 /* Returns bytes used - doesn't NUL terminate */
1605 static int dump_rule(char *buffer,
1606 const char *chainlabel,
1607 const struct ip_fwkernel *rule)
1609 int len;
1610 unsigned int i;
1611 __u64 packets = 0, bytes = 0;
1613 FWC_HAVE_LOCK(fwc_wlocks);
1614 for (i = 0; i < NUM_SLOTS; i++) {
1615 packets += rule->counters[i].pcnt;
1616 bytes += rule->counters[i].bcnt;
1619 len=sprintf(buffer,
1620 "%9s " /* Chain name */
1621 "%08X/%08X->%08X/%08X " /* Source & Destination IPs */
1622 "%.16s " /* Interface */
1623 "%X %X " /* fw_flg and fw_invflg fields */
1624 "%u " /* Protocol */
1625 "%-9u %-9u %-9u %-9u " /* Packet & byte counters */
1626 "%u-%u %u-%u " /* Source & Dest port ranges */
1627 "A%02X X%02X " /* TOS and and xor masks */
1628 "%08X " /* Redirection port */
1629 "%u " /* fw_mark field */
1630 "%u " /* output size */
1631 "%9s\n", /* Target */
1632 chainlabel,
1633 ntohl(rule->ipfw.fw_src.s_addr),
1634 ntohl(rule->ipfw.fw_smsk.s_addr),
1635 ntohl(rule->ipfw.fw_dst.s_addr),
1636 ntohl(rule->ipfw.fw_dmsk.s_addr),
1637 (rule->ipfw.fw_vianame)[0] ? rule->ipfw.fw_vianame : "-",
1638 rule->ipfw.fw_flg,
1639 rule->ipfw.fw_invflg,
1640 rule->ipfw.fw_proto,
1641 (__u32)(packets >> 32), (__u32)packets,
1642 (__u32)(bytes >> 32), (__u32)bytes,
1643 rule->ipfw.fw_spts[0], rule->ipfw.fw_spts[1],
1644 rule->ipfw.fw_dpts[0], rule->ipfw.fw_dpts[1],
1645 rule->ipfw.fw_tosand, rule->ipfw.fw_tosxor,
1646 rule->ipfw.fw_redirpt,
1647 rule->ipfw.fw_mark,
1648 rule->ipfw.fw_outputsize,
1649 branchname(rule->branch,rule->simplebranch));
1651 duprintf("dump_rule: %i bytes done.\n", len);
1652 return len;
1655 /* File offset is actually in records, not bytes. */
1656 static int ip_chain_procinfo(char *buffer, char **start,
1657 off_t offset, int length)
1659 struct ip_chain *i;
1660 struct ip_fwkernel *j = ip_fw_chains->chain;
1661 unsigned long flags;
1662 int len = 0;
1663 int last_len = 0;
1664 off_t upto = 0;
1666 duprintf("Offset starts at %lu\n", offset);
1667 duprintf("ip_fw_chains is 0x%0lX\n", (unsigned long int)ip_fw_chains);
1669 /* Need a write lock to lock out ``readers'' which update counters. */
1670 FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1672 for (i = ip_fw_chains; i; i = i->next) {
1673 for (j = i->chain; j; j = j->next) {
1674 if (upto == offset) break;
1675 duprintf("Skipping rule in chain `%s'\n",
1676 i->label);
1677 upto++;
1679 if (upto == offset) break;
1682 /* Don't init j first time, or once i = NULL */
1683 for (; i; (void)((i = i->next) && (j = i->chain))) {
1684 duprintf("Dumping chain `%s'\n", i->label);
1685 for (; j; j = j->next, upto++, last_len = len)
1687 len += dump_rule(buffer+len, i->label, j);
1688 if (len > length) {
1689 duprintf("Dumped to %i (past %i). "
1690 "Moving back to %i.\n",
1691 len, length, last_len);
1692 len = last_len;
1693 goto outside;
1697 outside:
1698 FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1699 buffer[len] = '\0';
1701 duprintf("ip_chain_procinfo: Length = %i (of %i). Offset = %li.\n",
1702 len, length, upto);
1703 /* `start' hack - see fs/proc/generic.c line ~165 */
1704 *start=(char *)((unsigned int)upto-offset);
1705 return len;
1708 static int ip_chain_name_procinfo(char *buffer, char **start,
1709 off_t offset, int length)
1711 struct ip_chain *i;
1712 int len = 0,last_len = 0;
1713 off_t pos = 0,begin = 0;
1714 unsigned long flags;
1716 /* Need a write lock to lock out ``readers'' which update counters. */
1717 FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1719 for (i = ip_fw_chains; i; i = i->next)
1721 unsigned int j;
1722 __u32 packetsHi = 0, packetsLo = 0, bytesHi = 0, bytesLo = 0;
1724 for (j = 0; j < NUM_SLOTS; j++) {
1725 packetsLo += i->reent[j].counters.pcnt & 0xFFFFFFFF;
1726 packetsHi += ((i->reent[j].counters.pcnt >> 32)
1727 & 0xFFFFFFFF);
1728 bytesLo += i->reent[j].counters.bcnt & 0xFFFFFFFF;
1729 bytesHi += ((i->reent[j].counters.bcnt >> 32)
1730 & 0xFFFFFFFF);
1733 /* print the label and the policy */
1734 len+=sprintf(buffer+len,"%s %s %i %u %u %u %u\n",
1735 i->label,branchname(NULL, i->policy),i->refcount,
1736 packetsHi, packetsLo, bytesHi, bytesLo);
1737 pos=begin+len;
1738 if(pos<offset) {
1739 len=0;
1740 begin=pos;
1742 else if(pos>offset+length) {
1743 len = last_len;
1744 break;
1747 last_len = len;
1749 FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1751 *start = buffer+(offset-begin);
1752 len-=(offset-begin);
1753 if(len>length)
1754 len=length;
1755 return len;
1759 * Interface to the generic firewall chains.
1761 int ipfw_input_check(struct firewall_ops *this, int pf,
1762 struct net_device *dev, void *arg,
1763 struct sk_buff **pskb)
1765 return ip_fw_check(dev->name,
1766 arg, IP_FW_INPUT_CHAIN, pskb, SLOT_NUMBER(), 0);
1769 int ipfw_output_check(struct firewall_ops *this, int pf,
1770 struct net_device *dev, void *arg,
1771 struct sk_buff **pskb)
1773 /* Locally generated bogus packets by root. <SIGH>. */
1774 if ((*pskb)->len < sizeof(struct iphdr) ||
1775 (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
1776 return FW_ACCEPT;
1777 return ip_fw_check(dev->name,
1778 arg, IP_FW_OUTPUT_CHAIN, pskb, SLOT_NUMBER(), 0);
1781 int ipfw_forward_check(struct firewall_ops *this, int pf,
1782 struct net_device *dev, void *arg,
1783 struct sk_buff **pskb)
1785 return ip_fw_check(dev->name,
1786 arg, IP_FW_FORWARD_CHAIN, pskb, SLOT_NUMBER(), 0);
1789 struct firewall_ops ipfw_ops = {
1790 .fw_forward = ipfw_forward_check,
1791 .fw_input = ipfw_input_check,
1792 .fw_output = ipfw_output_check,
1795 int ipfw_init_or_cleanup(int init)
1797 struct proc_dir_entry *proc;
1798 int ret = 0;
1799 unsigned long flags;
1801 if (!init) goto cleanup;
1803 #ifdef DEBUG_IP_FIREWALL_LOCKING
1804 fwc_wlocks = fwc_rlocks = 0;
1805 #endif
1807 #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
1808 ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL);
1809 if (ipfwsk == NULL)
1810 goto cleanup_nothing;
1811 #endif
1813 ret = register_firewall(PF_INET, &ipfw_ops);
1814 if (ret < 0)
1815 goto cleanup_netlink;
1817 proc = proc_net_create(IP_FW_PROC_CHAINS, S_IFREG | S_IRUSR | S_IWUSR,
1818 ip_chain_procinfo);
1819 if (proc) proc->owner = THIS_MODULE;
1820 proc = proc_net_create(IP_FW_PROC_CHAIN_NAMES,
1821 S_IFREG | S_IRUSR | S_IWUSR,
1822 ip_chain_name_procinfo);
1823 if (proc) proc->owner = THIS_MODULE;
1825 IP_FW_INPUT_CHAIN = ip_init_chain(IP_FW_LABEL_INPUT, 1, FW_ACCEPT);
1826 IP_FW_FORWARD_CHAIN = ip_init_chain(IP_FW_LABEL_FORWARD, 1, FW_ACCEPT);
1827 IP_FW_OUTPUT_CHAIN = ip_init_chain(IP_FW_LABEL_OUTPUT, 1, FW_ACCEPT);
1829 return ret;
1831 cleanup:
1832 unregister_firewall(PF_INET, &ipfw_ops);
1834 FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1835 while (ip_fw_chains) {
1836 struct ip_chain *next = ip_fw_chains->next;
1838 clear_fw_chain(ip_fw_chains);
1839 kfree(ip_fw_chains);
1840 ip_fw_chains = next;
1842 FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1844 proc_net_remove(IP_FW_PROC_CHAINS);
1845 proc_net_remove(IP_FW_PROC_CHAIN_NAMES);
1847 cleanup_netlink:
1848 #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
1849 sock_release(ipfwsk->sk_socket);
1851 cleanup_nothing:
1852 #endif
1853 return ret;