pre-2.3.4..
[davej-history.git] / net / ipv4 / ip_fw.c
blobf3dbafc04f0ab89463bce28a45bb9277493d8f59
1 /*
2 * This code is heavily based on the code on the old ip_fw.c code; see below for
3 * copyrights and attributions of the old code. This code is basically GPL.
5 * 15-Aug-1997: Major changes to allow graphs for firewall rules.
6 * Paul Russell <Paul.Russell@rustcorp.com.au> and
7 * Michael Neuling <Michael.Neuling@rustcorp.com.au>
8 * 24-Aug-1997: Generalised protocol handling (not just TCP/UDP/ICMP).
9 * Added explicit RETURN from chains.
10 * Removed TOS mangling (done in ipchains 1.0.1).
11 * Fixed read & reset bug by reworking proc handling.
12 * Paul Russell <Paul.Russell@rustcorp.com.au>
13 * 28-Sep-1997: Added packet marking for net sched code.
14 * Removed fw_via comparisons: all done on device name now,
15 * similar to changes in ip_fw.c in DaveM's CVS970924 tree.
16 * Paul Russell <Paul.Russell@rustcorp.com.au>
17 * 2-Nov-1997: Moved types across to __u16, etc.
18 * Added inverse flags.
19 * Fixed fragment bug (in args to port_match).
20 * Changed mark to only one flag (MARKABS).
21 * 21-Nov-1997: Added ability to test ICMP code.
22 * 19-Jan-1998: Added wildcard interfaces.
23 * 6-Feb-1998: Merged 2.0 and 2.1 versions.
24 * Initialised ip_masq for 2.0.x version.
25 * Added explicit NETLINK option for 2.1.x version.
26 * Added packet and byte counters for policy matches.
27 * 26-Feb-1998: Fixed race conditions, added SMP support.
28 * 18-Mar-1998: Fix SMP, fix race condition fix.
29 * 1-May-1998: Remove caching of device pointer.
30 * 12-May-1998: Allow tiny fragment case for TCP/UDP.
31 * 15-May-1998: Treat short packets as fragments, don't just block.
32 * 3-Jan-1999: Fixed serious procfs security hole -- users should never
33 * be allowed to view the chains!
34 * Marc Santoro <ultima@snicker.emoti.com>
35 * 29-Jan-1999: Locally generated bogus IPs dealt with, rather than crash
36 * during dump_packet. --RR.
41 * The original Linux port was done by Alan Cox, with changes/fixes from
42 * Pauline Middlelink, Jos Vos, Thomas Quinot, Wouter Gadeyne, Juan
43 * Jose Ciarlante, Bernd Eckenfels, Keith Owens and others.
45 * Copyright from the original FreeBSD version follows:
47 * Copyright (c) 1993 Daniel Boulet
48 * Copyright (c) 1994 Ugen J.S.Antsilevich
50 * Redistribution and use in source forms, with and without modification,
51 * are permitted provided that this entire comment appears intact.
53 * Redistribution in binary form may occur without any restrictions.
54 * Obviously, it would be nice if you gave credit where credit is due
55 * but requiring it would be too onerous.
57 * This software is provided ``AS IS'' without any warranties of any kind. */
60 #include <linux/config.h>
62 #include <asm/uaccess.h>
63 #include <asm/system.h>
64 #include <linux/types.h>
65 #include <linux/sched.h>
66 #include <linux/string.h>
67 #include <linux/errno.h>
69 #include <linux/socket.h>
70 #include <linux/sockios.h>
71 #include <linux/in.h>
72 #include <linux/inet.h>
73 #include <linux/netdevice.h>
74 #include <linux/icmp.h>
75 #include <linux/udp.h>
76 #include <net/ip.h>
77 #include <net/protocol.h>
78 #include <net/route.h>
79 #include <net/tcp.h>
80 #include <net/udp.h>
81 #include <net/sock.h>
82 #include <net/icmp.h>
83 #include <linux/netlink.h>
84 #include <linux/init.h>
85 #include <linux/firewall.h>
86 #include <linux/ip_fw.h>
88 #ifdef CONFIG_IP_MASQUERADE
89 #include <net/ip_masq.h>
90 #endif
92 #include <net/checksum.h>
93 #include <linux/proc_fs.h>
94 #include <linux/stat.h>
96 /* Understanding locking in this code: (thanks to Alan Cox for using
97 * little words to explain this to me). -- PR
99 * In UP, there can be two packets traversing the chains:
100 * 1) A packet from the current userspace context
101 * 2) A packet off the bh handlers (timer or net).
103 * For SMP (kernel v2.1+), multiply this by # CPUs.
105 * [Note that this is not correct for 2.2 - because the socket code always
106 * uses lock_kernel() to serialize, and bottom halves (timers and net_bhs)
107 * only run on one CPU at a time. This will probably change for 2.3.
108 * It is still good to use spinlocks because that avoids the global cli()
109 * for updating the tables, which is rather costly in SMP kernels -AK]
111 * This means counters and backchains can get corrupted if no precautions
112 * are taken.
114 * To actually alter a chain on UP, we need only do a cli(), as this will
115 * stop a bh handler firing, as we are in the current userspace context
116 * (coming from a setsockopt()).
118 * On SMP, we need a write_lock_irqsave(), which is a simple cli() in
119 * UP.
121 * For backchains and counters, we use an array, indexed by
122 * [cpu_number_map[smp_processor_id()]*2 + !in_interrupt()]; the array is of
123 * size [smp_num_cpus*2]. For v2.0, smp_num_cpus is effectively 1. So,
124 * confident of uniqueness, we modify counters even though we only
125 * have a read lock (to read the counters, you need a write lock,
126 * though). */
128 /* Why I didn't use straight locking... -- PR
130 * The backchains can be separated out of the ip_chains structure, and
131 * allocated as needed inside ip_fw_check().
133 * The counters, however, can't. Trying to lock these means blocking
134 * interrupts every time we want to access them. This would suck HARD
135 * performance-wise. Not locking them leads to possible corruption,
136 * made worse on 32-bit machines (counters are 64-bit). */
/* Compile-time debugging switches; remove the comment markers to enable. */
/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/
/*#define DEBUG_IP_FIREWALL_LOCKING*/

#ifdef CONFIG_IP_FIREWALL_NETLINK
/* Socket handed to netlink_broadcast() when a rule copies packets out. */
static struct sock *ipfwsk;
#endif

/*
 * Index into the per-rule counter array and the per-chain reent array.
 * There are two slots per CPU: one used when in_interrupt() is true and
 * one used otherwise.
 */
#ifdef __SMP__
#define SLOT_NUMBER() \
	(cpu_number_map[smp_processor_id()]*2 + !in_interrupt())
#else
#define SLOT_NUMBER() (!in_interrupt())
#endif
#define NUM_SLOTS (smp_num_cpus*2)

/* Allocation sizes including the trailing NUM_SLOTS-element arrays. */
#define SIZEOF_STRUCT_IP_CHAIN \
	(sizeof(struct ip_chain) + NUM_SLOTS*sizeof(struct ip_reent))
#define SIZEOF_STRUCT_IP_FW_KERNEL \
	(sizeof(struct ip_fwkernel) + NUM_SLOTS*sizeof(struct ip_counters))
/*
 * Lock bookkeeping.  With DEBUG_IP_FIREWALL_LOCKING defined, one bit per
 * slot is kept in fwc_rlocks / fwc_wlocks and a message is printed when a
 * slot takes a lock it already has or releases one it does not have.
 * Without it all four checking macros expand to nothing.
 */
#ifdef DEBUG_IP_FIREWALL_LOCKING
static unsigned int fwc_rlocks, fwc_wlocks;

#define FWC_DEBUG_LOCK(d)					\
do {								\
	FWC_DONT_HAVE_LOCK(d);					\
	d |= (1 << SLOT_NUMBER());				\
} while (0)

#define FWC_DEBUG_UNLOCK(d)					\
do {								\
	FWC_HAVE_LOCK(d);					\
	d &= ~(1 << SLOT_NUMBER());				\
} while (0)

#define FWC_DONT_HAVE_LOCK(d)					\
do {								\
	if ((d) & (1 << SLOT_NUMBER()))				\
		printk("%s:%i: Got lock on %i already!\n",	\
		       __FILE__, __LINE__, SLOT_NUMBER());	\
} while(0)

#define FWC_HAVE_LOCK(d)					\
do {								\
	if (!((d) & (1 << SLOT_NUMBER())))			\
		printk("%s:%i:No lock on %i!\n",		\
		       __FILE__, __LINE__, SLOT_NUMBER());	\
} while (0)

#else
#define FWC_DEBUG_LOCK(d) do { } while(0)
#define FWC_DEBUG_UNLOCK(d) do { } while(0)
#define FWC_DONT_HAVE_LOCK(d) do { } while(0)
#define FWC_HAVE_LOCK(d) do { } while(0)
#endif /* DEBUG_IP_FIREWALL_LOCKING */

/* Wrappers pairing the bookkeeping above with the real rwlock operation. */
#define FWC_READ_LOCK(l) \
	do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock(l); } while (0)
#define FWC_WRITE_LOCK(l) \
	do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock(l); } while (0)
#define FWC_READ_LOCK_IRQ(l,f) \
	do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock_irqsave(l,f); } while (0)
#define FWC_WRITE_LOCK_IRQ(l,f) \
	do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock_irqsave(l,f); } while (0)
#define FWC_READ_UNLOCK(l) \
	do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock(l); } while (0)
#define FWC_WRITE_UNLOCK(l) \
	do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock(l); } while (0)
#define FWC_READ_UNLOCK_IRQ(l,f) \
	do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock_irqrestore(l,f); } while (0)
#define FWC_WRITE_UNLOCK_IRQ(l,f) \
	do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock_irqrestore(l,f); } while (0)
203 struct ip_chain;
205 struct ip_counters
207 __u64 pcnt, bcnt; /* Packet and byte counters */
210 struct ip_fwkernel
212 struct ip_fw ipfw;
213 struct ip_fwkernel *next; /* where to go next if current
214 * rule doesn't match */
215 struct ip_chain *branch; /* which branch to jump to if
216 * current rule matches */
217 int simplebranch; /* Use this if branch == NULL */
218 struct ip_counters counters[0]; /* Actually several of these */
221 struct ip_reent
223 struct ip_chain *prevchain; /* Pointer to referencing chain */
224 struct ip_fwkernel *prevrule; /* Pointer to referencing rule */
225 struct ip_counters counters;
228 struct ip_chain
230 ip_chainlabel label; /* Defines the label for each block */
231 struct ip_chain *next; /* Pointer to next block */
232 struct ip_fwkernel *chain; /* Pointer to first rule in block */
233 __u32 refcount; /* Number of refernces to block */
234 int policy; /* Default rule for chain. Only *
235 * used in built in chains */
236 struct ip_reent reent[0]; /* Actually several of these */
/*
 *	Implement IP packet firewall
 */

/* dprintf: packet-path tracing, compiled in only with DEBUG_IP_FIREWALL. */
#ifndef DEBUG_IP_FIREWALL
#define dprintf(format, args...)
#else
#define dprintf(format, args...) printk(format , ## args)
#endif

/* duprintf: tracing of user requests, only with DEBUG_IP_FIREWALL_USER. */
#ifndef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...)
#else
#define duprintf(format, args...) printk(format , ## args)
#endif
255 /* Lock around ip_fw_chains linked list structure */
256 rwlock_t ip_fw_lock = RW_LOCK_UNLOCKED;
258 /* Head of linked list of fw rules */
259 static struct ip_chain *ip_fw_chains;
261 #define IP_FW_INPUT_CHAIN ip_fw_chains
262 #define IP_FW_FORWARD_CHAIN (ip_fw_chains->next)
263 #define IP_FW_OUTPUT_CHAIN (ip_fw_chains->next->next)
/*
 * Test a port against the inclusive range [min, max].
 *
 * Returns non-zero on a match and 0 otherwise.  For a fragment (frag
 * non-zero) the port is unknown, so only the full range 0..0xFFFF
 * matches and `invert' is not applied.  For anything else the range test
 * is XORed with `invert'.
 */
extern inline int port_match(__u16 min, __u16 max, __u16 port,
			     int frag, int invert)
{
	int in_range;

	/* Fragments fail ANY port test. */
	if (frag)
		return (min == 0 && max == 0xFFFF);

	in_range = (port >= min && port <= max);
	return in_range ^ invert;
}
274 /* Returns whether matches rule or not. */
275 static int ip_rule_match(struct ip_fwkernel *f,
276 const char *ifname,
277 struct iphdr *ip,
278 char tcpsyn,
279 __u16 src_port, __u16 dst_port,
280 char isfrag)
282 #define FWINV(bool,invflg) ((bool) ^ !!(f->ipfw.fw_invflg & invflg))
284 * This is a bit simpler as we don't have to walk
285 * an interface chain as you do in BSD - same logic
286 * however.
289 if (FWINV((ip->saddr&f->ipfw.fw_smsk.s_addr) != f->ipfw.fw_src.s_addr,
290 IP_FW_INV_SRCIP)
291 || FWINV((ip->daddr&f->ipfw.fw_dmsk.s_addr)!=f->ipfw.fw_dst.s_addr,
292 IP_FW_INV_DSTIP)) {
293 dprintf("Source or dest mismatch.\n");
295 dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
296 f->ipfw.fw_smsk.s_addr, f->ipfw.fw_src.s_addr,
297 f->ipfw.fw_invflg & IP_FW_INV_SRCIP ? " (INV)" : "");
298 dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
299 f->ipfw.fw_dmsk.s_addr, f->ipfw.fw_dst.s_addr,
300 f->ipfw.fw_invflg & IP_FW_INV_DSTIP ? " (INV)" : "");
301 return 0;
305 * Look for a VIA device match
307 if (f->ipfw.fw_flg & IP_FW_F_WILDIF) {
308 if (FWINV(strncmp(ifname, f->ipfw.fw_vianame,
309 strlen(f->ipfw.fw_vianame)) != 0,
310 IP_FW_INV_VIA)) {
311 dprintf("Wildcard interface mismatch.%s\n",
312 f->ipfw.fw_invflg & IP_FW_INV_VIA ? " (INV)" : "");
313 return 0; /* Mismatch */
316 else if (FWINV(strcmp(ifname, f->ipfw.fw_vianame) != 0,
317 IP_FW_INV_VIA)) {
318 dprintf("Interface name does not match.%s\n",
319 f->ipfw.fw_invflg & IP_FW_INV_VIA
320 ? " (INV)" : "");
321 return 0; /* Mismatch */
325 * Ok the chain addresses match.
328 /* If we have a fragment rule but the packet is not a fragment
329 * the we return zero */
330 if (FWINV((f->ipfw.fw_flg&IP_FW_F_FRAG) && !isfrag, IP_FW_INV_FRAG)) {
331 dprintf("Fragment rule but not fragment.%s\n",
332 f->ipfw.fw_invflg & IP_FW_INV_FRAG ? " (INV)" : "");
333 return 0;
336 /* Fragment NEVER passes a SYN test, even an inverted one. */
337 if (FWINV((f->ipfw.fw_flg&IP_FW_F_TCPSYN) && !tcpsyn, IP_FW_INV_SYN)
338 || (isfrag && (f->ipfw.fw_flg&IP_FW_F_TCPSYN))) {
339 dprintf("Rule requires SYN and packet has no SYN.%s\n",
340 f->ipfw.fw_invflg & IP_FW_INV_SYN ? " (INV)" : "");
341 return 0;
344 if (f->ipfw.fw_proto) {
346 * Specific firewall - packet's protocol
347 * must match firewall's.
350 if (FWINV(ip->protocol!=f->ipfw.fw_proto, IP_FW_INV_PROTO)) {
351 dprintf("Packet protocol %hi does not match %hi.%s\n",
352 ip->protocol, f->ipfw.fw_proto,
353 f->ipfw.fw_invflg&IP_FW_INV_PROTO ? " (INV)":"");
354 return 0;
357 /* For non TCP/UDP/ICMP, port range is max anyway. */
358 if (!port_match(f->ipfw.fw_spts[0],
359 f->ipfw.fw_spts[1],
360 src_port, isfrag,
361 !!(f->ipfw.fw_invflg&IP_FW_INV_SRCPT))
362 || !port_match(f->ipfw.fw_dpts[0],
363 f->ipfw.fw_dpts[1],
364 dst_port, isfrag,
365 !!(f->ipfw.fw_invflg
366 &IP_FW_INV_DSTPT))) {
367 dprintf("Port match failed.\n");
368 return 0;
372 dprintf("Match succeeded.\n");
373 return 1;
376 static const char *branchname(struct ip_chain *branch,int simplebranch)
378 if (branch)
379 return branch->label;
380 switch (simplebranch)
382 case FW_BLOCK: return IP_FW_LABEL_BLOCK;
383 case FW_ACCEPT: return IP_FW_LABEL_ACCEPT;
384 case FW_REJECT: return IP_FW_LABEL_REJECT;
385 case FW_REDIRECT: return IP_FW_LABEL_REDIRECT;
386 case FW_MASQUERADE: return IP_FW_LABEL_MASQUERADE;
387 case FW_SKIP: return "-";
388 case FW_SKIP+1: return IP_FW_LABEL_RETURN;
389 default:
390 return "UNKNOWN";
395 * VERY ugly piece of code which actually
396 * makes kernel printf for matching packets...
398 static void dump_packet(const struct iphdr *ip,
399 const char *ifname,
400 struct ip_fwkernel *f,
401 const ip_chainlabel chainlabel,
402 __u16 src_port,
403 __u16 dst_port)
405 __u32 *opt = (__u32 *) (ip + 1);
406 int opti;
408 if (f)
410 printk(KERN_INFO "Packet log: %s ",chainlabel);
412 printk("%s ",branchname(f->branch,f->simplebranch));
413 if (f->simplebranch==FW_REDIRECT)
414 printk("%d ",f->ipfw.fw_redirpt);
417 printk("%s PROTO=%d %ld.%ld.%ld.%ld:%hu %ld.%ld.%ld.%ld:%hu"
418 " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
419 ifname, ip->protocol,
420 (ntohl(ip->saddr)>>24)&0xFF,
421 (ntohl(ip->saddr)>>16)&0xFF,
422 (ntohl(ip->saddr)>>8)&0xFF,
423 (ntohl(ip->saddr))&0xFF,
424 src_port,
425 (ntohl(ip->daddr)>>24)&0xFF,
426 (ntohl(ip->daddr)>>16)&0xFF,
427 (ntohl(ip->daddr)>>8)&0xFF,
428 (ntohl(ip->daddr))&0xFF,
429 dst_port,
430 ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
431 ntohs(ip->frag_off), ip->ttl);
433 for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
434 printk(" O=0x%8.8X", *opt++);
435 printk("\n");
438 /* function for checking chain labels for user space. */
439 static int check_label(ip_chainlabel label)
441 unsigned int i;
442 /* strlen must be < IP_FW_MAX_LABEL_LENGTH. */
443 for (i = 0; i < IP_FW_MAX_LABEL_LENGTH + 1; i++)
444 if (label[i] == '\0') return 1;
446 return 0;
449 /* This function returns a pointer to the first chain with a label
450 * that matches the one given. */
451 static struct ip_chain *find_label(ip_chainlabel label)
453 struct ip_chain *tmp;
454 FWC_HAVE_LOCK(fwc_rlocks | fwc_wlocks);
455 for (tmp = ip_fw_chains; tmp; tmp = tmp->next)
456 if (strcmp(tmp->label,label) == 0)
457 break;
458 return tmp;
461 /* This function returns a boolean which when true sets answer to one
462 of the FW_*. */
463 static int find_special(ip_chainlabel label, int *answer)
465 if (label[0] == '\0') {
466 *answer = FW_SKIP; /* => pass-through rule */
467 return 1;
468 } else if (strcmp(label,IP_FW_LABEL_ACCEPT) == 0) {
469 *answer = FW_ACCEPT;
470 return 1;
471 } else if (strcmp(label,IP_FW_LABEL_BLOCK) == 0) {
472 *answer = FW_BLOCK;
473 return 1;
474 } else if (strcmp(label,IP_FW_LABEL_REJECT) == 0) {
475 *answer = FW_REJECT;
476 return 1;
477 #ifdef CONFIG_IP_TRANSPARENT_PROXY
478 } else if (strcmp(label,IP_FW_LABEL_REDIRECT) == 0) {
479 *answer = FW_REDIRECT;
480 return 1;
481 #endif
482 #ifdef CONFIG_IP_MASQUERADE
483 } else if (strcmp(label,IP_FW_LABEL_MASQUERADE) == 0) {
484 *answer = FW_MASQUERADE;
485 return 1;
486 #endif
487 } else if (strcmp(label, IP_FW_LABEL_RETURN) == 0) {
488 *answer = FW_SKIP+1;
489 return 1;
490 } else {
491 return 0;
495 /* This function cleans up the prevchain and prevrule. If the verbose
496 * flag is set then he names of the chains will be printed as it
497 * cleans up. */
498 static void cleanup(struct ip_chain *chain,
499 const int verbose,
500 unsigned int slot)
502 struct ip_chain *tmpchain = chain->reent[slot].prevchain;
503 if (verbose)
504 printk(KERN_ERR "Chain backtrace: ");
505 while (tmpchain) {
506 if (verbose)
507 printk("%s<-",chain->label);
508 chain->reent[slot].prevchain = NULL;
509 chain = tmpchain;
510 tmpchain = chain->reent[slot].prevchain;
512 if (verbose)
513 printk("%s\n",chain->label);
516 static inline int
517 ip_fw_domatch(struct ip_fwkernel *f,
518 struct iphdr *ip,
519 const char *rif,
520 const ip_chainlabel label,
521 struct sk_buff *skb,
522 unsigned int slot,
523 __u16 src_port, __u16 dst_port)
525 f->counters[slot].bcnt+=ntohs(ip->tot_len);
526 f->counters[slot].pcnt++;
527 if (f->ipfw.fw_flg & IP_FW_F_PRN) {
528 dump_packet(ip,rif,f,label,src_port,dst_port);
530 ip->tos = (ip->tos & f->ipfw.fw_tosand) ^ f->ipfw.fw_tosxor;
532 /* This functionality is useless in stock 2.0.x series, but we don't
533 * discard the mark thing altogether, to avoid breaking ipchains (and,
534 * more importantly, the ipfwadm wrapper) --PR */
535 if (f->ipfw.fw_flg & IP_FW_F_MARKABS)
536 skb->fwmark = f->ipfw.fw_mark;
537 else
538 skb->fwmark+=f->ipfw.fw_mark;
539 #ifdef CONFIG_IP_FIREWALL_NETLINK
540 if (f->ipfw.fw_flg & IP_FW_F_NETLINK) {
541 size_t len = min(f->ipfw.fw_outputsize, ntohs(ip->tot_len))
542 + sizeof(__u32) + sizeof(skb->fwmark) + IFNAMSIZ;
543 struct sk_buff *outskb=alloc_skb(len, GFP_ATOMIC);
545 duprintf("Sending packet out NETLINK (length = %u).\n",
546 (unsigned int)len);
547 if (outskb) {
548 /* Prepend length, mark & interface */
549 skb_put(outskb, len);
550 *((__u32 *)outskb->data) = (__u32)len;
551 *((__u32 *)(outskb->data+sizeof(__u32))) = skb->fwmark;
552 strcpy(outskb->data+sizeof(__u32)*2, rif);
553 memcpy(outskb->data+sizeof(__u32)*2+IFNAMSIZ, ip,
554 len-(sizeof(__u32)*2+IFNAMSIZ));
555 netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_KERNEL);
557 else {
558 if (net_ratelimit())
559 printk(KERN_WARNING "ip_fw: packet drop due to "
560 "netlink failure\n");
561 return 0;
564 #endif
565 return 1;
569 * Returns one of the generic firewall policies, like FW_ACCEPT.
571 * The testing is either false for normal firewall mode or true for
572 * user checking mode (counters are not updated, TOS & mark not done).
574 static int
575 ip_fw_check(struct iphdr *ip,
576 const char *rif,
577 __u16 *redirport,
578 struct ip_chain *chain,
579 struct sk_buff *skb,
580 unsigned int slot,
581 int testing)
583 struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
584 struct udphdr *udp=(struct udphdr *)((__u32 *)ip+ip->ihl);
585 struct icmphdr *icmp=(struct icmphdr *)((__u32 *)ip+ip->ihl);
586 __u32 src, dst;
587 __u16 src_port = 0xFFFF, dst_port = 0xFFFF;
588 char tcpsyn=0;
589 __u16 offset;
590 unsigned char oldtos;
591 struct ip_fwkernel *f;
592 int ret = FW_SKIP+2;
594 /* We handle fragments by dealing with the first fragment as
595 * if it was a normal packet. All other fragments are treated
596 * normally, except that they will NEVER match rules that ask
597 * things we don't know, ie. tcp syn flag or ports). If the
598 * rule is also a fragment-specific rule, non-fragments won't
599 * match it. */
601 offset = ntohs(ip->frag_off) & IP_OFFSET;
604 * Don't allow a fragment of TCP 8 bytes in. Nobody
605 * normal causes this. Its a cracker trying to break
606 * in by doing a flag overwrite to pass the direction
607 * checks.
610 if (offset == 1 && ip->protocol == IPPROTO_TCP) {
611 if (!testing && net_ratelimit()) {
612 printk("Suspect TCP fragment.\n");
613 dump_packet(ip,rif,NULL,NULL,0,0);
615 return FW_BLOCK;
618 /* If we can't investigate ports, treat as fragment. It's
619 * either a trucated whole packet, or a truncated first
620 * fragment, or a TCP first fragment of length 8-15, in which
621 * case the above rule stops reassembly.
623 if (offset == 0) {
624 unsigned int size_req;
625 switch (ip->protocol) {
626 case IPPROTO_TCP:
627 /* Don't care about things past flags word */
628 size_req = 16;
629 break;
631 case IPPROTO_UDP:
632 case IPPROTO_ICMP:
633 size_req = 8;
634 break;
636 default:
637 size_req = 0;
639 offset = (ntohs(ip->tot_len) < (ip->ihl<<2)+size_req);
642 src = ip->saddr;
643 dst = ip->daddr;
644 oldtos = ip->tos;
647 * If we got interface from which packet came
648 * we can use the address directly. Linux 2.1 now uses address
649 * chains per device too, but unlike BSD we first check if the
650 * incoming packet matches a device address and the routing
651 * table before calling the firewall.
654 dprintf("Packet ");
655 switch(ip->protocol)
657 case IPPROTO_TCP:
658 dprintf("TCP ");
659 if (!offset) {
660 src_port=ntohs(tcp->source);
661 dst_port=ntohs(tcp->dest);
663 /* Connection initilisation can only
664 * be made when the syn bit is set and
665 * neither of the ack or reset is
666 * set. */
667 if(tcp->syn && !(tcp->ack || tcp->rst))
668 tcpsyn=1;
670 break;
671 case IPPROTO_UDP:
672 dprintf("UDP ");
673 if (!offset) {
674 src_port=ntohs(udp->source);
675 dst_port=ntohs(udp->dest);
677 break;
678 case IPPROTO_ICMP:
679 if (!offset) {
680 src_port=(__u16)icmp->type;
681 dst_port=(__u16)icmp->code;
683 dprintf("ICMP ");
684 break;
685 default:
686 dprintf("p=%d ",ip->protocol);
687 break;
689 #ifdef DEBUG_IP_FIREWALL
690 print_ip(ip->saddr);
692 if (offset)
693 dprintf(":fragment (%i) ", ((int)offset)<<2);
694 else if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP
695 || ip->protocol==IPPROTO_ICMP)
696 dprintf(":%hu:%hu", src_port, dst_port);
697 dprintf("\n");
698 #endif
700 if (!testing) FWC_READ_LOCK(&ip_fw_lock);
701 else FWC_HAVE_LOCK(fwc_rlocks);
703 f = chain->chain;
704 do {
705 for (; f; f = f->next) {
706 if (ip_rule_match(f,rif,ip,
707 tcpsyn,src_port,dst_port,offset)) {
708 if (!testing
709 && !ip_fw_domatch(f, ip, rif, chain->label,
710 skb, slot,
711 src_port, dst_port)) {
712 ret = FW_BLOCK;
713 goto out;
715 break;
718 if (f) {
719 if (f->branch) {
720 /* Do sanity check to see if we have
721 * already set prevchain and if so we
722 * must be in a loop */
723 if (f->branch->reent[slot].prevchain) {
724 if (!testing) {
725 printk(KERN_ERR
726 "IP firewall: "
727 "Loop detected "
728 "at `%s'.\n",
729 f->branch->label);
730 cleanup(chain, 1, slot);
731 ret = FW_BLOCK;
732 } else {
733 cleanup(chain, 0, slot);
734 ret = FW_SKIP+1;
737 else {
738 f->branch->reent[slot].prevchain
739 = chain;
740 f->branch->reent[slot].prevrule
741 = f->next;
742 chain = f->branch;
743 f = chain->chain;
746 else if (f->simplebranch == FW_SKIP)
747 f = f->next;
748 else if (f->simplebranch == FW_SKIP+1) {
749 /* Just like falling off the chain */
750 goto fall_off_chain;
752 else {
753 cleanup(chain, 0, slot);
754 ret = f->simplebranch;
756 } /* f == NULL */
757 else {
758 fall_off_chain:
759 if (chain->reent[slot].prevchain) {
760 struct ip_chain *tmp = chain;
761 f = chain->reent[slot].prevrule;
762 chain = chain->reent[slot].prevchain;
763 tmp->reent[slot].prevchain = NULL;
765 else {
766 ret = chain->policy;
767 if (!testing) {
768 chain->reent[slot].counters.pcnt++;
769 chain->reent[slot].counters.bcnt
770 += ntohs(ip->tot_len);
774 } while (ret == FW_SKIP+2);
776 out:
777 if (!testing) FWC_READ_UNLOCK(&ip_fw_lock);
779 /* Recalculate checksum if not going to reject, and TOS changed. */
780 if (ip->tos != oldtos
781 && ret != FW_REJECT && ret != FW_BLOCK
782 && !testing)
783 ip_send_check(ip);
785 #ifdef CONFIG_IP_TRANSPARENT_PROXY
786 if (ret == FW_REDIRECT && redirport) {
787 if ((*redirport = htons(f->ipfw.fw_redirpt)) == 0) {
788 /* Wildcard redirection.
789 * Note that redirport will become
790 * 0xFFFF for non-TCP/UDP packets.
792 *redirport = htons(dst_port);
795 #endif
797 #ifdef DEBUG_ALLOW_ALL
798 return (testing ? ret : FW_ACCEPT);
799 #else
800 return ret;
801 #endif
804 /* Must have write lock & interrupts off for any of these */
806 /* This function sets all the byte counters in a chain to zero. The
807 * input is a pointer to the chain required for zeroing */
808 static int zero_fw_chain(struct ip_chain *chainptr)
810 struct ip_fwkernel *i;
812 FWC_HAVE_LOCK(fwc_wlocks);
813 for (i = chainptr->chain; i; i = i->next)
814 memset(i->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
815 return 0;
818 static int clear_fw_chain(struct ip_chain *chainptr)
820 struct ip_fwkernel *i= chainptr->chain;
822 FWC_HAVE_LOCK(fwc_wlocks);
823 chainptr->chain=NULL;
825 while (i) {
826 struct ip_fwkernel *tmp = i->next;
827 if (i->branch)
828 i->branch->refcount--;
829 kfree(i);
830 i = tmp;
832 return 0;
835 static int replace_in_chain(struct ip_chain *chainptr,
836 struct ip_fwkernel *frwl,
837 __u32 position)
839 struct ip_fwkernel *f = chainptr->chain;
841 FWC_HAVE_LOCK(fwc_wlocks);
843 while (--position && f != NULL) f = f->next;
844 if (f == NULL)
845 return EINVAL;
847 if (f->branch) f->branch->refcount--;
848 if (frwl->branch) frwl->branch->refcount++;
850 frwl->next = f->next;
851 memcpy(f,frwl,sizeof(struct ip_fwkernel));
852 kfree(frwl);
853 return 0;
856 static int append_to_chain(struct ip_chain *chainptr, struct ip_fwkernel *rule)
858 struct ip_fwkernel *i;
860 FWC_HAVE_LOCK(fwc_wlocks);
861 /* Special case if no rules already present */
862 if (chainptr->chain == NULL) {
864 /* If pointer writes are atomic then turning off
865 * interupts is not necessary. */
866 chainptr->chain = rule;
867 if (rule->branch) rule->branch->refcount++;
868 return 0;
871 /* Find the rule before the end of the chain */
872 for (i = chainptr->chain; i->next; i = i->next);
873 i->next = rule;
874 if (rule->branch) rule->branch->refcount++;
875 return 0;
878 /* This function inserts a rule at the position of position in the
879 * chain refenced by chainptr. If position is 1 then this rule will
880 * become the new rule one. */
881 static int insert_in_chain(struct ip_chain *chainptr,
882 struct ip_fwkernel *frwl,
883 __u32 position)
885 struct ip_fwkernel *f = chainptr->chain;
887 FWC_HAVE_LOCK(fwc_wlocks);
888 /* special case if the position is number 1 */
889 if (position == 1) {
890 frwl->next = chainptr->chain;
891 if (frwl->branch) frwl->branch->refcount++;
892 chainptr->chain = frwl;
893 return 0;
895 position--;
896 while (--position && f != NULL) f = f->next;
897 if (f == NULL)
898 return EINVAL;
899 if (frwl->branch) frwl->branch->refcount++;
900 frwl->next = f->next;
902 f->next = frwl;
903 return 0;
906 /* This function deletes the a rule from a given rulenum and chain.
907 * With rulenum = 1 is the first rule is deleted. */
909 static int del_num_from_chain(struct ip_chain *chainptr, __u32 rulenum)
911 struct ip_fwkernel *i=chainptr->chain,*tmp;
913 FWC_HAVE_LOCK(fwc_wlocks);
915 if (!chainptr->chain)
916 return ENOENT;
918 /* Need a special case for the first rule */
919 if (rulenum == 1) {
920 /* store temp to allow for freeing up of memory */
921 tmp = chainptr->chain;
922 if (chainptr->chain->branch) chainptr->chain->branch->refcount--;
923 chainptr->chain = chainptr->chain->next;
924 kfree(tmp); /* free memory that is now unused */
925 } else {
926 rulenum--;
927 while (--rulenum && i->next ) i = i->next;
928 if (!i->next)
929 return ENOENT;
930 tmp = i->next;
931 if (i->next->branch)
932 i->next->branch->refcount--;
933 i->next = i->next->next;
934 kfree(tmp);
936 return 0;
940 /* This function deletes the a rule from a given rule and chain.
941 * The rule that is deleted is the first occursance of that rule. */
942 static int del_rule_from_chain(struct ip_chain *chainptr,
943 struct ip_fwkernel *frwl)
945 struct ip_fwkernel *ltmp,*ftmp = chainptr->chain ;
946 int was_found;
948 FWC_HAVE_LOCK(fwc_wlocks);
950 /* Sure, we should compare marks, but since the `ipfwadm'
951 * script uses it for an unholy hack... well, life is easier
952 * this way. We also mask it out of the flags word. --PR */
953 for (ltmp=NULL, was_found=0;
954 !was_found && ftmp != NULL;
955 ltmp = ftmp,ftmp = ftmp->next) {
956 if (ftmp->ipfw.fw_src.s_addr!=frwl->ipfw.fw_src.s_addr
957 || ftmp->ipfw.fw_dst.s_addr!=frwl->ipfw.fw_dst.s_addr
958 || ftmp->ipfw.fw_smsk.s_addr!=frwl->ipfw.fw_smsk.s_addr
959 || ftmp->ipfw.fw_dmsk.s_addr!=frwl->ipfw.fw_dmsk.s_addr
960 #if 0
961 || ftmp->ipfw.fw_flg!=frwl->ipfw.fw_flg
962 #else
963 || ((ftmp->ipfw.fw_flg & ~IP_FW_F_MARKABS)
964 != (frwl->ipfw.fw_flg & ~IP_FW_F_MARKABS))
965 #endif
966 || ftmp->ipfw.fw_invflg!=frwl->ipfw.fw_invflg
967 || ftmp->ipfw.fw_proto!=frwl->ipfw.fw_proto
968 #if 0
969 || ftmp->ipfw.fw_mark!=frwl->ipfw.fw_mark
970 #endif
971 || ftmp->ipfw.fw_redirpt!=frwl->ipfw.fw_redirpt
972 || ftmp->ipfw.fw_spts[0]!=frwl->ipfw.fw_spts[0]
973 || ftmp->ipfw.fw_spts[1]!=frwl->ipfw.fw_spts[1]
974 || ftmp->ipfw.fw_dpts[0]!=frwl->ipfw.fw_dpts[0]
975 || ftmp->ipfw.fw_dpts[1]!=frwl->ipfw.fw_dpts[1]
976 || ftmp->ipfw.fw_outputsize!=frwl->ipfw.fw_outputsize) {
977 duprintf("del_rule_from_chain: mismatch:"
978 "src:%u/%u dst:%u/%u smsk:%u/%u dmsk:%u/%u "
979 "flg:%hX/%hX invflg:%hX/%hX proto:%u/%u "
980 "mark:%u/%u "
981 "ports:%hu-%hu/%hu-%hu %hu-%hu/%hu-%hu "
982 "outputsize:%hu-%hu\n",
983 ftmp->ipfw.fw_src.s_addr,
984 frwl->ipfw.fw_src.s_addr,
985 ftmp->ipfw.fw_dst.s_addr,
986 frwl->ipfw.fw_dst.s_addr,
987 ftmp->ipfw.fw_smsk.s_addr,
988 frwl->ipfw.fw_smsk.s_addr,
989 ftmp->ipfw.fw_dmsk.s_addr,
990 frwl->ipfw.fw_dmsk.s_addr,
991 ftmp->ipfw.fw_flg,
992 frwl->ipfw.fw_flg,
993 ftmp->ipfw.fw_invflg,
994 frwl->ipfw.fw_invflg,
995 ftmp->ipfw.fw_proto,
996 frwl->ipfw.fw_proto,
997 ftmp->ipfw.fw_mark,
998 frwl->ipfw.fw_mark,
999 ftmp->ipfw.fw_spts[0],
1000 frwl->ipfw.fw_spts[0],
1001 ftmp->ipfw.fw_spts[1],
1002 frwl->ipfw.fw_spts[1],
1003 ftmp->ipfw.fw_dpts[0],
1004 frwl->ipfw.fw_dpts[0],
1005 ftmp->ipfw.fw_dpts[1],
1006 frwl->ipfw.fw_dpts[1],
1007 ftmp->ipfw.fw_outputsize,
1008 frwl->ipfw.fw_outputsize);
1009 continue;
1012 if (strncmp(ftmp->ipfw.fw_vianame,
1013 frwl->ipfw.fw_vianame,
1014 IFNAMSIZ)) {
1015 duprintf("del_rule_from_chain: if mismatch: %s/%s\n",
1016 ftmp->ipfw.fw_vianame,
1017 frwl->ipfw.fw_vianame);
1018 continue;
1020 if (ftmp->branch != frwl->branch) {
1021 duprintf("del_rule_from_chain: branch mismatch: "
1022 "%s/%s\n",
1023 ftmp->branch?ftmp->branch->label:"(null)",
1024 frwl->branch?frwl->branch->label:"(null)");
1025 continue;
1027 if (ftmp->branch == NULL
1028 && ftmp->simplebranch != frwl->simplebranch) {
1029 duprintf("del_rule_from_chain: simplebranch mismatch: "
1030 "%i/%i\n",
1031 ftmp->simplebranch, frwl->simplebranch);
1032 continue;
1034 was_found = 1;
1035 if (ftmp->branch)
1036 ftmp->branch->refcount--;
1037 if (ltmp)
1038 ltmp->next = ftmp->next;
1039 else
1040 chainptr->chain = ftmp->next;
1041 kfree(ftmp);
1042 break;
1045 if (was_found)
1046 return 0;
1047 else {
1048 duprintf("del_rule_from_chain: no matching rule found\n");
1049 return EINVAL;
1053 /* This function takes the label of a chain and deletes the first
1054 * chain with that name. No special cases required for the built in
1055 * chains as they have their refcount initilised to 1 so that they are
1056 * never deleted. */
1057 static int del_chain(ip_chainlabel label)
1059 struct ip_chain *tmp,*tmp2;
1061 FWC_HAVE_LOCK(fwc_wlocks);
1062 /* Corner case: return EBUSY not ENOENT for first elem ("input") */
1063 if (strcmp(label, ip_fw_chains->label) == 0)
1064 return EBUSY;
1066 for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
1067 if(strcmp(tmp->next->label,label) == 0)
1068 break;
1070 tmp2 = tmp->next;
1071 if (!tmp2)
1072 return ENOENT;
1074 if (tmp2->refcount)
1075 return EBUSY;
1077 if (tmp2->chain)
1078 return ENOTEMPTY;
1080 tmp->next = tmp2->next;
1081 kfree(tmp2);
1082 return 0;
1085 /* This is a function to initilise a chain. Built in rules start with
1086 * refcount = 1 so that they cannot be deleted. User defined rules
1087 * start with refcount = 0 so they can be deleted. */
1088 static struct ip_chain *ip_init_chain(ip_chainlabel name,
1089 __u32 ref,
1090 int policy)
1092 unsigned int i;
1093 struct ip_chain *label
1094 = kmalloc(SIZEOF_STRUCT_IP_CHAIN, GFP_KERNEL);
1095 if (label == NULL)
1096 panic("Can't kmalloc for firewall chains.\n");
1097 strcpy(label->label,name);
1098 label->next = NULL;
1099 label->chain = NULL;
1100 label->refcount = ref;
1101 label->policy = policy;
1102 for (i = 0; i < smp_num_cpus*2; i++) {
1103 label->reent[i].counters.pcnt = label->reent[i].counters.bcnt
1104 = 0;
1105 label->reent[i].prevchain = NULL;
1106 label->reent[i].prevrule = NULL;
1109 return label;
1112 /* This is a function for reating a new chain. The chains is not
1113 * created if a chain of the same name already exists */
1114 static int create_chain(ip_chainlabel label)
1116 struct ip_chain *tmp;
1118 if (!check_label(label))
1119 return EINVAL;
1121 FWC_HAVE_LOCK(fwc_wlocks);
1122 for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
1123 if (strcmp(tmp->label,label) == 0)
1124 return EEXIST;
1126 if (strcmp(tmp->label,label) == 0)
1127 return EEXIST;
1129 tmp->next = ip_init_chain(label, 0, FW_SKIP); /* refcount is
1130 * zero since this is a
1131 * user defined chain *
1132 * and therefore can be
1133 * deleted */
1134 return 0;
1137 /* This function simply changes the policy on one of the built in
1138 * chains. checking must be done before this is call to ensure that
1139 * chainptr is pointing to one of the three possible chains */
1140 static int change_policy(struct ip_chain *chainptr, int policy)
1142 FWC_HAVE_LOCK(fwc_wlocks);
1143 chainptr->policy = policy;
1144 return 0;
1147 /* This function takes an ip_fwuser and converts it to a ip_fwkernel. It also
1148 * performs some checks in the structure. */
1149 static struct ip_fwkernel *convert_ipfw(struct ip_fwuser *fwuser, int *errno)
1151 struct ip_fwkernel *fwkern;
1153 if ( (fwuser->ipfw.fw_flg & ~IP_FW_F_MASK) != 0 ) {
1154 duprintf("convert_ipfw: undefined flag bits set (flags=%x)\n",
1155 fwuser->ipfw.fw_flg);
1156 *errno = EINVAL;
1157 return NULL;
1160 #ifdef DEBUG_IP_FIREWALL_USER
1161 /* These are sanity checks that don't really matter.
1162 * We can get rid of these once testing is complete.
1164 if ((fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
1165 && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
1166 || fwuser->ipfw.fw_proto != IPPROTO_TCP)) {
1167 duprintf("convert_ipfw: TCP SYN flag set but proto != TCP!\n");
1168 *errno = EINVAL;
1169 return NULL;
1172 if (strcmp(fwuser->label, IP_FW_LABEL_REDIRECT) != 0
1173 && fwuser->ipfw.fw_redirpt != 0) {
1174 duprintf("convert_ipfw: Target not REDIR but redirpt != 0!\n");
1175 *errno = EINVAL;
1176 return NULL;
1179 if ((!(fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
1180 && (fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG))
1181 || (!(fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
1182 && (fwuser->ipfw.fw_invflg & IP_FW_INV_SYN))) {
1183 duprintf("convert_ipfw: Can't have INV flag if flag unset!\n");
1184 *errno = EINVAL;
1185 return NULL;
1188 if (((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCPT)
1189 && fwuser->ipfw.fw_spts[0] == 0
1190 && fwuser->ipfw.fw_spts[1] == 0xFFFF)
1191 || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTPT)
1192 && fwuser->ipfw.fw_dpts[0] == 0
1193 && fwuser->ipfw.fw_dpts[1] == 0xFFFF)
1194 || ((fwuser->ipfw.fw_invflg & IP_FW_INV_VIA)
1195 && (fwuser->ipfw.fw_vianame)[0] == '\0')
1196 || ((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCIP)
1197 && fwuser->ipfw.fw_smsk.s_addr == 0)
1198 || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTIP)
1199 && fwuser->ipfw.fw_dmsk.s_addr == 0)) {
1200 duprintf("convert_ipfw: INV flag makes rule unmatchable!\n");
1201 *errno = EINVAL;
1202 return NULL;
1205 if ((fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
1206 && !(fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG)
1207 && (fwuser->ipfw.fw_spts[0] != 0
1208 || fwuser->ipfw.fw_spts[1] != 0xFFFF
1209 || fwuser->ipfw.fw_dpts[0] != 0
1210 || fwuser->ipfw.fw_dpts[1] != 0xFFFF
1211 || (fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN))) {
1212 duprintf("convert_ipfw: Can't test ports or SYN with frag!\n");
1213 *errno = EINVAL;
1214 return NULL;
1216 #endif
1218 if ((fwuser->ipfw.fw_spts[0] != 0
1219 || fwuser->ipfw.fw_spts[1] != 0xFFFF
1220 || fwuser->ipfw.fw_dpts[0] != 0
1221 || fwuser->ipfw.fw_dpts[1] != 0xFFFF)
1222 && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
1223 || (fwuser->ipfw.fw_proto != IPPROTO_TCP
1224 && fwuser->ipfw.fw_proto != IPPROTO_UDP
1225 && fwuser->ipfw.fw_proto != IPPROTO_ICMP))) {
1226 duprintf("convert_ipfw: Can only test ports for TCP/UDP/ICMP!\n");
1227 *errno = EINVAL;
1228 return NULL;
1231 fwkern = kmalloc(SIZEOF_STRUCT_IP_FW_KERNEL, GFP_KERNEL);
1232 if (!fwkern) {
1233 duprintf("convert_ipfw: kmalloc failed!\n");
1234 *errno = ENOMEM;
1235 return NULL;
1237 memcpy(&fwkern->ipfw,&fwuser->ipfw,sizeof(struct ip_fw));
1239 if (!find_special(fwuser->label, &fwkern->simplebranch)) {
1240 fwkern->branch = find_label(fwuser->label);
1241 if (!fwkern->branch) {
1242 duprintf("convert_ipfw: chain doesn't exist `%s'.\n",
1243 fwuser->label);
1244 kfree(fwkern);
1245 *errno = ENOENT;
1246 return NULL;
1247 } else if (fwkern->branch == IP_FW_INPUT_CHAIN
1248 || fwkern->branch == IP_FW_FORWARD_CHAIN
1249 || fwkern->branch == IP_FW_OUTPUT_CHAIN) {
1250 duprintf("convert_ipfw: Can't branch to builtin chain `%s'.\n",
1251 fwuser->label);
1252 kfree(fwkern);
1253 *errno = ENOENT;
1254 return NULL;
1256 } else
1257 fwkern->branch = NULL;
1258 memset(fwkern->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
1260 /* Handle empty vianame by making it a wildcard */
1261 if ((fwkern->ipfw.fw_vianame)[0] == '\0')
1262 fwkern->ipfw.fw_flg |= IP_FW_F_WILDIF;
1264 fwkern->next = NULL;
1265 return fwkern;
/* Dispatch a firewall control request.
 *
 * `cmd' selects the operation (IP_FW_*), `m' points at the request
 * payload and `len' is its size in bytes; every command checks `len'
 * against the exact structure size it expects.
 *
 * All commands run with ip_fw_lock taken via FWC_WRITE_LOCK_IRQ, except
 * IP_FW_CHECK, which drops it and takes FWC_READ_LOCK instead.
 *
 * Returns 0 on success or a positive errno value.  For IP_FW_CHECK the
 * verdict of ip_fw_check() is mapped onto errno values (see below).
 */
int ip_fw_ctl(int cmd, void *m, int len)
{
	int ret;
	struct ip_chain *chain;
	unsigned long flags;

	FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);

	switch (cmd) {
	case IP_FW_FLUSH:
		if (len != sizeof(ip_chainlabel) || !check_label(m))
			ret = EINVAL;
		else if ((chain = find_label(m)) == NULL)
			ret = ENOENT;
		else ret = clear_fw_chain(chain);
		break;

	case IP_FW_ZERO:
		if (len != sizeof(ip_chainlabel) || !check_label(m))
			ret = EINVAL;
		else if ((chain = find_label(m)) == NULL)
			ret = ENOENT;
		else ret = zero_fw_chain(chain);
		break;

	case IP_FW_CHECK: {
		struct ip_fwtest *new = m;
		struct iphdr *ip;

		/* Don't need write lock. */
		FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);

		/* Validate the label member itself (the same field that
		 * find_label() is given below), not the start of the
		 * request structure. */
		if (len != sizeof(struct ip_fwtest)
		    || !check_label(new->fwt_label))
			return EINVAL;

		/* Need readlock to do find_label */
		FWC_READ_LOCK(&ip_fw_lock);

		if ((chain = find_label(new->fwt_label)) == NULL)
			ret = ENOENT;
		else {
			ip = &(new->fwt_packet.fwp_iph);

			/* Only headers without IP options are accepted. */
			if (ip->ihl != sizeof(struct iphdr) / sizeof(int)) {
				duprintf("ip_fw_ctl: ip->ihl=%d, want %d\n",
					 ip->ihl,
					 (int)(sizeof(struct iphdr) / sizeof(int)));
				ret = EINVAL;
			}
			else {
				ret = ip_fw_check(ip, new->fwt_packet.fwp_vianame,
						  NULL, chain,
						  NULL, SLOT_NUMBER(), 1);
				/* Translate the verdict into an errno value. */
				switch (ret) {
				case FW_ACCEPT:
					ret = 0; break;
				case FW_REDIRECT:
					ret = ECONNABORTED; break;
				case FW_MASQUERADE:
					ret = ECONNRESET; break;
				case FW_REJECT:
					ret = ECONNREFUSED; break;
					/* Hack to help diag; these only get
					   returned when testing. */
				case FW_SKIP+1:
					ret = ELOOP; break;
				case FW_SKIP:
					ret = ENFILE; break;
				default: /* FW_BLOCK */
					ret = ETIMEDOUT; break;
				}
			}
		}
		FWC_READ_UNLOCK(&ip_fw_lock);
		return ret;
	}

	case IP_FW_MASQ_TIMEOUTS: {
#ifdef CONFIG_IP_MASQUERADE
		ret = ip_fw_masq_timeouts(m, len);
#else
		ret = EINVAL;
#endif
	}
	break;

	case IP_FW_REPLACE: {
		struct ip_fwkernel *ip_fwkern;
		struct ip_fwnew *new = m;

		if (len != sizeof(struct ip_fwnew)
		    || !check_label(new->fwn_label))
			ret = EINVAL;
		else if ((chain = find_label(new->fwn_label)) == NULL)
			ret = ENOENT;
		/* On failure convert_ipfw() stores the error in ret. */
		else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
			 != NULL)
			ret = replace_in_chain(chain, ip_fwkern,
					       new->fwn_rulenum);
	}
	break;

	case IP_FW_APPEND: {
		struct ip_fwchange *new = m;
		struct ip_fwkernel *ip_fwkern;

		if (len != sizeof(struct ip_fwchange)
		    || !check_label(new->fwc_label))
			ret = EINVAL;
		else if ((chain = find_label(new->fwc_label)) == NULL)
			ret = ENOENT;
		else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
			 != NULL)
			ret = append_to_chain(chain, ip_fwkern);
	}
	break;

	case IP_FW_INSERT: {
		struct ip_fwkernel *ip_fwkern;
		struct ip_fwnew *new = m;

		if (len != sizeof(struct ip_fwnew)
		    || !check_label(new->fwn_label))
			ret = EINVAL;
		else if ((chain = find_label(new->fwn_label)) == NULL)
			ret = ENOENT;
		else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
			 != NULL)
			ret = insert_in_chain(chain, ip_fwkern,
					      new->fwn_rulenum);
	}
	break;

	case IP_FW_DELETE: {
		struct ip_fwchange *new = m;
		struct ip_fwkernel *ip_fwkern;

		if (len != sizeof(struct ip_fwchange)
		    || !check_label(new->fwc_label))
			ret = EINVAL;
		else if ((chain = find_label(new->fwc_label)) == NULL)
			ret = ENOENT;
		else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
			 != NULL) {
			ret = del_rule_from_chain(chain, ip_fwkern);
			/* The converted rule is only a template to compare
			 * against; it is never linked into a chain, so it
			 * must be released here whatever the outcome. */
			kfree(ip_fwkern);
		}
	}
	break;

	case IP_FW_DELETE_NUM: {
		struct ip_fwdelnum *new = m;

		if (len != sizeof(struct ip_fwdelnum)
		    || !check_label(new->fwd_label))
			ret = EINVAL;
		else if ((chain = find_label(new->fwd_label)) == NULL)
			ret = ENOENT;
		else ret = del_num_from_chain(chain, new->fwd_rulenum);
	}
	break;

	case IP_FW_CREATECHAIN: {
		if (len != sizeof(ip_chainlabel)) {
			duprintf("create_chain: bad size %i\n", len);
			ret = EINVAL;
		}
		else ret = create_chain(m);
	}
	break;

	case IP_FW_DELETECHAIN: {
		if (len != sizeof(ip_chainlabel)) {
			duprintf("delete_chain: bad size %i\n", len);
			ret = EINVAL;
		}
		else ret = del_chain(m);
	}
	break;

	case IP_FW_POLICY: {
		struct ip_fwpolicy *new = m;

		if (len != sizeof(struct ip_fwpolicy)
		    || !check_label(new->fwp_label))
			ret = EINVAL;
		else if ((chain = find_label(new->fwp_label)) == NULL)
			ret = ENOENT;
		else if (chain != IP_FW_INPUT_CHAIN
			 && chain != IP_FW_FORWARD_CHAIN
			 && chain != IP_FW_OUTPUT_CHAIN) {
			duprintf("change_policy: can't change policy on user"
				 " defined chain.\n");
			ret = EINVAL;
		}
		else {
			/* FW_SKIP is not an accepted policy, so an
			 * unrecognised name falls into the default case. */
			int pol = FW_SKIP;
			find_special(new->fwp_policy, &pol);

			switch(pol) {
			case FW_MASQUERADE:
				/* Masquerading is only accepted as the
				 * policy of the forward chain. */
				if (chain != IP_FW_FORWARD_CHAIN) {
					ret = EINVAL;
					break;
				}
				/* Fall thru... */
			case FW_BLOCK:
			case FW_ACCEPT:
			case FW_REJECT:
				ret = change_policy(chain, pol);
				break;
			default:
				duprintf("change_policy: bad policy `%s'\n",
					 new->fwp_policy);
				ret = EINVAL;
			}
		}
		break;
	}

	default:
		duprintf("ip_fw_ctl: unknown request %d\n",cmd);
		ret = EINVAL;
	}

	FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
	return ret;
}
1495 /* Returns bytes used - doesn't NUL terminate */
1496 static int dump_rule(char *buffer,
1497 const char *chainlabel,
1498 const struct ip_fwkernel *rule)
1500 int len;
1501 unsigned int i;
1502 __u64 packets = 0, bytes = 0;
1504 FWC_HAVE_LOCK(fwc_wlocks);
1505 for (i = 0; i < NUM_SLOTS; i++) {
1506 packets += rule->counters[i].pcnt;
1507 bytes += rule->counters[i].bcnt;
1510 len=sprintf(buffer,
1511 "%9s " /* Chain name */
1512 "%08lX/%08lX->%08lX/%08lX " /* Source & Destination IPs */
1513 "%.16s " /* Interface */
1514 "%X %X " /* fw_flg and fw_invflg fields */
1515 "%u " /* Protocol */
1516 "%-9u %-9u %-9u %-9u " /* Packet & byte counters */
1517 "%u-%u %u-%u " /* Source & Dest port ranges */
1518 "A%02X X%02X " /* TOS and and xor masks */
1519 "%08X " /* Redirection port */
1520 "%u " /* fw_mark field */
1521 "%u " /* output size */
1522 "%9s\n", /* Target */
1523 chainlabel,
1524 ntohl(rule->ipfw.fw_src.s_addr),
1525 ntohl(rule->ipfw.fw_smsk.s_addr),
1526 ntohl(rule->ipfw.fw_dst.s_addr),
1527 ntohl(rule->ipfw.fw_dmsk.s_addr),
1528 (rule->ipfw.fw_vianame)[0] ? rule->ipfw.fw_vianame : "-",
1529 rule->ipfw.fw_flg,
1530 rule->ipfw.fw_invflg,
1531 rule->ipfw.fw_proto,
1532 (__u32)(packets >> 32), (__u32)packets,
1533 (__u32)(bytes >> 32), (__u32)bytes,
1534 rule->ipfw.fw_spts[0], rule->ipfw.fw_spts[1],
1535 rule->ipfw.fw_dpts[0], rule->ipfw.fw_dpts[1],
1536 rule->ipfw.fw_tosand, rule->ipfw.fw_tosxor,
1537 rule->ipfw.fw_redirpt,
1538 rule->ipfw.fw_mark,
1539 rule->ipfw.fw_outputsize,
1540 branchname(rule->branch,rule->simplebranch));
1542 duprintf("dump_rule: %i bytes done.\n", len);
1543 return len;
/* /proc read handler that dumps the rules of every chain, one text
 * record per rule (formatted by dump_rule).
 *
 * `offset' counts rules already delivered, not bytes: the first pass
 * below skips that many rules, the second pass dumps from there until
 * the output would exceed `length'.  When `reset' is non-zero the
 * counters of each rule are cleared right after it has been dumped.
 *
 * Returns the number of bytes placed in `buffer'; *start receives the
 * number of records produced, cast to a pointer.
 */
1546 /* File offset is actually in records, not bytes. */
1547 static int ip_chain_procinfo(char *buffer, char **start,
1548 off_t offset, int length, int reset)
1550 struct ip_chain *i;
1551 struct ip_fwkernel *j = ip_fw_chains->chain;
1552 unsigned long flags;
1553 int len = 0;
/* Value of len after the last rule that fitted completely. */
1554 int last_len = 0;
/* Number of rules passed so far (skipped or dumped). */
1555 off_t upto = 0;
1557 duprintf("Offset starts at %lu\n", offset);
1558 duprintf("ip_fw_chains is 0x%0lX\n", (unsigned long int)ip_fw_chains);
1560 /* Need a write lock to lock out ``readers'' which update counters. */
1561 FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
/* Pass 1: advance i/j to the rule numbered `offset'.  Both loops stop
 * as soon as upto reaches offset, leaving i and j on the resume point. */
1563 for (i = ip_fw_chains; i; i = i->next) {
1564 for (j = i->chain; j; j = j->next) {
1565 if (upto == offset) break;
1566 duprintf("Skipping rule in chain `%s'\n",
1567 i->label);
1568 upto++;
1570 if (upto == offset) break;
/* Pass 2: dump from the resume point.  The loop increment reloads j
 * only when there is a next chain to move on to. */
1573 /* Don't init j first time, or once i = NULL */
1574 for (; i; (void)((i = i->next) && (j = i->chain))) {
1575 duprintf("Dumping chain `%s'\n", i->label);
1576 for (; j; j = j->next, upto++, last_len = len)
/* NOTE(review): the rule is written before the length check, so the
 * buffer must have room beyond `length' for one record - confirm. */
1578 len += dump_rule(buffer+len, i->label, j);
1579 if (len > length) {
1580 duprintf("Dumped to %i (past %i). "
1581 "Moving back to %i.\n",
1582 len, length, last_len);
/* Drop the record that did not fit; it is neither counted in upto
 * nor has its counters reset. */
1583 len = last_len;
1584 goto outside;
1586 else if (reset)
1587 memset(j->counters, 0,
1588 sizeof(struct ip_counters)*NUM_SLOTS);
1591 outside:
1592 FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1593 buffer[len] = '\0';
1595 duprintf("ip_chain_procinfo: Length = %i (of %i). Offset = %li.\n",
1596 len, length, upto);
1597 /* `start' hack - see fs/proc/generic.c line ~165 */
1598 *start=(char *)((unsigned int)upto-offset);
1599 return len;
1602 static int ip_chain_name_procinfo(char *buffer, char **start,
1603 off_t offset, int length, int reset)
1605 struct ip_chain *i;
1606 int len = 0,last_len = 0;
1607 off_t pos = 0,begin = 0;
1608 unsigned long flags;
1610 /* Need a write lock to lock out ``readers'' which update counters. */
1611 FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1613 for (i = ip_fw_chains; i; i = i->next)
1615 unsigned int j;
1616 __u32 packetsHi = 0, packetsLo = 0, bytesHi = 0, bytesLo = 0;
1618 for (j = 0; j < NUM_SLOTS; j++) {
1619 packetsLo += i->reent[j].counters.pcnt & 0xFFFFFFFF;
1620 packetsHi += ((i->reent[j].counters.pcnt >> 32)
1621 & 0xFFFFFFFF);
1622 bytesLo += i->reent[j].counters.bcnt & 0xFFFFFFFF;
1623 bytesHi += ((i->reent[j].counters.bcnt >> 32)
1624 & 0xFFFFFFFF);
1627 /* print the label and the policy */
1628 len+=sprintf(buffer+len,"%s %s %i %u %u %u %u\n",
1629 i->label,branchname(NULL, i->policy),i->refcount,
1630 packetsHi, packetsLo, bytesHi, bytesLo);
1631 pos=begin+len;
1632 if(pos<offset) {
1633 len=0;
1634 begin=pos;
1636 else if(pos>offset+length) {
1637 len = last_len;
1638 break;
1641 last_len = len;
1643 FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1645 *start = buffer+(offset-begin);
1646 len-=(offset-begin);
1647 if(len>length)
1648 len=length;
1649 return len;
1653 * Interface to the generic firewall chains.
1655 int ipfw_input_check(struct firewall_ops *this, int pf, struct device *dev,
1656 void *phdr, void *arg, struct sk_buff **pskb)
1658 return ip_fw_check(phdr, dev->name,
1659 arg, IP_FW_INPUT_CHAIN, *pskb, SLOT_NUMBER(), 0);
1662 int ipfw_output_check(struct firewall_ops *this, int pf, struct device *dev,
1663 void *phdr, void *arg, struct sk_buff **pskb)
1665 /* Locally generated bogus packets by root. <SIGH>. */
1666 if (((struct iphdr *)phdr)->ihl * 4 < sizeof(struct iphdr)
1667 || (*pskb)->len < sizeof(struct iphdr))
1668 return FW_ACCEPT;
1669 return ip_fw_check(phdr, dev->name,
1670 arg, IP_FW_OUTPUT_CHAIN, *pskb, SLOT_NUMBER(), 0);
1673 int ipfw_forward_check(struct firewall_ops *this, int pf, struct device *dev,
1674 void *phdr, void *arg, struct sk_buff **pskb)
1676 return ip_fw_check(phdr, dev->name,
1677 arg, IP_FW_FORWARD_CHAIN, *pskb, SLOT_NUMBER(), 0);
/* Hook table for the IPv4 firewall; ip_fw_init() hands it to
 * register_firewall().  It names the forward, input and output check
 * functions, in that order, for protocol family PF_INET.
 * NOTE(review): the initializer is positional, so the meaning of the
 * leading NULL and of the trailing 0 depends on the declaration of
 * struct firewall_ops, which is not visible here - confirm. */
1680 struct firewall_ops ipfw_ops=
1682 NULL,
1683 ipfw_forward_check,
1684 ipfw_input_check,
1685 ipfw_output_check,
1686 PF_INET,
1687 0 /* We don't even allow a fall through so we are last */
1690 #ifdef CONFIG_PROC_FS
/* /proc/net entry named IP_FW_PROC_CHAINS, served by
 * ip_chain_procinfo() and registered from ip_fw_init().  The mode is a
 * regular file with owner read and write permission only. */
1691 static struct proc_dir_entry proc_net_ipfwchains_chain = {
1692 PROC_NET_IPFW_CHAINS, sizeof(IP_FW_PROC_CHAINS)-1,
1693 IP_FW_PROC_CHAINS, S_IFREG | S_IRUSR | S_IWUSR, 1, 0, 0,
1694 0, &proc_net_inode_operations, ip_chain_procinfo
/* /proc/net entry named IP_FW_PROC_CHAIN_NAMES, served by
 * ip_chain_name_procinfo() and registered from ip_fw_init().  The mode
 * is a regular file with owner read and write permission only. */
1697 static struct proc_dir_entry proc_net_ipfwchains_chainnames = {
1698 PROC_NET_IPFW_CHAIN_NAMES, sizeof(IP_FW_PROC_CHAIN_NAMES)-1,
1699 IP_FW_PROC_CHAIN_NAMES, S_IFREG | S_IRUSR | S_IWUSR, 1, 0, 0,
1700 0, &proc_net_inode_operations, ip_chain_name_procinfo
1703 #endif
1705 __initfunc(void ip_fw_init(void))
1707 #ifdef DEBUG_IP_FIRWALL_LOCKING
1708 fwc_wlocks = fwc_rlocks = 0;
1709 #endif
1711 IP_FW_INPUT_CHAIN = ip_init_chain(IP_FW_LABEL_INPUT, 1, FW_ACCEPT);
1712 IP_FW_FORWARD_CHAIN = ip_init_chain(IP_FW_LABEL_FORWARD, 1, FW_ACCEPT);
1713 IP_FW_OUTPUT_CHAIN = ip_init_chain(IP_FW_LABEL_OUTPUT, 1, FW_ACCEPT);
1715 if(register_firewall(PF_INET,&ipfw_ops)<0)
1716 panic("Unable to register IP firewall.\n");
1718 #ifdef CONFIG_PROC_FS
1719 proc_net_register(&proc_net_ipfwchains_chain);
1720 proc_net_register(&proc_net_ipfwchains_chainnames);
1721 #endif
1723 #ifdef CONFIG_IP_FIREWALL_NETLINK
1724 ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL);
1725 if (ipfwsk == NULL)
1726 panic("ip_fw_init: cannot initialize netlink\n");
1727 #endif
1728 #if defined(DEBUG_IP_FIREWALL) || defined(DEBUG_IP_FIREWALL_USER)
1729 printk("Firewall graphs enabled! Untested kernel coming thru. \n");
1730 #endif