2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $
29 * Implement IP packet firewall (new version)
35 #error IPFIREWALL requires INET.
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/malloc.h>
42 #include <sys/kernel.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/sysctl.h>
47 #include <sys/syslog.h>
48 #include <sys/ucred.h>
49 #include <sys/in_cksum.h>
50 #include <sys/limits.h>
55 #include <net/route.h>
57 #include <net/dummynet/ip_dummynet.h>
59 #include <sys/thread2.h>
60 #include <sys/mplock2.h>
61 #include <net/netmsg2.h>
63 #include <netinet/in.h>
64 #include <netinet/in_systm.h>
65 #include <netinet/in_var.h>
66 #include <netinet/in_pcb.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip_var.h>
69 #include <netinet/ip_icmp.h>
70 #include <netinet/tcp.h>
71 #include <netinet/tcp_seq.h>
72 #include <netinet/tcp_timer.h>
73 #include <netinet/tcp_var.h>
74 #include <netinet/tcpip.h>
75 #include <netinet/udp.h>
76 #include <netinet/udp_var.h>
77 #include <netinet/ip_divert.h>
78 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */
80 #include <net/ipfw/ip_fw2.h>
82 #ifdef IPFIREWALL_DEBUG
83 #define DPRINTF(fmt, ...) \
86 kprintf(fmt, __VA_ARGS__); \
89 #define DPRINTF(fmt, ...) ((void)0)
93 * Description about per-CPU rule duplication:
95 * Module loading/unloading and all ioctl operations are serialized
96 * by netisr0, so we don't have any ordering or locking problems.
98 * Following graph shows how operation on per-CPU rule list is
99 * performed [2 CPU case]:
103 * netisr0 <------------------------------------+
109 * forwardmsg---------->netisr1 |
114 * replymsg--------------+
118 * Rule structure [2 CPU case]
122 * layer3_chain layer3_chain
125 * +-------+ sibling +-------+ sibling
126 * | rule1 |--------->| rule1 |--------->NULL
127 * +-------+ +-------+
131 * +-------+ sibling +-------+ sibling
132 * | rule2 |--------->| rule2 |--------->NULL
133 * +-------+ +-------+
136 * 1) Ease statistics calculation during IP_FW_GET. We only need to
137 * iterate layer3_chain in netisr0; the current rule's duplication
138 * to the other CPUs could safely be read-only accessed through
140 * 2) Accelerate rule insertion and deletion, e.g. rule insertion:
141 * a) In netisr0 rule3 is determined to be inserted between rule1
142 * and rule2. To make this decision we need to iterate the
143 * layer3_chain in netisr0. The netmsg, which is used to insert
144 * the rule, will contain rule1 in netisr0 as prev_rule and rule2
145 * in netisr0 as next_rule.
146 * b) After the insertion in netisr0 is done, we will move on to
147 * netisr1. But instead of relocating the rule3's position in
148 * netisr1 by iterating the layer3_chain in netisr1, we set the
149 * netmsg's prev_rule to rule1->sibling and next_rule to
150 * rule2->sibling before the netmsg is forwarded to netisr1 from
155 * Description of states and tracks.
157 * Both states and tracks are stored in per-cpu RB trees instead of
158 * per-cpu hash tables to avoid the worst case hash degeneration.
160 * The lifetimes of states and tracks are regulated by dyn_*_lifetime,
161 * measured in seconds and depending on the flags.
163 * When a packet is received, its address fields are first masked with
164 * the mask defined for the rule, then matched against the entries in
165 * the per-cpu state RB tree. States are generated by 'keep-state'
166 * and 'limit' options.
168 * The max number of states is ipfw_state_max. When we reach the
169 * maximum number of states we do not create anymore. This is done to
170 * avoid consuming too much memory, but also too much time when
171 * searching on each packet.
173 * Each state holds a pointer to the parent ipfw rule of the current
174 * CPU so we know what action to perform. States are removed when the
175 * parent rule is deleted. XXX we should make them survive.
177 * There are some limitations with states -- we do not obey the
178 * 'randomized match', and we do not do multiple passes through the
179 * firewall. XXX check the latter!!!
181 * States grow independently on each CPU, e.g. 2 CPU case:
184 * ................... ...................
185 * : state RB tree : : state RB tree :
187 * : state1 state2 : : state3 :
189 * :.....|....|......: :........|........:
194 * +-------+ +-------+
195 * | rule1 | | rule1 |
196 * +-------+ +-------+
198 * Tracks are used to enforce limits on the number of sessions. Tracks
199 * are generated by 'limit' option.
201 * The max number of tracks is ipfw_track_max. When we reach the
202 * maximum number of tracks we do not create anymore. This is done to
203 * avoid consuming too much memory.
205 * Tracks are organized into two layers, track counter RB tree is
206 * shared between CPUs, track RB tree is per-cpu. States generated by
207 * 'limit' option are linked to the track in addition to the per-cpu
208 * state RB tree; mainly to ease expiration. e.g. 2 CPU case:
210 * ..............................
211 * : track counter RB tree :
216 * : +--->counter<----+ :
218 * : | +-----------+ | :
219 * :......|................|....:
222 * ................. |t_count | .................
223 * : track RB tree : | | : track RB tree :
225 * : +-->track1-------+ +--------track2 :
228 * :.|.....|.......: :...............:
229 * | +----------------+
230 * | .................... |
231 * | : state RB tree : |st_track
233 * +---state1 state2---+
235 * :.....|.......|....:
244 #define IPFW_AUTOINC_STEP_MIN 1
245 #define IPFW_AUTOINC_STEP_MAX 1000
246 #define IPFW_AUTOINC_STEP_DEF 100
248 #define IPFW_DEFAULT_RULE 65535 /* rulenum for the default rule */
249 #define IPFW_DEFAULT_SET 31 /* set number for the default rule */
251 #define MATCH_REVERSE 0
252 #define MATCH_FORWARD 1
254 #define MATCH_UNKNOWN 3
256 #define IPFW_STATE_TCPFLAGS (TH_SYN | TH_FIN | TH_RST)
257 #define IPFW_STATE_TCPSTATES (IPFW_STATE_TCPFLAGS | \
258 (IPFW_STATE_TCPFLAGS << 8))
260 #define BOTH_SYN (TH_SYN | (TH_SYN << 8))
261 #define BOTH_FIN (TH_FIN | (TH_FIN << 8))
262 #define BOTH_RST (TH_RST | (TH_RST << 8))
263 /* TH_ACK here means FIN was ACKed. */
264 #define BOTH_FINACK (TH_ACK | (TH_ACK << 8))
266 #define IPFW_STATE_TCPCLOSED(s) ((s)->st_proto == IPPROTO_TCP && \
267 (((s)->st_state & BOTH_RST) || \
268 ((s)->st_state & BOTH_FINACK) == BOTH_FINACK))
270 #define O_ANCHOR O_NOP
273 struct netmsg_base base
;
274 const struct ipfw_ioc_rule
*ioc_rule
;
275 struct ip_fw
*next_rule
;
276 struct ip_fw
*prev_rule
;
277 struct ip_fw
*sibling
;
282 struct netmsg_base base
;
283 struct ip_fw
*start_rule
;
284 struct ip_fw
*prev_rule
;
291 struct netmsg_base base
;
292 struct ip_fw
*start_rule
;
297 struct netmsg_cpstate
{
298 struct netmsg_base base
;
299 struct ipfw_ioc_state
*ioc_state
;
316 struct ipfw_addrs addrs
;
320 struct ipfw_ports ports
;
324 uint8_t swap
; /* IPFW_KEY_SWAP_ */
328 #define IPFW_KEY_SWAP_ADDRS 0x1
329 #define IPFW_KEY_SWAP_PORTS 0x2
330 #define IPFW_KEY_SWAP_ALL (IPFW_KEY_SWAP_ADDRS | IPFW_KEY_SWAP_PORTS)
333 RB_ENTRY(ipfw_trkcnt
) tc_rblink
;
334 struct ipfw_key tc_key
;
338 time_t tc_expire
; /* userland get-only */
339 uint16_t tc_rulenum
; /* userland get-only */
342 #define tc_addrs tc_key.addr_u.value
343 #define tc_ports tc_key.port_u.value
344 #define tc_proto tc_key.proto
345 #define tc_saddr tc_key.addr_u.addrs.addr1
346 #define tc_daddr tc_key.addr_u.addrs.addr2
347 #define tc_sport tc_key.port_u.ports.port1
348 #define tc_dport tc_key.port_u.ports.port2
350 RB_HEAD(ipfw_trkcnt_tree
, ipfw_trkcnt
);
355 RB_ENTRY(ipfw_track
) t_rblink
;
356 struct ipfw_key t_key
;
357 struct ip_fw
*t_rule
;
359 LIST_HEAD(, ipfw_state
) t_state_list
;
361 volatile int *t_count
;
362 struct ipfw_trkcnt
*t_trkcnt
;
363 TAILQ_ENTRY(ipfw_track
) t_link
;
366 #define t_addrs t_key.addr_u.value
367 #define t_ports t_key.port_u.value
368 #define t_proto t_key.proto
369 #define t_saddr t_key.addr_u.addrs.addr1
370 #define t_daddr t_key.addr_u.addrs.addr2
371 #define t_sport t_key.port_u.ports.port1
372 #define t_dport t_key.port_u.ports.port2
374 RB_HEAD(ipfw_track_tree
, ipfw_track
);
375 TAILQ_HEAD(ipfw_track_list
, ipfw_track
);
378 RB_ENTRY(ipfw_state
) st_rblink
;
379 struct ipfw_key st_key
;
381 time_t st_expire
; /* expire time */
382 struct ip_fw
*st_rule
;
384 uint64_t st_pcnt
; /* packets */
385 uint64_t st_bcnt
; /* bytes */
389 * State of this rule, typically a combination of TCP flags.
391 * st_ack_fwd/st_ack_rev:
392 * Most recent ACKs in forward and reverse direction. They
393 * are used to generate keepalives.
401 uint16_t st_flags
; /* IPFW_STATE_F_ */
402 uint16_t st_type
; /* O_KEEP_STATE/O_LIMIT */
403 struct ipfw_track
*st_track
;
405 LIST_ENTRY(ipfw_state
) st_trklink
;
406 TAILQ_ENTRY(ipfw_state
) st_link
;
409 #define st_addrs st_key.addr_u.value
410 #define st_ports st_key.port_u.value
411 #define st_proto st_key.proto
412 #define st_swap st_key.swap
414 #define IPFW_STATE_F_ACKFWD 0x0001
415 #define IPFW_STATE_F_SEQFWD 0x0002
416 #define IPFW_STATE_F_ACKREV 0x0004
417 #define IPFW_STATE_F_SEQREV 0x0008
419 TAILQ_HEAD(ipfw_state_list
, ipfw_state
);
420 RB_HEAD(ipfw_state_tree
, ipfw_state
);
422 struct ipfw_context
{
423 struct ip_fw
*ipfw_layer3_chain
; /* rules for layer3 */
424 struct ip_fw
*ipfw_default_rule
; /* default rule */
425 uint64_t ipfw_norule_counter
; /* ipfw_log(NULL) stat*/
428 * ipfw_set_disable contains one bit per set value (0..31).
429 * If the bit is set, all rules with the corresponding set
430 * are disabled. Set IPFW_DEFAULT_SET is reserved for the
431 * default rule and CANNOT be disabled.
433 uint32_t ipfw_set_disable
;
435 uint8_t ipfw_flags
; /* IPFW_FLAG_ */
437 struct ipfw_state_tree ipfw_state_tree
;
438 struct ipfw_state_list ipfw_state_list
;
439 int ipfw_state_loosecnt
;
443 struct ipfw_state state
;
444 struct ipfw_track track
;
445 struct ipfw_trkcnt trkcnt
;
448 struct ipfw_track_tree ipfw_track_tree
;
449 struct ipfw_track_list ipfw_track_list
;
450 struct ipfw_trkcnt
*ipfw_trkcnt_spare
;
452 struct callout ipfw_stateto_ch
;
453 time_t ipfw_state_lastexp
;
454 struct netmsg_base ipfw_stateexp_nm
;
455 struct netmsg_base ipfw_stateexp_more
;
456 struct ipfw_state ipfw_stateexp_anch
;
458 struct callout ipfw_trackto_ch
;
459 time_t ipfw_track_lastexp
;
460 struct netmsg_base ipfw_trackexp_nm
;
461 struct netmsg_base ipfw_trackexp_more
;
462 struct ipfw_track ipfw_trackexp_anch
;
464 struct callout ipfw_keepalive_ch
;
465 struct netmsg_base ipfw_keepalive_nm
;
466 struct netmsg_base ipfw_keepalive_more
;
467 struct ipfw_state ipfw_keepalive_anch
;
472 u_long ipfw_sts_reap
;
473 u_long ipfw_sts_reapfailed
;
474 u_long ipfw_sts_overflow
;
475 u_long ipfw_sts_nomem
;
476 u_long ipfw_sts_tcprecycled
;
478 u_long ipfw_tks_nomem
;
479 u_long ipfw_tks_reap
;
480 u_long ipfw_tks_reapfailed
;
481 u_long ipfw_tks_overflow
;
482 u_long ipfw_tks_cntnomem
;
485 #define IPFW_FLAG_KEEPALIVE 0x01
486 #define IPFW_FLAG_STATEEXP 0x02
487 #define IPFW_FLAG_TRACKEXP 0x04
488 #define IPFW_FLAG_STATEREAP 0x08
489 #define IPFW_FLAG_TRACKREAP 0x10
491 #define ipfw_state_tmpkey ipfw_tmpkey.state
492 #define ipfw_track_tmpkey ipfw_tmpkey.track
493 #define ipfw_trkcnt_tmpkey ipfw_tmpkey.trkcnt
496 int ipfw_state_loosecnt
; /* cache aligned */
497 time_t ipfw_state_globexp __cachealign
;
499 struct lwkt_token ipfw_trkcnt_token __cachealign
;
500 struct ipfw_trkcnt_tree ipfw_trkcnt_tree
;
502 time_t ipfw_track_globexp
;
506 * Module can not be unloaded, if there are references to
507 * certain rules of ipfw(4), e.g. dummynet(4)
509 int ipfw_refcnt __cachealign
;
513 static struct ipfw_context
*ipfw_ctx
[MAXCPU
];
515 MALLOC_DEFINE(M_IPFW
, "IpFw/IpAcct", "IpFw/IpAcct chain's");
518 * Following two global variables are accessed and updated only
521 static uint32_t static_count
; /* # of static rules */
522 static uint32_t static_ioc_len
; /* bytes of static rules */
525 * If 1, then ipfw static rules are being flushed,
526 * ipfw_chk() will skip to the default rule.
528 static int ipfw_flushing
;
530 static int fw_verbose
;
531 static int verbose_limit
;
534 static int autoinc_step
= IPFW_AUTOINC_STEP_DEF
;
536 static int ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS
);
537 static int ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS
);
539 SYSCTL_NODE(_net_inet_ip
, OID_AUTO
, fw
, CTLFLAG_RW
, 0, "Firewall");
540 SYSCTL_NODE(_net_inet_ip_fw
, OID_AUTO
, stats
, CTLFLAG_RW
, 0,
541 "Firewall statistics");
543 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, enable
, CTLTYPE_INT
| CTLFLAG_RW
,
544 &fw_enable
, 0, ipfw_sysctl_enable
, "I", "Enable ipfw");
545 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, autoinc_step
, CTLTYPE_INT
| CTLFLAG_RW
,
546 &autoinc_step
, 0, ipfw_sysctl_autoinc_step
, "I",
547 "Rule number autincrement step");
548 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
,one_pass
,CTLFLAG_RW
,
550 "Only do a single pass through ipfw when using dummynet(4)");
551 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, debug
, CTLFLAG_RW
,
552 &fw_debug
, 0, "Enable printing of debug ip_fw statements");
553 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, verbose
, CTLFLAG_RW
,
554 &fw_verbose
, 0, "Log matches to ipfw rules");
555 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, verbose_limit
, CTLFLAG_RW
,
556 &verbose_limit
, 0, "Set upper limit of matches of ipfw rules logged");
558 static int ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS
);
559 static int ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS
);
560 static int ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS
);
561 static int ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS
);
562 static int ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS
);
563 static int ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS
);
566 * Timeouts for various events in handing states.
570 * 2 == 1~2 second(s).
572 * We use 2 seconds for FIN lifetime, so that the states will not be
573 * ripped prematurely.
575 static uint32_t dyn_ack_lifetime
= 300;
576 static uint32_t dyn_syn_lifetime
= 20;
577 static uint32_t dyn_finwait_lifetime
= 20;
578 static uint32_t dyn_fin_lifetime
= 2;
579 static uint32_t dyn_rst_lifetime
= 2;
580 static uint32_t dyn_udp_lifetime
= 10;
581 static uint32_t dyn_short_lifetime
= 5; /* used by tracks too */
584 * Keepalives are sent if dyn_keepalive is set. They are sent every
585 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
586 * seconds of lifetime of a rule.
588 static uint32_t dyn_keepalive_interval
= 20;
589 static uint32_t dyn_keepalive_period
= 5;
590 static uint32_t dyn_keepalive
= 1; /* do send keepalives */
592 static struct ipfw_global ipfw_gd
;
593 static int ipfw_state_loosecnt_updthr
;
594 static int ipfw_state_max
= 4096; /* max # of states */
595 static int ipfw_track_max
= 4096; /* max # of tracks */
597 static int ipfw_state_headroom
; /* setup at module load time */
598 static int ipfw_state_reap_min
= 8;
599 static int ipfw_state_expire_max
= 32;
600 static int ipfw_state_scan_max
= 256;
601 static int ipfw_keepalive_max
= 8;
602 static int ipfw_track_reap_max
= 4;
603 static int ipfw_track_expire_max
= 16;
604 static int ipfw_track_scan_max
= 128;
607 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, dyn_count
,
608 CTLTYPE_INT
| CTLFLAG_RD
, NULL
, 0, ipfw_sysctl_dyncnt
, "I",
609 "Number of states and tracks");
610 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, dyn_max
,
611 CTLTYPE_INT
| CTLFLAG_RW
, NULL
, 0, ipfw_sysctl_dynmax
, "I",
612 "Max number of states and tracks");
614 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, state_cnt
,
615 CTLTYPE_INT
| CTLFLAG_RD
, NULL
, 0, ipfw_sysctl_statecnt
, "I",
617 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, state_max
,
618 CTLTYPE_INT
| CTLFLAG_RW
, NULL
, 0, ipfw_sysctl_statemax
, "I",
619 "Max number of states");
620 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, state_headroom
, CTLFLAG_RW
,
621 &ipfw_state_headroom
, 0, "headroom for state reap");
622 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, track_cnt
, CTLFLAG_RD
,
623 &ipfw_gd
.ipfw_trkcnt_cnt
, 0, "Number of tracks");
624 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, track_max
, CTLFLAG_RW
,
625 &ipfw_track_max
, 0, "Max number of tracks");
626 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, static_count
, CTLFLAG_RD
,
627 &static_count
, 0, "Number of static rules");
628 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_ack_lifetime
, CTLFLAG_RW
,
629 &dyn_ack_lifetime
, 0, "Lifetime of dyn. rules for acks");
630 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_syn_lifetime
, CTLFLAG_RW
,
631 &dyn_syn_lifetime
, 0, "Lifetime of dyn. rules for syn");
632 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_fin_lifetime
, CTLFLAG_RW
,
633 &dyn_fin_lifetime
, 0, "Lifetime of dyn. rules for fin");
634 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_finwait_lifetime
, CTLFLAG_RW
,
635 &dyn_finwait_lifetime
, 0, "Lifetime of dyn. rules for fin wait");
636 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_rst_lifetime
, CTLFLAG_RW
,
637 &dyn_rst_lifetime
, 0, "Lifetime of dyn. rules for rst");
638 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_udp_lifetime
, CTLFLAG_RW
,
639 &dyn_udp_lifetime
, 0, "Lifetime of dyn. rules for UDP");
640 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_short_lifetime
, CTLFLAG_RW
,
641 &dyn_short_lifetime
, 0, "Lifetime of dyn. rules for other situations");
642 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_keepalive
, CTLFLAG_RW
,
643 &dyn_keepalive
, 0, "Enable keepalives for dyn. rules");
644 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, state_scan_max
,
645 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_state_scan_max
, 0, ipfw_sysctl_scancnt
,
646 "I", "# of states to scan for each expire iteration");
647 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, state_expire_max
,
648 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_state_expire_max
, 0, ipfw_sysctl_scancnt
,
649 "I", "# of states to expire for each expire iteration");
650 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, keepalive_max
,
651 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_keepalive_max
, 0, ipfw_sysctl_scancnt
,
652 "I", "# of states to expire for each expire iteration");
653 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, state_reap_min
,
654 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_state_reap_min
, 0, ipfw_sysctl_scancnt
,
655 "I", "# of states to reap for state shortage");
656 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, track_scan_max
,
657 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_track_scan_max
, 0, ipfw_sysctl_scancnt
,
658 "I", "# of tracks to scan for each expire iteration");
659 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, track_expire_max
,
660 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_track_expire_max
, 0, ipfw_sysctl_scancnt
,
661 "I", "# of tracks to expire for each expire iteration");
662 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, track_reap_max
,
663 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_track_reap_max
, 0, ipfw_sysctl_scancnt
,
664 "I", "# of tracks to reap for track shortage");
666 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, state_reap
,
667 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
668 __offsetof(struct ipfw_context
, ipfw_sts_reap
), ipfw_sysctl_stat
,
669 "LU", "# of state reaps due to states shortage");
670 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, state_reapfailed
,
671 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
672 __offsetof(struct ipfw_context
, ipfw_sts_reapfailed
), ipfw_sysctl_stat
,
673 "LU", "# of state reap failure");
674 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, state_overflow
,
675 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
676 __offsetof(struct ipfw_context
, ipfw_sts_overflow
), ipfw_sysctl_stat
,
677 "LU", "# of state overflow");
678 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, state_nomem
,
679 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
680 __offsetof(struct ipfw_context
, ipfw_sts_nomem
), ipfw_sysctl_stat
,
681 "LU", "# of state allocation failure");
682 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, state_tcprecycled
,
683 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
684 __offsetof(struct ipfw_context
, ipfw_sts_tcprecycled
), ipfw_sysctl_stat
,
685 "LU", "# of state deleted due to fast TCP port recycling");
687 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, track_nomem
,
688 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
689 __offsetof(struct ipfw_context
, ipfw_tks_nomem
), ipfw_sysctl_stat
,
690 "LU", "# of track allocation failure");
691 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, track_reap
,
692 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
693 __offsetof(struct ipfw_context
, ipfw_tks_reap
), ipfw_sysctl_stat
,
694 "LU", "# of track reap due to tracks shortage");
695 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, track_reapfailed
,
696 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
697 __offsetof(struct ipfw_context
, ipfw_tks_reapfailed
), ipfw_sysctl_stat
,
698 "LU", "# of track reap failure");
699 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, track_overflow
,
700 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
701 __offsetof(struct ipfw_context
, ipfw_tks_overflow
), ipfw_sysctl_stat
,
702 "LU", "# of track overflow");
703 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, track_cntnomem
,
704 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
705 __offsetof(struct ipfw_context
, ipfw_tks_cntnomem
), ipfw_sysctl_stat
,
706 "LU", "# of track counter allocation failure");
708 static int ipfw_state_cmp(struct ipfw_state
*,
709 struct ipfw_state
*);
710 static int ipfw_trkcnt_cmp(struct ipfw_trkcnt
*,
711 struct ipfw_trkcnt
*);
712 static int ipfw_track_cmp(struct ipfw_track
*,
713 struct ipfw_track
*);
715 RB_PROTOTYPE(ipfw_state_tree
, ipfw_state
, st_rblink
, ipfw_state_cmp
);
716 RB_GENERATE(ipfw_state_tree
, ipfw_state
, st_rblink
, ipfw_state_cmp
);
718 RB_PROTOTYPE(ipfw_trkcnt_tree
, ipfw_trkcnt
, tc_rblink
, ipfw_trkcnt_cmp
);
719 RB_GENERATE(ipfw_trkcnt_tree
, ipfw_trkcnt
, tc_rblink
, ipfw_trkcnt_cmp
);
721 RB_PROTOTYPE(ipfw_track_tree
, ipfw_track
, t_rblink
, ipfw_track_cmp
);
722 RB_GENERATE(ipfw_track_tree
, ipfw_track
, t_rblink
, ipfw_track_cmp
);
724 static ip_fw_chk_t ipfw_chk
;
725 static void ipfw_track_expire_ipifunc(void *);
726 static void ipfw_state_expire_ipifunc(void *);
727 static void ipfw_keepalive(void *);
728 static int ipfw_state_expire_start(struct ipfw_context
*,
731 #define IPFW_TRKCNT_TOKGET lwkt_gettoken(&ipfw_gd.ipfw_trkcnt_token)
732 #define IPFW_TRKCNT_TOKREL lwkt_reltoken(&ipfw_gd.ipfw_trkcnt_token)
733 #define IPFW_TRKCNT_TOKINIT \
734 lwkt_token_init(&ipfw_gd.ipfw_trkcnt_token, "ipfw_trkcnt");
737 ipfw_key_build(struct ipfw_key
*key
, in_addr_t saddr
, uint16_t sport
,
738 in_addr_t daddr
, uint16_t dport
, uint8_t proto
)
745 key
->addr_u
.addrs
.addr1
= daddr
;
746 key
->addr_u
.addrs
.addr2
= saddr
;
747 key
->swap
|= IPFW_KEY_SWAP_ADDRS
;
749 key
->addr_u
.addrs
.addr1
= saddr
;
750 key
->addr_u
.addrs
.addr2
= daddr
;
754 key
->port_u
.ports
.port1
= dport
;
755 key
->port_u
.ports
.port2
= sport
;
756 key
->swap
|= IPFW_KEY_SWAP_PORTS
;
758 key
->port_u
.ports
.port1
= sport
;
759 key
->port_u
.ports
.port2
= dport
;
762 if (sport
== dport
&& (key
->swap
& IPFW_KEY_SWAP_ADDRS
))
763 key
->swap
|= IPFW_KEY_SWAP_PORTS
;
764 if (saddr
== daddr
&& (key
->swap
& IPFW_KEY_SWAP_PORTS
))
765 key
->swap
|= IPFW_KEY_SWAP_ADDRS
;
769 ipfw_key_4tuple(const struct ipfw_key
*key
, in_addr_t
*saddr
, uint16_t *sport
,
770 in_addr_t
*daddr
, uint16_t *dport
)
773 if (key
->swap
& IPFW_KEY_SWAP_ADDRS
) {
774 *saddr
= key
->addr_u
.addrs
.addr2
;
775 *daddr
= key
->addr_u
.addrs
.addr1
;
777 *saddr
= key
->addr_u
.addrs
.addr1
;
778 *daddr
= key
->addr_u
.addrs
.addr2
;
781 if (key
->swap
& IPFW_KEY_SWAP_PORTS
) {
782 *sport
= key
->port_u
.ports
.port2
;
783 *dport
= key
->port_u
.ports
.port1
;
785 *sport
= key
->port_u
.ports
.port1
;
786 *dport
= key
->port_u
.ports
.port2
;
791 ipfw_state_cmp(struct ipfw_state
*s1
, struct ipfw_state
*s2
)
794 if (s1
->st_proto
> s2
->st_proto
)
796 if (s1
->st_proto
< s2
->st_proto
)
799 if (s1
->st_addrs
> s2
->st_addrs
)
801 if (s1
->st_addrs
< s2
->st_addrs
)
804 if (s1
->st_ports
> s2
->st_ports
)
806 if (s1
->st_ports
< s2
->st_ports
)
809 if (s1
->st_swap
== s2
->st_swap
||
810 (s1
->st_swap
^ s2
->st_swap
) == IPFW_KEY_SWAP_ALL
)
813 if (s1
->st_swap
> s2
->st_swap
)
820 ipfw_trkcnt_cmp(struct ipfw_trkcnt
*t1
, struct ipfw_trkcnt
*t2
)
823 if (t1
->tc_proto
> t2
->tc_proto
)
825 if (t1
->tc_proto
< t2
->tc_proto
)
828 if (t1
->tc_addrs
> t2
->tc_addrs
)
830 if (t1
->tc_addrs
< t2
->tc_addrs
)
833 if (t1
->tc_ports
> t2
->tc_ports
)
835 if (t1
->tc_ports
< t2
->tc_ports
)
838 if (t1
->tc_ruleid
> t2
->tc_ruleid
)
840 if (t1
->tc_ruleid
< t2
->tc_ruleid
)
847 ipfw_track_cmp(struct ipfw_track
*t1
, struct ipfw_track
*t2
)
850 if (t1
->t_proto
> t2
->t_proto
)
852 if (t1
->t_proto
< t2
->t_proto
)
855 if (t1
->t_addrs
> t2
->t_addrs
)
857 if (t1
->t_addrs
< t2
->t_addrs
)
860 if (t1
->t_ports
> t2
->t_ports
)
862 if (t1
->t_ports
< t2
->t_ports
)
865 if ((uintptr_t)t1
->t_rule
> (uintptr_t)t2
->t_rule
)
867 if ((uintptr_t)t1
->t_rule
< (uintptr_t)t2
->t_rule
)
874 ipfw_state_max_set(int state_max
)
877 ipfw_state_max
= state_max
;
878 /* Allow 5% states over-allocation. */
879 ipfw_state_loosecnt_updthr
= (state_max
/ 20) / netisr_ncpus
;
883 ipfw_state_cntcoll(void)
885 int cpu
, state_cnt
= 0;
887 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
)
888 state_cnt
+= ipfw_ctx
[cpu
]->ipfw_state_cnt
;
893 ipfw_state_cntsync(void)
897 state_cnt
= ipfw_state_cntcoll();
898 ipfw_gd
.ipfw_state_loosecnt
= state_cnt
;
903 ipfw_free_rule(struct ip_fw
*rule
)
905 KASSERT(rule
->cpuid
== mycpuid
, ("rule freed on cpu%d", mycpuid
));
906 KASSERT(rule
->refcnt
> 0, ("invalid refcnt %u", rule
->refcnt
));
908 if (rule
->refcnt
== 0) {
/*
 * Release a reference held by an external subsystem (e.g. dummynet).
 * Also drops the module-wide refcount that pins ipfw in memory while
 * such references exist.
 */
static void
ipfw_unref_rule(void *priv)
{
	ipfw_free_rule(priv);
#ifdef KLD_MODULE
	atomic_subtract_int(&ipfw_gd.ipfw_refcnt, 1);
#endif
}
925 ipfw_ref_rule(struct ip_fw
*rule
)
927 KASSERT(rule
->cpuid
== mycpuid
, ("rule used on cpu%d", mycpuid
));
929 atomic_add_int(&ipfw_gd
.ipfw_refcnt
, 1);
935 * This macro maps an ip pointer into a layer3 header pointer of type T
937 #define L3HDR(T, ip) ((T *)((uint32_t *)(ip) + (ip)->ip_hl))
940 icmptype_match(struct ip
*ip
, ipfw_insn_u32
*cmd
)
942 int type
= L3HDR(struct icmp
,ip
)->icmp_type
;
944 return (type
<= ICMP_MAXTYPE
&& (cmd
->d
[0] & (1 << type
)));
947 #define TT ((1 << ICMP_ECHO) | \
948 (1 << ICMP_ROUTERSOLICIT) | \
949 (1 << ICMP_TSTAMP) | \
954 is_icmp_query(struct ip
*ip
)
956 int type
= L3HDR(struct icmp
, ip
)->icmp_type
;
958 return (type
<= ICMP_MAXTYPE
&& (TT
& (1 << type
)));
964 * The following checks use two arrays of 8 or 16 bits to store the
965 * bits that we want set or clear, respectively. They are in the
966 * low and high half of cmd->arg1 or cmd->d[0].
968 * We scan options and store the bits we find set. We succeed if
970 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
972 * The code is sometimes optimized not to store additional variables.
975 flags_match(ipfw_insn
*cmd
, uint8_t bits
)
980 if (((cmd
->arg1
& 0xff) & bits
) != 0)
981 return 0; /* some bits we want set were clear */
983 want_clear
= (cmd
->arg1
>> 8) & 0xff;
984 if ((want_clear
& bits
) != want_clear
)
985 return 0; /* some bits we want clear were set */
990 ipopts_match(struct ip
*ip
, ipfw_insn
*cmd
)
992 int optlen
, bits
= 0;
993 u_char
*cp
= (u_char
*)(ip
+ 1);
994 int x
= (ip
->ip_hl
<< 2) - sizeof(struct ip
);
996 for (; x
> 0; x
-= optlen
, cp
+= optlen
) {
997 int opt
= cp
[IPOPT_OPTVAL
];
999 if (opt
== IPOPT_EOL
)
1002 if (opt
== IPOPT_NOP
) {
1005 optlen
= cp
[IPOPT_OLEN
];
1006 if (optlen
<= 0 || optlen
> x
)
1007 return 0; /* invalid or truncated */
1012 bits
|= IP_FW_IPOPT_LSRR
;
1016 bits
|= IP_FW_IPOPT_SSRR
;
1020 bits
|= IP_FW_IPOPT_RR
;
1024 bits
|= IP_FW_IPOPT_TS
;
1031 return (flags_match(cmd
, bits
));
1035 tcpopts_match(struct ip
*ip
, ipfw_insn
*cmd
)
1037 int optlen
, bits
= 0;
1038 struct tcphdr
*tcp
= L3HDR(struct tcphdr
,ip
);
1039 u_char
*cp
= (u_char
*)(tcp
+ 1);
1040 int x
= (tcp
->th_off
<< 2) - sizeof(struct tcphdr
);
1042 for (; x
> 0; x
-= optlen
, cp
+= optlen
) {
1045 if (opt
== TCPOPT_EOL
)
1048 if (opt
== TCPOPT_NOP
) {
1058 bits
|= IP_FW_TCPOPT_MSS
;
1062 bits
|= IP_FW_TCPOPT_WINDOW
;
1065 case TCPOPT_SACK_PERMITTED
:
1067 bits
|= IP_FW_TCPOPT_SACK
;
1070 case TCPOPT_TIMESTAMP
:
1071 bits
|= IP_FW_TCPOPT_TS
;
1077 bits
|= IP_FW_TCPOPT_CC
;
1084 return (flags_match(cmd
, bits
));
1088 iface_match(struct ifnet
*ifp
, ipfw_insn_if
*cmd
)
1090 if (ifp
== NULL
) /* no iface with this packet, match fails */
1093 /* Check by name or by IP address */
1094 if (cmd
->name
[0] != '\0') { /* match by name */
1097 if (kfnmatch(cmd
->name
, ifp
->if_xname
, 0) == 0)
1100 if (strncmp(ifp
->if_xname
, cmd
->name
, IFNAMSIZ
) == 0)
1104 struct ifaddr_container
*ifac
;
1106 TAILQ_FOREACH(ifac
, &ifp
->if_addrheads
[mycpuid
], ifa_link
) {
1107 struct ifaddr
*ia
= ifac
->ifa
;
1109 if (ia
->ifa_addr
== NULL
)
1111 if (ia
->ifa_addr
->sa_family
!= AF_INET
)
1113 if (cmd
->p
.ip
.s_addr
== ((struct sockaddr_in
*)
1114 (ia
->ifa_addr
))->sin_addr
.s_addr
)
1115 return(1); /* match */
1118 return(0); /* no match, fail ... */
1121 #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0
1124 * We enter here when we have a rule with O_LOG.
1125 * XXX this function alone takes about 2Kbytes of code!
1128 ipfw_log(struct ipfw_context
*ctx
, struct ip_fw
*f
, u_int hlen
,
1129 struct ether_header
*eh
, struct mbuf
*m
, struct ifnet
*oif
)
1132 int limit_reached
= 0;
1133 char action2
[40], proto
[48], fragment
[28], abuf
[INET_ADDRSTRLEN
];
1138 if (f
== NULL
) { /* bogus pkt */
1139 if (verbose_limit
!= 0 &&
1140 ctx
->ipfw_norule_counter
>= verbose_limit
)
1142 ctx
->ipfw_norule_counter
++;
1143 if (ctx
->ipfw_norule_counter
== verbose_limit
)
1144 limit_reached
= verbose_limit
;
1146 } else { /* O_LOG is the first action, find the real one */
1147 ipfw_insn
*cmd
= ACTION_PTR(f
);
1148 ipfw_insn_log
*l
= (ipfw_insn_log
*)cmd
;
1150 if (l
->max_log
!= 0 && l
->log_left
== 0)
1153 if (l
->log_left
== 0)
1154 limit_reached
= l
->max_log
;
1155 cmd
+= F_LEN(cmd
); /* point to first action */
1156 if (cmd
->opcode
== O_PROB
)
1160 switch (cmd
->opcode
) {
1166 if (cmd
->arg1
==ICMP_REJECT_RST
) {
1168 } else if (cmd
->arg1
==ICMP_UNREACH_HOST
) {
1171 ksnprintf(SNPARGS(action2
, 0), "Unreach %d",
1185 ksnprintf(SNPARGS(action2
, 0), "Divert %d", cmd
->arg1
);
1189 ksnprintf(SNPARGS(action2
, 0), "Tee %d", cmd
->arg1
);
1193 ksnprintf(SNPARGS(action2
, 0), "SkipTo %d", cmd
->arg1
);
1197 ksnprintf(SNPARGS(action2
, 0), "Pipe %d", cmd
->arg1
);
1201 ksnprintf(SNPARGS(action2
, 0), "Queue %d", cmd
->arg1
);
1206 ipfw_insn_sa
*sa
= (ipfw_insn_sa
*)cmd
;
1209 len
= ksnprintf(SNPARGS(action2
, 0),
1211 kinet_ntoa(sa
->sa
.sin_addr
, abuf
));
1212 if (sa
->sa
.sin_port
) {
1213 ksnprintf(SNPARGS(action2
, len
), ":%d",
1225 if (hlen
== 0) { /* non-ip */
1226 ksnprintf(SNPARGS(proto
, 0), "MAC");
1228 struct ip
*ip
= mtod(m
, struct ip
*);
1229 /* these three are all aliases to the same thing */
1230 struct icmp
*const icmp
= L3HDR(struct icmp
, ip
);
1231 struct tcphdr
*const tcp
= (struct tcphdr
*)icmp
;
1232 struct udphdr
*const udp
= (struct udphdr
*)icmp
;
1234 int ip_off
, offset
, ip_len
;
1237 if (eh
!= NULL
) { /* layer 2 packets are as on the wire */
1238 ip_off
= ntohs(ip
->ip_off
);
1239 ip_len
= ntohs(ip
->ip_len
);
1241 ip_off
= ip
->ip_off
;
1242 ip_len
= ip
->ip_len
;
1244 offset
= ip_off
& IP_OFFMASK
;
1247 len
= ksnprintf(SNPARGS(proto
, 0), "TCP %s",
1248 kinet_ntoa(ip
->ip_src
, abuf
));
1250 ksnprintf(SNPARGS(proto
, len
), ":%d %s:%d",
1251 ntohs(tcp
->th_sport
),
1252 kinet_ntoa(ip
->ip_dst
, abuf
),
1253 ntohs(tcp
->th_dport
));
1255 ksnprintf(SNPARGS(proto
, len
), " %s",
1256 kinet_ntoa(ip
->ip_dst
, abuf
));
1261 len
= ksnprintf(SNPARGS(proto
, 0), "UDP %s",
1262 kinet_ntoa(ip
->ip_src
, abuf
));
1264 ksnprintf(SNPARGS(proto
, len
), ":%d %s:%d",
1265 ntohs(udp
->uh_sport
),
1266 kinet_ntoa(ip
->ip_dst
, abuf
),
1267 ntohs(udp
->uh_dport
));
1269 ksnprintf(SNPARGS(proto
, len
), " %s",
1270 kinet_ntoa(ip
->ip_dst
, abuf
));
1276 len
= ksnprintf(SNPARGS(proto
, 0),
1281 len
= ksnprintf(SNPARGS(proto
, 0), "ICMP ");
1283 len
+= ksnprintf(SNPARGS(proto
, len
), "%s",
1284 kinet_ntoa(ip
->ip_src
, abuf
));
1285 ksnprintf(SNPARGS(proto
, len
), " %s",
1286 kinet_ntoa(ip
->ip_dst
, abuf
));
1290 len
= ksnprintf(SNPARGS(proto
, 0), "P:%d %s", ip
->ip_p
,
1291 kinet_ntoa(ip
->ip_src
, abuf
));
1292 ksnprintf(SNPARGS(proto
, len
), " %s",
1293 kinet_ntoa(ip
->ip_dst
, abuf
));
1297 if (ip_off
& (IP_MF
| IP_OFFMASK
)) {
1298 ksnprintf(SNPARGS(fragment
, 0), " (frag %d:%d@%d%s)",
1299 ntohs(ip
->ip_id
), ip_len
- (ip
->ip_hl
<< 2),
1300 offset
<< 3, (ip_off
& IP_MF
) ? "+" : "");
1304 if (oif
|| m
->m_pkthdr
.rcvif
) {
1305 log(LOG_SECURITY
| LOG_INFO
,
1306 "ipfw: %d %s %s %s via %s%s\n",
1307 f
? f
->rulenum
: -1,
1308 action
, proto
, oif
? "out" : "in",
1309 oif
? oif
->if_xname
: m
->m_pkthdr
.rcvif
->if_xname
,
1312 log(LOG_SECURITY
| LOG_INFO
,
1313 "ipfw: %d %s %s [no if info]%s\n",
1314 f
? f
->rulenum
: -1,
1315 action
, proto
, fragment
);
1318 if (limit_reached
) {
1319 log(LOG_SECURITY
| LOG_NOTICE
,
1320 "ipfw: limit %d reached on entry %d\n",
1321 limit_reached
, f
? f
->rulenum
: -1);
1327 #define TIME_LEQ(a, b) ((a) - (b) <= 0)
1330 ipfw_state_del(struct ipfw_context
*ctx
, struct ipfw_state
*s
)
1333 KASSERT(s
->st_type
== O_KEEP_STATE
|| s
->st_type
== O_LIMIT
,
1334 ("invalid state type %u", s
->st_type
));
1335 KASSERT(ctx
->ipfw_state_cnt
> 0,
1336 ("invalid state count %d", ctx
->ipfw_state_cnt
));
1338 if (s
->st_track
!= NULL
) {
1339 struct ipfw_track
*t
= s
->st_track
;
1341 KASSERT(!LIST_EMPTY(&t
->t_state_list
),
1342 ("track state list is empty"));
1343 LIST_REMOVE(s
, st_trklink
);
1345 KASSERT(*t
->t_count
> 0,
1346 ("invalid track count %d", *t
->t_count
));
1347 atomic_subtract_int(t
->t_count
, 1);
1350 TAILQ_REMOVE(&ctx
->ipfw_state_list
, s
, st_link
);
1351 RB_REMOVE(ipfw_state_tree
, &ctx
->ipfw_state_tree
, s
);
1354 ctx
->ipfw_state_cnt
--;
1355 if (ctx
->ipfw_state_loosecnt
> 0)
1356 ctx
->ipfw_state_loosecnt
--;
1360 ipfw_state_reap(struct ipfw_context
*ctx
, int reap_max
)
1362 struct ipfw_state
*s
, *anchor
;
1365 if (reap_max
< ipfw_state_reap_min
)
1366 reap_max
= ipfw_state_reap_min
;
1368 if ((ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
) == 0) {
1370 * Kick start state expiring. Ignore scan limit,
1371 * we are short of states.
1373 ctx
->ipfw_flags
|= IPFW_FLAG_STATEREAP
;
1374 expired
= ipfw_state_expire_start(ctx
, INT_MAX
, reap_max
);
1375 ctx
->ipfw_flags
&= ~IPFW_FLAG_STATEREAP
;
1380 * States are being expired.
1383 if (ctx
->ipfw_state_cnt
== 0)
1387 anchor
= &ctx
->ipfw_stateexp_anch
;
1388 while ((s
= TAILQ_NEXT(anchor
, st_link
)) != NULL
) {
1390 * Ignore scan limit; we are short of states.
1393 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
1394 TAILQ_INSERT_AFTER(&ctx
->ipfw_state_list
, s
, anchor
, st_link
);
1396 if (s
->st_type
== O_ANCHOR
)
1399 if (IPFW_STATE_TCPCLOSED(s
) ||
1400 TIME_LEQ(s
->st_expire
, time_uptime
)) {
1401 ipfw_state_del(ctx
, s
);
1402 if (++expired
>= reap_max
)
1404 if ((expired
& 0xff) == 0 &&
1405 ipfw_state_cntcoll() + ipfw_state_headroom
<=
1412 * Leave the anchor on the list, even if the end of the list has
1413 * been reached. ipfw_state_expire_more_dispatch() will handle
1420 ipfw_state_flush(struct ipfw_context
*ctx
, const struct ip_fw
*rule
)
1422 struct ipfw_state
*s
, *sn
;
1424 TAILQ_FOREACH_MUTABLE(s
, &ctx
->ipfw_state_list
, st_link
, sn
) {
1425 if (s
->st_type
== O_ANCHOR
)
1427 if (rule
!= NULL
&& s
->st_rule
!= rule
)
1429 ipfw_state_del(ctx
, s
);
1434 ipfw_state_expire_done(struct ipfw_context
*ctx
)
1437 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
,
1438 ("stateexp is not in progress"));
1439 ctx
->ipfw_flags
&= ~IPFW_FLAG_STATEEXP
;
1440 callout_reset(&ctx
->ipfw_stateto_ch
, hz
,
1441 ipfw_state_expire_ipifunc
, NULL
);
1445 ipfw_state_expire_more(struct ipfw_context
*ctx
)
1447 struct netmsg_base
*nm
= &ctx
->ipfw_stateexp_more
;
1449 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
,
1450 ("stateexp is not in progress"));
1451 KASSERT(nm
->lmsg
.ms_flags
& MSGF_DONE
,
1452 ("stateexp more did not finish"));
1453 netisr_sendmsg_oncpu(nm
);
1457 ipfw_state_expire_loop(struct ipfw_context
*ctx
, struct ipfw_state
*anchor
,
1458 int scan_max
, int expire_max
)
1460 struct ipfw_state
*s
;
1461 int scanned
= 0, expired
= 0;
1463 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
,
1464 ("stateexp is not in progress"));
1466 while ((s
= TAILQ_NEXT(anchor
, st_link
)) != NULL
) {
1467 if (scanned
++ >= scan_max
) {
1468 ipfw_state_expire_more(ctx
);
1472 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
1473 TAILQ_INSERT_AFTER(&ctx
->ipfw_state_list
, s
, anchor
, st_link
);
1475 if (s
->st_type
== O_ANCHOR
)
1478 if (TIME_LEQ(s
->st_expire
, time_uptime
) ||
1479 ((ctx
->ipfw_flags
& IPFW_FLAG_STATEREAP
) &&
1480 IPFW_STATE_TCPCLOSED(s
))) {
1481 ipfw_state_del(ctx
, s
);
1482 if (++expired
>= expire_max
) {
1483 ipfw_state_expire_more(ctx
);
1486 if ((ctx
->ipfw_flags
& IPFW_FLAG_STATEREAP
) &&
1487 (expired
& 0xff) == 0 &&
1488 ipfw_state_cntcoll() + ipfw_state_headroom
<=
1490 ipfw_state_expire_more(ctx
);
1495 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
1496 ipfw_state_expire_done(ctx
);
1501 ipfw_state_expire_more_dispatch(netmsg_t nm
)
1503 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
1504 struct ipfw_state
*anchor
;
1506 ASSERT_NETISR_NCPUS(mycpuid
);
1507 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
,
1508 ("statexp is not in progress"));
1511 netisr_replymsg(&nm
->base
, 0);
1513 anchor
= &ctx
->ipfw_stateexp_anch
;
1514 if (ctx
->ipfw_state_cnt
== 0) {
1515 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
1516 ipfw_state_expire_done(ctx
);
1519 ipfw_state_expire_loop(ctx
, anchor
,
1520 ipfw_state_scan_max
, ipfw_state_expire_max
);
1524 ipfw_state_expire_start(struct ipfw_context
*ctx
, int scan_max
, int expire_max
)
1526 struct ipfw_state
*anchor
;
1528 KASSERT((ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
) == 0,
1529 ("stateexp is in progress"));
1530 ctx
->ipfw_flags
|= IPFW_FLAG_STATEEXP
;
1532 if (ctx
->ipfw_state_cnt
== 0) {
1533 ipfw_state_expire_done(ctx
);
1538 * Do not expire more than once per second, it is useless.
1540 if ((ctx
->ipfw_flags
& IPFW_FLAG_STATEREAP
) == 0 &&
1541 ctx
->ipfw_state_lastexp
== time_uptime
) {
1542 ipfw_state_expire_done(ctx
);
1545 ctx
->ipfw_state_lastexp
= time_uptime
;
1547 anchor
= &ctx
->ipfw_stateexp_anch
;
1548 TAILQ_INSERT_HEAD(&ctx
->ipfw_state_list
, anchor
, st_link
);
1549 return (ipfw_state_expire_loop(ctx
, anchor
, scan_max
, expire_max
));
1553 ipfw_state_expire_dispatch(netmsg_t nm
)
1555 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
1557 ASSERT_NETISR_NCPUS(mycpuid
);
1561 netisr_replymsg(&nm
->base
, 0);
1564 if (ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
) {
1565 /* Running; done. */
1568 ipfw_state_expire_start(ctx
,
1569 ipfw_state_scan_max
, ipfw_state_expire_max
);
1573 ipfw_state_expire_ipifunc(void *dummy __unused
)
1575 struct netmsg_base
*msg
;
1577 KKASSERT(mycpuid
< netisr_ncpus
);
1578 msg
= &ipfw_ctx
[mycpuid
]->ipfw_stateexp_nm
;
1581 if (msg
->lmsg
.ms_flags
& MSGF_DONE
)
1582 netisr_sendmsg_oncpu(msg
);
1587 ipfw_state_update_tcp(struct ipfw_state
*s
, int dir
, const struct tcphdr
*tcp
)
1589 uint32_t seq
= ntohl(tcp
->th_seq
);
1590 uint32_t ack
= ntohl(tcp
->th_ack
);
1592 if (tcp
->th_flags
& TH_RST
)
1595 if (dir
== MATCH_FORWARD
) {
1596 if ((s
->st_flags
& IPFW_STATE_F_SEQFWD
) == 0) {
1597 s
->st_flags
|= IPFW_STATE_F_SEQFWD
;
1598 s
->st_seq_fwd
= seq
;
1599 } else if (SEQ_GEQ(seq
, s
->st_seq_fwd
)) {
1600 s
->st_seq_fwd
= seq
;
1602 /* Out-of-sequence; done. */
1605 if (tcp
->th_flags
& TH_ACK
) {
1606 if ((s
->st_flags
& IPFW_STATE_F_ACKFWD
) == 0) {
1607 s
->st_flags
|= IPFW_STATE_F_ACKFWD
;
1608 s
->st_ack_fwd
= ack
;
1609 } else if (SEQ_GEQ(ack
, s
->st_ack_fwd
)) {
1610 s
->st_ack_fwd
= ack
;
1612 /* Out-of-sequence; done. */
1616 if ((s
->st_state
& ((TH_FIN
| TH_ACK
) << 8)) ==
1617 (TH_FIN
<< 8) && s
->st_ack_fwd
== s
->st_seq_rev
+ 1)
1618 s
->st_state
|= (TH_ACK
<< 8);
1621 if ((s
->st_flags
& IPFW_STATE_F_SEQREV
) == 0) {
1622 s
->st_flags
|= IPFW_STATE_F_SEQREV
;
1623 s
->st_seq_rev
= seq
;
1624 } else if (SEQ_GEQ(seq
, s
->st_seq_rev
)) {
1625 s
->st_seq_rev
= seq
;
1627 /* Out-of-sequence; done. */
1630 if (tcp
->th_flags
& TH_ACK
) {
1631 if ((s
->st_flags
& IPFW_STATE_F_ACKREV
) == 0) {
1632 s
->st_flags
|= IPFW_STATE_F_ACKREV
;
1634 } else if (SEQ_GEQ(ack
, s
->st_ack_rev
)) {
1635 s
->st_ack_rev
= ack
;
1637 /* Out-of-sequence; done. */
1641 if ((s
->st_state
& (TH_FIN
| TH_ACK
)) == TH_FIN
&&
1642 s
->st_ack_rev
== s
->st_seq_fwd
+ 1)
1643 s
->st_state
|= TH_ACK
;
1650 ipfw_state_update(const struct ipfw_flow_id
*pkt
, int dir
,
1651 const struct tcphdr
*tcp
, struct ipfw_state
*s
)
1654 if (pkt
->proto
== IPPROTO_TCP
) { /* update state according to flags */
1655 u_char flags
= pkt
->flags
& IPFW_STATE_TCPFLAGS
;
1657 if (tcp
!= NULL
&& !ipfw_state_update_tcp(s
, dir
, tcp
))
1660 s
->st_state
|= (dir
== MATCH_FORWARD
) ? flags
: (flags
<< 8);
1661 switch (s
->st_state
& IPFW_STATE_TCPSTATES
) {
1662 case TH_SYN
: /* opening */
1663 s
->st_expire
= time_uptime
+ dyn_syn_lifetime
;
1666 case BOTH_SYN
: /* move to established */
1667 case BOTH_SYN
| TH_FIN
: /* one side tries to close */
1668 case BOTH_SYN
| (TH_FIN
<< 8):
1669 s
->st_expire
= time_uptime
+ dyn_ack_lifetime
;
1672 case BOTH_SYN
| BOTH_FIN
: /* both sides closed */
1673 if ((s
->st_state
& BOTH_FINACK
) == BOTH_FINACK
) {
1674 /* And both FINs were ACKed. */
1675 s
->st_expire
= time_uptime
+ dyn_fin_lifetime
;
1677 s
->st_expire
= time_uptime
+
1678 dyn_finwait_lifetime
;
1685 * reset or some invalid combination, but can also
1686 * occur if we use keep-state the wrong way.
1688 if ((s
->st_state
& ((TH_RST
<< 8) | TH_RST
)) == 0)
1689 kprintf("invalid state: 0x%x\n", s
->st_state
);
1691 s
->st_expire
= time_uptime
+ dyn_rst_lifetime
;
1694 } else if (pkt
->proto
== IPPROTO_UDP
) {
1695 s
->st_expire
= time_uptime
+ dyn_udp_lifetime
;
1697 /* other protocols */
1698 s
->st_expire
= time_uptime
+ dyn_short_lifetime
;
1705 static struct ipfw_state
*
1706 ipfw_state_lookup(struct ipfw_context
*ctx
, const struct ipfw_flow_id
*pkt
,
1707 int *match_direction
, const struct tcphdr
*tcp
)
1709 struct ipfw_state
*key
, *s
;
1710 int dir
= MATCH_NONE
;
1712 key
= &ctx
->ipfw_state_tmpkey
;
1713 ipfw_key_build(&key
->st_key
, pkt
->src_ip
, pkt
->src_port
,
1714 pkt
->dst_ip
, pkt
->dst_port
, pkt
->proto
);
1715 s
= RB_FIND(ipfw_state_tree
, &ctx
->ipfw_state_tree
, key
);
1717 goto done
; /* not found. */
1718 if (TIME_LEQ(s
->st_expire
, time_uptime
)) {
1720 ipfw_state_del(ctx
, s
);
1724 if ((pkt
->flags
& TH_SYN
) && IPFW_STATE_TCPCLOSED(s
)) {
1725 /* TCP ports recycling is too fast. */
1726 ctx
->ipfw_sts_tcprecycled
++;
1727 ipfw_state_del(ctx
, s
);
1732 if (s
->st_swap
== key
->st_swap
) {
1733 dir
= MATCH_FORWARD
;
1735 KASSERT((s
->st_swap
& key
->st_swap
) == 0,
1736 ("found mismatch state"));
1737 dir
= MATCH_REVERSE
;
1740 /* Update this state. */
1741 ipfw_state_update(pkt
, dir
, tcp
, s
);
1743 if (s
->st_track
!= NULL
) {
1744 /* This track has been used. */
1745 s
->st_track
->t_expire
= time_uptime
+ dyn_short_lifetime
;
1748 if (match_direction
)
1749 *match_direction
= dir
;
1753 static __inline
struct ip_fw
*
1754 ipfw_state_lookup_rule(struct ipfw_context
*ctx
, const struct ipfw_flow_id
*pkt
,
1755 int *match_direction
, const struct tcphdr
*tcp
, uint16_t len
)
1757 struct ipfw_state
*s
;
1759 s
= ipfw_state_lookup(ctx
, pkt
, match_direction
, tcp
);
1763 KASSERT(s
->st_rule
->cpuid
== mycpuid
,
1764 ("rule %p (cpu%d) does not belong to the current cpu%d",
1765 s
->st_rule
, s
->st_rule
->cpuid
, mycpuid
));
1770 return (s
->st_rule
);
1773 static struct ipfw_state
*
1774 ipfw_state_add(struct ipfw_context
*ctx
, const struct ipfw_flow_id
*id
,
1775 uint16_t type
, struct ip_fw
*rule
, struct ipfw_track
*t
,
1776 const struct tcphdr
*tcp
)
1778 struct ipfw_state
*s
, *dup
;
1780 KASSERT(type
== O_KEEP_STATE
|| type
== O_LIMIT
,
1781 ("invalid state type %u", type
));
1783 s
= kmalloc(sizeof(*s
), M_IPFW
, M_INTWAIT
| M_NULLOK
| M_ZERO
);
1785 ctx
->ipfw_sts_nomem
++;
1789 ipfw_key_build(&s
->st_key
, id
->src_ip
, id
->src_port
,
1790 id
->dst_ip
, id
->dst_port
, id
->proto
);
1795 ctx
->ipfw_state_cnt
++;
1796 ctx
->ipfw_state_loosecnt
++;
1797 if (ctx
->ipfw_state_loosecnt
>= ipfw_state_loosecnt_updthr
) {
1798 ipfw_gd
.ipfw_state_loosecnt
+= ctx
->ipfw_state_loosecnt
;
1799 ctx
->ipfw_state_loosecnt
= 0;
1802 dup
= RB_INSERT(ipfw_state_tree
, &ctx
->ipfw_state_tree
, s
);
1804 panic("ipfw: state exists");
1805 TAILQ_INSERT_TAIL(&ctx
->ipfw_state_list
, s
, st_link
);
1808 * Update this state:
1809 * Set st_expire and st_state.
1811 ipfw_state_update(id
, MATCH_FORWARD
, tcp
, s
);
1814 /* Keep the track referenced. */
1815 LIST_INSERT_HEAD(&t
->t_state_list
, s
, st_trklink
);
1822 ipfw_track_free(struct ipfw_context
*ctx
, struct ipfw_track
*t
)
1824 struct ipfw_trkcnt
*trk
;
1825 boolean_t trk_freed
= FALSE
;
1827 KASSERT(t
->t_count
!= NULL
, ("track anchor"));
1828 KASSERT(LIST_EMPTY(&t
->t_state_list
),
1829 ("invalid track is still referenced"));
1832 KASSERT(trk
!= NULL
, ("track has no trkcnt"));
1834 RB_REMOVE(ipfw_track_tree
, &ctx
->ipfw_track_tree
, t
);
1835 TAILQ_REMOVE(&ctx
->ipfw_track_list
, t
, t_link
);
1839 * fdrop() style reference counting.
1840 * See kern/kern_descrip.c fdrop().
1843 int refs
= trk
->tc_refs
;
1846 KASSERT(refs
> 0, ("invalid trkcnt refs %d", refs
));
1849 if (atomic_cmpset_int(&trk
->tc_refs
, refs
, 0)) {
1850 KASSERT(trk
->tc_count
== 0,
1851 ("%d states reference this trkcnt",
1853 RB_REMOVE(ipfw_trkcnt_tree
,
1854 &ipfw_gd
.ipfw_trkcnt_tree
, trk
);
1856 KASSERT(ipfw_gd
.ipfw_trkcnt_cnt
> 0,
1857 ("invalid trkcnt cnt %d",
1858 ipfw_gd
.ipfw_trkcnt_cnt
));
1859 ipfw_gd
.ipfw_trkcnt_cnt
--;
1862 if (ctx
->ipfw_trkcnt_spare
== NULL
)
1863 ctx
->ipfw_trkcnt_spare
= trk
;
1871 } else if (atomic_cmpset_int(&trk
->tc_refs
, refs
, refs
- 1)) {
1880 ipfw_track_flush(struct ipfw_context
*ctx
, struct ip_fw
*rule
)
1882 struct ipfw_track
*t
, *tn
;
1884 TAILQ_FOREACH_MUTABLE(t
, &ctx
->ipfw_track_list
, t_link
, tn
) {
1885 if (t
->t_count
== NULL
) /* anchor */
1887 if (rule
!= NULL
&& t
->t_rule
!= rule
)
1889 ipfw_track_free(ctx
, t
);
1894 ipfw_track_state_expire(struct ipfw_context
*ctx
, struct ipfw_track
*t
,
1897 struct ipfw_state
*s
, *sn
;
1898 boolean_t ret
= FALSE
;
1900 KASSERT(t
->t_count
!= NULL
, ("track anchor"));
1902 if (LIST_EMPTY(&t
->t_state_list
))
1906 * Do not expire more than once per second, it is useless.
1908 if (t
->t_lastexp
== time_uptime
)
1910 t
->t_lastexp
= time_uptime
;
1912 LIST_FOREACH_MUTABLE(s
, &t
->t_state_list
, st_trklink
, sn
) {
1913 if (TIME_LEQ(s
->st_expire
, time_uptime
) ||
1914 (reap
&& IPFW_STATE_TCPCLOSED(s
))) {
1915 KASSERT(s
->st_track
== t
,
1916 ("state track %p does not match %p",
1918 ipfw_state_del(ctx
, s
);
1925 static __inline
struct ipfw_trkcnt
*
1926 ipfw_trkcnt_alloc(struct ipfw_context
*ctx
)
1928 struct ipfw_trkcnt
*trk
;
1930 if (ctx
->ipfw_trkcnt_spare
!= NULL
) {
1931 trk
= ctx
->ipfw_trkcnt_spare
;
1932 ctx
->ipfw_trkcnt_spare
= NULL
;
1934 trk
= kmalloc_cachealign(sizeof(*trk
), M_IPFW
,
1935 M_INTWAIT
| M_NULLOK
);
1941 ipfw_track_expire_done(struct ipfw_context
*ctx
)
1944 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
,
1945 ("trackexp is not in progress"));
1946 ctx
->ipfw_flags
&= ~IPFW_FLAG_TRACKEXP
;
1947 callout_reset(&ctx
->ipfw_trackto_ch
, hz
,
1948 ipfw_track_expire_ipifunc
, NULL
);
1952 ipfw_track_expire_more(struct ipfw_context
*ctx
)
1954 struct netmsg_base
*nm
= &ctx
->ipfw_trackexp_more
;
1956 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
,
1957 ("trackexp is not in progress"));
1958 KASSERT(nm
->lmsg
.ms_flags
& MSGF_DONE
,
1959 ("trackexp more did not finish"));
1960 netisr_sendmsg_oncpu(nm
);
1964 ipfw_track_expire_loop(struct ipfw_context
*ctx
, struct ipfw_track
*anchor
,
1965 int scan_max
, int expire_max
)
1967 struct ipfw_track
*t
;
1968 int scanned
= 0, expired
= 0;
1969 boolean_t reap
= FALSE
;
1971 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
,
1972 ("trackexp is not in progress"));
1974 if (ctx
->ipfw_flags
& IPFW_FLAG_TRACKREAP
)
1977 while ((t
= TAILQ_NEXT(anchor
, t_link
)) != NULL
) {
1978 if (scanned
++ >= scan_max
) {
1979 ipfw_track_expire_more(ctx
);
1983 TAILQ_REMOVE(&ctx
->ipfw_track_list
, anchor
, t_link
);
1984 TAILQ_INSERT_AFTER(&ctx
->ipfw_track_list
, t
, anchor
, t_link
);
1986 if (t
->t_count
== NULL
) /* anchor */
1989 ipfw_track_state_expire(ctx
, t
, reap
);
1990 if (!LIST_EMPTY(&t
->t_state_list
)) {
1991 /* There are states referencing this track. */
1995 if (TIME_LEQ(t
->t_expire
, time_uptime
) || reap
) {
1997 if (ipfw_track_free(ctx
, t
)) {
1998 if (++expired
>= expire_max
) {
1999 ipfw_track_expire_more(ctx
);
2005 TAILQ_REMOVE(&ctx
->ipfw_track_list
, anchor
, t_link
);
2006 ipfw_track_expire_done(ctx
);
2011 ipfw_track_expire_start(struct ipfw_context
*ctx
, int scan_max
, int expire_max
)
2013 struct ipfw_track
*anchor
;
2015 KASSERT((ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
) == 0,
2016 ("trackexp is in progress"));
2017 ctx
->ipfw_flags
|= IPFW_FLAG_TRACKEXP
;
2019 if (RB_EMPTY(&ctx
->ipfw_track_tree
)) {
2020 ipfw_track_expire_done(ctx
);
2025 * Do not expire more than once per second, it is useless.
2027 if ((ctx
->ipfw_flags
& IPFW_FLAG_TRACKREAP
) == 0 &&
2028 ctx
->ipfw_track_lastexp
== time_uptime
) {
2029 ipfw_track_expire_done(ctx
);
2032 ctx
->ipfw_track_lastexp
= time_uptime
;
2034 anchor
= &ctx
->ipfw_trackexp_anch
;
2035 TAILQ_INSERT_HEAD(&ctx
->ipfw_track_list
, anchor
, t_link
);
2036 return (ipfw_track_expire_loop(ctx
, anchor
, scan_max
, expire_max
));
2040 ipfw_track_expire_more_dispatch(netmsg_t nm
)
2042 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
2043 struct ipfw_track
*anchor
;
2045 ASSERT_NETISR_NCPUS(mycpuid
);
2046 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
,
2047 ("trackexp is not in progress"));
2050 netisr_replymsg(&nm
->base
, 0);
2052 anchor
= &ctx
->ipfw_trackexp_anch
;
2053 if (RB_EMPTY(&ctx
->ipfw_track_tree
)) {
2054 TAILQ_REMOVE(&ctx
->ipfw_track_list
, anchor
, t_link
);
2055 ipfw_track_expire_done(ctx
);
2058 ipfw_track_expire_loop(ctx
, anchor
,
2059 ipfw_track_scan_max
, ipfw_track_expire_max
);
2063 ipfw_track_expire_dispatch(netmsg_t nm
)
2065 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
2067 ASSERT_NETISR_NCPUS(mycpuid
);
2071 netisr_replymsg(&nm
->base
, 0);
2074 if (ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
) {
2075 /* Running; done. */
2078 ipfw_track_expire_start(ctx
,
2079 ipfw_track_scan_max
, ipfw_track_expire_max
);
2083 ipfw_track_expire_ipifunc(void *dummy __unused
)
2085 struct netmsg_base
*msg
;
2087 KKASSERT(mycpuid
< netisr_ncpus
);
2088 msg
= &ipfw_ctx
[mycpuid
]->ipfw_trackexp_nm
;
2091 if (msg
->lmsg
.ms_flags
& MSGF_DONE
)
2092 netisr_sendmsg_oncpu(msg
);
2097 ipfw_track_reap(struct ipfw_context
*ctx
)
2099 struct ipfw_track
*t
, *anchor
;
2102 if ((ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
) == 0) {
2104 * Kick start track expiring. Ignore scan limit,
2105 * we are short of tracks.
2107 ctx
->ipfw_flags
|= IPFW_FLAG_TRACKREAP
;
2108 expired
= ipfw_track_expire_start(ctx
, INT_MAX
,
2109 ipfw_track_reap_max
);
2110 ctx
->ipfw_flags
&= ~IPFW_FLAG_TRACKREAP
;
2115 * Tracks are being expired.
2118 if (RB_EMPTY(&ctx
->ipfw_track_tree
))
2122 anchor
= &ctx
->ipfw_trackexp_anch
;
2123 while ((t
= TAILQ_NEXT(anchor
, t_link
)) != NULL
) {
2125 * Ignore scan limit; we are short of tracks.
2128 TAILQ_REMOVE(&ctx
->ipfw_track_list
, anchor
, t_link
);
2129 TAILQ_INSERT_AFTER(&ctx
->ipfw_track_list
, t
, anchor
, t_link
);
2131 if (t
->t_count
== NULL
) /* anchor */
2134 ipfw_track_state_expire(ctx
, t
, TRUE
);
2135 if (!LIST_EMPTY(&t
->t_state_list
)) {
2136 /* There are states referencing this track. */
2140 if (ipfw_track_free(ctx
, t
)) {
2141 if (++expired
>= ipfw_track_reap_max
) {
2142 ipfw_track_expire_more(ctx
);
2149 * Leave the anchor on the list, even if the end of the list has
2150 * been reached. ipfw_track_expire_more_dispatch() will handle
2156 static struct ipfw_track
*
2157 ipfw_track_alloc(struct ipfw_context
*ctx
, const struct ipfw_flow_id
*id
,
2158 uint16_t limit_mask
, struct ip_fw
*rule
)
2160 struct ipfw_track
*key
, *t
, *dup
;
2161 struct ipfw_trkcnt
*trk
, *ret
;
2162 boolean_t do_expire
= FALSE
;
2164 KASSERT(rule
->track_ruleid
!= 0,
2165 ("rule %u has no track ruleid", rule
->rulenum
));
2167 key
= &ctx
->ipfw_track_tmpkey
;
2168 key
->t_proto
= id
->proto
;
2172 if (limit_mask
& DYN_SRC_ADDR
)
2173 key
->t_saddr
= id
->src_ip
;
2174 if (limit_mask
& DYN_DST_ADDR
)
2175 key
->t_daddr
= id
->dst_ip
;
2176 if (limit_mask
& DYN_SRC_PORT
)
2177 key
->t_sport
= id
->src_port
;
2178 if (limit_mask
& DYN_DST_PORT
)
2179 key
->t_dport
= id
->dst_port
;
2181 t
= RB_FIND(ipfw_track_tree
, &ctx
->ipfw_track_tree
, key
);
2185 t
= kmalloc(sizeof(*t
), M_IPFW
, M_INTWAIT
| M_NULLOK
);
2187 ctx
->ipfw_tks_nomem
++;
2191 t
->t_key
= key
->t_key
;
2194 LIST_INIT(&t
->t_state_list
);
2196 if (ipfw_gd
.ipfw_trkcnt_cnt
>= ipfw_track_max
) {
2197 time_t globexp
, uptime
;
2203 * Do not expire globally more than once per second,
2206 uptime
= time_uptime
;
2207 globexp
= ipfw_gd
.ipfw_track_globexp
;
2208 if (globexp
!= uptime
&&
2209 atomic_cmpset_long(&ipfw_gd
.ipfw_track_globexp
,
2213 /* Expire tracks on other CPUs. */
2214 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
) {
2217 lwkt_send_ipiq(globaldata_find(cpu
),
2218 ipfw_track_expire_ipifunc
, NULL
);
2222 trk
= ipfw_trkcnt_alloc(ctx
);
2225 struct ipfw_trkcnt
*tkey
;
2227 tkey
= &ctx
->ipfw_trkcnt_tmpkey
;
2228 key
= NULL
; /* tkey overlaps key */
2230 tkey
->tc_key
= t
->t_key
;
2231 tkey
->tc_ruleid
= rule
->track_ruleid
;
2234 trk
= RB_FIND(ipfw_trkcnt_tree
, &ipfw_gd
.ipfw_trkcnt_tree
,
2239 ctx
->ipfw_tks_reap
++;
2240 if (ipfw_track_reap(ctx
) > 0) {
2241 if (ipfw_gd
.ipfw_trkcnt_cnt
<
2243 trk
= ipfw_trkcnt_alloc(ctx
);
2246 ctx
->ipfw_tks_cntnomem
++;
2248 ctx
->ipfw_tks_overflow
++;
2251 ctx
->ipfw_tks_reapfailed
++;
2252 ctx
->ipfw_tks_overflow
++;
2255 ctx
->ipfw_tks_cntnomem
++;
2260 KASSERT(trk
->tc_refs
> 0 && trk
->tc_refs
< netisr_ncpus
,
2261 ("invalid trkcnt refs %d", trk
->tc_refs
));
2262 atomic_add_int(&trk
->tc_refs
, 1);
2266 trk
->tc_key
= t
->t_key
;
2267 trk
->tc_ruleid
= rule
->track_ruleid
;
2271 trk
->tc_rulenum
= rule
->rulenum
;
2274 ret
= RB_INSERT(ipfw_trkcnt_tree
, &ipfw_gd
.ipfw_trkcnt_tree
,
2277 KASSERT(ret
->tc_refs
> 0 &&
2278 ret
->tc_refs
< netisr_ncpus
,
2279 ("invalid trkcnt refs %d", ret
->tc_refs
));
2280 KASSERT(ctx
->ipfw_trkcnt_spare
== NULL
,
2281 ("trkcnt spare was installed"));
2282 ctx
->ipfw_trkcnt_spare
= trk
;
2285 ipfw_gd
.ipfw_trkcnt_cnt
++;
2287 atomic_add_int(&trk
->tc_refs
, 1);
2290 t
->t_count
= &trk
->tc_count
;
2293 dup
= RB_INSERT(ipfw_track_tree
, &ctx
->ipfw_track_tree
, t
);
2295 panic("ipfw: track exists");
2296 TAILQ_INSERT_TAIL(&ctx
->ipfw_track_list
, t
, t_link
);
2298 t
->t_expire
= time_uptime
+ dyn_short_lifetime
;
2303 * Install state for rule type cmd->o.opcode
2305 * Returns 1 (failure) if state is not installed because of errors or because
2306 * states limitations are enforced.
2309 ipfw_state_install(struct ipfw_context
*ctx
, struct ip_fw
*rule
,
2310 ipfw_insn_limit
*cmd
, struct ip_fw_args
*args
, const struct tcphdr
*tcp
)
2312 struct ipfw_state
*s
;
2313 struct ipfw_track
*t
;
2316 if (ipfw_gd
.ipfw_state_loosecnt
>= ipfw_state_max
&&
2317 (diff
= (ipfw_state_cntsync() - ipfw_state_max
)) >= 0) {
2318 boolean_t overflow
= TRUE
;
2320 ctx
->ipfw_sts_reap
++;
2321 if (ipfw_state_reap(ctx
, diff
) == 0)
2322 ctx
->ipfw_sts_reapfailed
++;
2323 if (ipfw_state_cntsync() < ipfw_state_max
)
2327 time_t globexp
, uptime
;
2331 * Do not expire globally more than once per second,
2334 uptime
= time_uptime
;
2335 globexp
= ipfw_gd
.ipfw_state_globexp
;
2336 if (globexp
== uptime
||
2337 !atomic_cmpset_long(&ipfw_gd
.ipfw_state_globexp
,
2339 ctx
->ipfw_sts_overflow
++;
2343 /* Expire states on other CPUs. */
2344 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
) {
2347 lwkt_send_ipiq(globaldata_find(cpu
),
2348 ipfw_state_expire_ipifunc
, NULL
);
2350 ctx
->ipfw_sts_overflow
++;
2355 switch (cmd
->o
.opcode
) {
2356 case O_KEEP_STATE
: /* bidir rule */
2357 s
= ipfw_state_add(ctx
, &args
->f_id
, O_KEEP_STATE
, rule
, NULL
,
2363 case O_LIMIT
: /* limit number of sessions */
2364 t
= ipfw_track_alloc(ctx
, &args
->f_id
, cmd
->limit_mask
, rule
);
2368 if (*t
->t_count
>= cmd
->conn_limit
) {
2369 if (!ipfw_track_state_expire(ctx
, t
, TRUE
))
2373 count
= *t
->t_count
;
2374 if (count
>= cmd
->conn_limit
)
2376 if (atomic_cmpset_int(t
->t_count
, count
, count
+ 1))
2380 s
= ipfw_state_add(ctx
, &args
->f_id
, O_LIMIT
, rule
, t
, tcp
);
2383 atomic_subtract_int(t
->t_count
, 1);
2389 panic("unknown state type %u\n", cmd
->o
.opcode
);
2395 * Transmit a TCP packet, containing either a RST or a keepalive.
2396 * When flags & TH_RST, we are sending a RST packet, because of a
2397 * "reset" action matched the packet.
2398 * Otherwise we are sending a keepalive, and flags & TH_
2400 * Only {src,dst}_{ip,port} of "id" are used.
2403 send_pkt(const struct ipfw_flow_id
*id
, uint32_t seq
, uint32_t ack
, int flags
)
2408 struct route sro
; /* fake route */
2410 MGETHDR(m
, M_NOWAIT
, MT_HEADER
);
2413 m
->m_pkthdr
.rcvif
= NULL
;
2414 m
->m_pkthdr
.len
= m
->m_len
= sizeof(struct ip
) + sizeof(struct tcphdr
);
2415 m
->m_data
+= max_linkhdr
;
2417 ip
= mtod(m
, struct ip
*);
2418 bzero(ip
, m
->m_len
);
2419 tcp
= (struct tcphdr
*)(ip
+ 1); /* no IP options */
2420 ip
->ip_p
= IPPROTO_TCP
;
2424 * Assume we are sending a RST (or a keepalive in the reverse
2425 * direction), swap src and destination addresses and ports.
2427 ip
->ip_src
.s_addr
= htonl(id
->dst_ip
);
2428 ip
->ip_dst
.s_addr
= htonl(id
->src_ip
);
2429 tcp
->th_sport
= htons(id
->dst_port
);
2430 tcp
->th_dport
= htons(id
->src_port
);
2431 if (flags
& TH_RST
) { /* we are sending a RST */
2432 if (flags
& TH_ACK
) {
2433 tcp
->th_seq
= htonl(ack
);
2434 tcp
->th_ack
= htonl(0);
2435 tcp
->th_flags
= TH_RST
;
2439 tcp
->th_seq
= htonl(0);
2440 tcp
->th_ack
= htonl(seq
);
2441 tcp
->th_flags
= TH_RST
| TH_ACK
;
2445 * We are sending a keepalive. flags & TH_SYN determines
2446 * the direction, forward if set, reverse if clear.
2447 * NOTE: seq and ack are always assumed to be correct
2448 * as set by the caller. This may be confusing...
2450 if (flags
& TH_SYN
) {
2452 * we have to rewrite the correct addresses!
2454 ip
->ip_dst
.s_addr
= htonl(id
->dst_ip
);
2455 ip
->ip_src
.s_addr
= htonl(id
->src_ip
);
2456 tcp
->th_dport
= htons(id
->dst_port
);
2457 tcp
->th_sport
= htons(id
->src_port
);
2459 tcp
->th_seq
= htonl(seq
);
2460 tcp
->th_ack
= htonl(ack
);
2461 tcp
->th_flags
= TH_ACK
;
2465 * set ip_len to the payload size so we can compute
2466 * the tcp checksum on the pseudoheader
2467 * XXX check this, could save a couple of words ?
2469 ip
->ip_len
= htons(sizeof(struct tcphdr
));
2470 tcp
->th_sum
= in_cksum(m
, m
->m_pkthdr
.len
);
2473 * now fill fields left out earlier
2475 ip
->ip_ttl
= ip_defttl
;
2476 ip
->ip_len
= m
->m_pkthdr
.len
;
2478 bzero(&sro
, sizeof(sro
));
2479 ip_rtaddr(ip
->ip_dst
, &sro
);
2481 m
->m_pkthdr
.fw_flags
|= IPFW_MBUF_GENERATED
;
2482 ip_output(m
, NULL
, &sro
, 0, NULL
, NULL
);
2488 * Send a reject message, consuming the mbuf passed as an argument.
2491 send_reject(struct ip_fw_args
*args
, int code
, int offset
, int ip_len
)
2493 if (code
!= ICMP_REJECT_RST
) { /* Send an ICMP unreach */
2494 /* We need the IP header in host order for icmp_error(). */
2495 if (args
->eh
!= NULL
) {
2496 struct ip
*ip
= mtod(args
->m
, struct ip
*);
2498 ip
->ip_len
= ntohs(ip
->ip_len
);
2499 ip
->ip_off
= ntohs(ip
->ip_off
);
2501 icmp_error(args
->m
, ICMP_UNREACH
, code
, 0L, 0);
2502 } else if (offset
== 0 && args
->f_id
.proto
== IPPROTO_TCP
) {
2503 struct tcphdr
*const tcp
=
2504 L3HDR(struct tcphdr
, mtod(args
->m
, struct ip
*));
2506 if ((tcp
->th_flags
& TH_RST
) == 0) {
2507 send_pkt(&args
->f_id
, ntohl(tcp
->th_seq
),
2508 ntohl(tcp
->th_ack
), tcp
->th_flags
| TH_RST
);
2518 * Given an ip_fw *, lookup_next_rule will return a pointer
2519 * to the next rule, which can be either the jump
2520 * target (for skipto instructions) or the next one in the list (in
2521 * all other cases including a missing jump target).
2522 * The result is also written in the "next_rule" field of the rule.
2523 * Backward jumps are not allowed, so start looking from the next
2526 * This never returns NULL -- in case we do not have an exact match,
2527 * the next rule is returned. When the ruleset is changed,
2528 * pointers are flushed so we are always correct.
2530 static struct ip_fw
*
2531 lookup_next_rule(struct ip_fw
*me
)
2533 struct ip_fw
*rule
= NULL
;
2536 /* look for action, in case it is a skipto */
2537 cmd
= ACTION_PTR(me
);
2538 if (cmd
->opcode
== O_LOG
)
2540 if (cmd
->opcode
== O_SKIPTO
) {
2541 for (rule
= me
->next
; rule
; rule
= rule
->next
) {
2542 if (rule
->rulenum
>= cmd
->arg1
)
2546 if (rule
== NULL
) /* failure or not a skipto */
2548 me
->next_rule
= rule
;
2553 ipfw_match_uid(const struct ipfw_flow_id
*fid
, struct ifnet
*oif
,
2554 enum ipfw_opcodes opcode
, uid_t uid
)
2556 struct in_addr src_ip
, dst_ip
;
2557 struct inpcbinfo
*pi
;
2561 if (fid
->proto
== IPPROTO_TCP
) {
2563 pi
= &tcbinfo
[mycpuid
];
2564 } else if (fid
->proto
== IPPROTO_UDP
) {
2566 pi
= &udbinfo
[mycpuid
];
2572 * Values in 'fid' are in host byte order
2574 dst_ip
.s_addr
= htonl(fid
->dst_ip
);
2575 src_ip
.s_addr
= htonl(fid
->src_ip
);
2577 pcb
= in_pcblookup_hash(pi
,
2578 dst_ip
, htons(fid
->dst_port
),
2579 src_ip
, htons(fid
->src_port
),
2582 pcb
= in_pcblookup_hash(pi
,
2583 src_ip
, htons(fid
->src_port
),
2584 dst_ip
, htons(fid
->dst_port
),
2587 if (pcb
== NULL
|| pcb
->inp_socket
== NULL
)
2590 if (opcode
== O_UID
) {
2591 #define socheckuid(a,b) ((a)->so_cred->cr_uid != (b))
2592 return !socheckuid(pcb
->inp_socket
, uid
);
2595 return groupmember(uid
, pcb
->inp_socket
->so_cred
);
2600 * The main check routine for the firewall.
2602 * All arguments are in args so we can modify them and return them
2603 * back to the caller.
2607 * args->m (in/out) The packet; we set to NULL when/if we nuke it.
2608 * Starts with the IP header.
2609 * args->eh (in) Mac header if present, or NULL for layer3 packet.
2610 * args->oif Outgoing interface, or NULL if packet is incoming.
2611 * The incoming interface is in the mbuf. (in)
2613 * args->rule Pointer to the last matching rule (in/out)
2614 * args->f_id Addresses grabbed from the packet (out)
2618 * If the packet was denied/rejected and has been dropped, *m is equal
2619 * to NULL upon return.
2621 * IP_FW_DENY the packet must be dropped.
2622 * IP_FW_PASS The packet is to be accepted and routed normally.
2623 * IP_FW_DIVERT Divert the packet to port (args->cookie)
2624 * IP_FW_TEE Tee the packet to port (args->cookie)
2625 * IP_FW_DUMMYNET Send the packet to pipe/queue (args->cookie)
2628 ipfw_chk(struct ip_fw_args
*args
)
2631 * Local variables hold state during the processing of a packet.
2633 * IMPORTANT NOTE: to speed up the processing of rules, there
2634 * are some assumption on the values of the variables, which
2635 * are documented here. Should you change them, please check
2636 * the implementation of the various instructions to make sure
2637 * that they still work.
2639 * args->eh The MAC header. It is non-null for a layer2
2640 * packet, it is NULL for a layer-3 packet.
2642 * m | args->m Pointer to the mbuf, as received from the caller.
2643 * It may change if ipfw_chk() does an m_pullup, or if it
2644 * consumes the packet because it calls send_reject().
2645 * XXX This has to change, so that ipfw_chk() never modifies
2646 * or consumes the buffer.
2647 * ip is simply an alias of the value of m, and it is kept
2648 * in sync with it (the packet is supposed to start with
2651 struct mbuf
*m
= args
->m
;
2652 struct ip
*ip
= mtod(m
, struct ip
*);
2655 * oif | args->oif If NULL, ipfw_chk has been called on the
2656 * inbound path (ether_input, ip_input).
2657 * If non-NULL, ipfw_chk has been called on the outbound path
2658 * (ether_output, ip_output).
2660 struct ifnet
*oif
= args
->oif
;
2662 struct ip_fw
*f
= NULL
; /* matching rule */
2663 int retval
= IP_FW_PASS
;
2665 struct divert_info
*divinfo
;
2668 * hlen The length of the IPv4 header.
2669 * hlen >0 means we have an IPv4 packet.
2671 u_int hlen
= 0; /* hlen >0 means we have an IP pkt */
2674 * offset The offset of a fragment. offset != 0 means that
2675 * we have a fragment at this offset of an IPv4 packet.
2676 * offset == 0 means that (if this is an IPv4 packet)
2677 * this is the first or only fragment.
2682 * Local copies of addresses. They are only valid if we have
2685 * proto The protocol. Set to 0 for non-ip packets,
2686 * or to the protocol read from the packet otherwise.
2687 * proto != 0 means that we have an IPv4 packet.
2689 * src_port, dst_port port numbers, in HOST format. Only
2690 * valid for TCP and UDP packets.
2692 * src_ip, dst_ip ip addresses, in NETWORK format.
2693 * Only valid for IPv4 packets.
2696 uint16_t src_port
= 0, dst_port
= 0; /* NOTE: host format */
2697 struct in_addr src_ip
, dst_ip
; /* NOTE: network format */
2698 uint16_t ip_len
= 0;
2701 * dyn_dir = MATCH_UNKNOWN when rules unchecked,
2702 * MATCH_NONE when checked and not matched (dyn_f = NULL),
2703 * MATCH_FORWARD or MATCH_REVERSE otherwise (dyn_f != NULL)
2705 int dyn_dir
= MATCH_UNKNOWN
;
2706 struct ip_fw
*dyn_f
= NULL
;
2707 int cpuid
= mycpuid
;
2708 struct ipfw_context
*ctx
;
2710 ASSERT_NETISR_NCPUS(cpuid
);
2711 ctx
= ipfw_ctx
[cpuid
];
2713 if (m
->m_pkthdr
.fw_flags
& IPFW_MBUF_GENERATED
)
2714 return IP_FW_PASS
; /* accept */
2716 if (args
->eh
== NULL
|| /* layer 3 packet */
2717 (m
->m_pkthdr
.len
>= sizeof(struct ip
) &&
2718 ntohs(args
->eh
->ether_type
) == ETHERTYPE_IP
))
2719 hlen
= ip
->ip_hl
<< 2;
2722 * Collect parameters into local variables for faster matching.
2724 if (hlen
== 0) { /* do not grab addresses for non-ip pkts */
2725 proto
= args
->f_id
.proto
= 0; /* mark f_id invalid */
2726 goto after_ip_checks
;
2729 proto
= args
->f_id
.proto
= ip
->ip_p
;
2730 src_ip
= ip
->ip_src
;
2731 dst_ip
= ip
->ip_dst
;
2732 if (args
->eh
!= NULL
) { /* layer 2 packets are as on the wire */
2733 offset
= ntohs(ip
->ip_off
) & IP_OFFMASK
;
2734 ip_len
= ntohs(ip
->ip_len
);
2736 offset
= ip
->ip_off
& IP_OFFMASK
;
2737 ip_len
= ip
->ip_len
;
2740 #define PULLUP_TO(len) \
2742 if (m->m_len < (len)) { \
2743 args->m = m = m_pullup(m, (len));\
2745 goto pullup_failed; \
2746 ip = mtod(m, struct ip *); \
2756 PULLUP_TO(hlen
+ sizeof(struct tcphdr
));
2757 tcp
= L3HDR(struct tcphdr
, ip
);
2758 dst_port
= tcp
->th_dport
;
2759 src_port
= tcp
->th_sport
;
2760 args
->f_id
.flags
= tcp
->th_flags
;
2768 PULLUP_TO(hlen
+ sizeof(struct udphdr
));
2769 udp
= L3HDR(struct udphdr
, ip
);
2770 dst_port
= udp
->uh_dport
;
2771 src_port
= udp
->uh_sport
;
2776 PULLUP_TO(hlen
+ 4); /* type, code and checksum. */
2777 args
->f_id
.flags
= L3HDR(struct icmp
, ip
)->icmp_type
;
2787 args
->f_id
.src_ip
= ntohl(src_ip
.s_addr
);
2788 args
->f_id
.dst_ip
= ntohl(dst_ip
.s_addr
);
2789 args
->f_id
.src_port
= src_port
= ntohs(src_port
);
2790 args
->f_id
.dst_port
= dst_port
= ntohs(dst_port
);
2795 * Packet has already been tagged. Look for the next rule
2796 * to restart processing.
2798 * If fw_one_pass != 0 then just accept it.
2799 * XXX should not happen here, but optimized out in
2805 /* This rule is being/has been flushed */
2809 KASSERT(args
->rule
->cpuid
== cpuid
,
2810 ("rule used on cpu%d", cpuid
));
2812 /* This rule was deleted */
2813 if (args
->rule
->rule_flags
& IPFW_RULE_F_INVALID
)
2816 f
= args
->rule
->next_rule
;
2818 f
= lookup_next_rule(args
->rule
);
2821 * Find the starting rule. It can be either the first
2822 * one, or the one after divert_rule if asked so.
2826 mtag
= m_tag_find(m
, PACKET_TAG_IPFW_DIVERT
, NULL
);
2828 divinfo
= m_tag_data(mtag
);
2829 skipto
= divinfo
->skipto
;
2834 f
= ctx
->ipfw_layer3_chain
;
2835 if (args
->eh
== NULL
&& skipto
!= 0) {
2836 /* No skipto during rule flushing */
2840 if (skipto
>= IPFW_DEFAULT_RULE
)
2841 return IP_FW_DENY
; /* invalid */
2843 while (f
&& f
->rulenum
<= skipto
)
2845 if (f
== NULL
) /* drop packet */
2847 } else if (ipfw_flushing
) {
2848 /* Rules are being flushed; skip to default rule */
2849 f
= ctx
->ipfw_default_rule
;
2852 if ((mtag
= m_tag_find(m
, PACKET_TAG_IPFW_DIVERT
, NULL
)) != NULL
)
2853 m_tag_delete(m
, mtag
);
2856 * Now scan the rules, and parse microinstructions for each rule.
2858 for (; f
; f
= f
->next
) {
2861 int skip_or
; /* skip rest of OR block */
2864 if (ctx
->ipfw_set_disable
& (1 << f
->set
))
2868 for (l
= f
->cmd_len
, cmd
= f
->cmd
; l
> 0;
2869 l
-= cmdlen
, cmd
+= cmdlen
) {
2873 * check_body is a jump target used when we find a
2874 * CHECK_STATE, and need to jump to the body of
2879 cmdlen
= F_LEN(cmd
);
2881 * An OR block (insn_1 || .. || insn_n) has the
2882 * F_OR bit set in all but the last instruction.
2883 * The first match will set "skip_or", and cause
2884 * the following instructions to be skipped until
2885 * past the one with the F_OR bit clear.
2887 if (skip_or
) { /* skip this instruction */
2888 if ((cmd
->len
& F_OR
) == 0)
2889 skip_or
= 0; /* next one is good */
2892 match
= 0; /* set to 1 if we succeed */
2894 switch (cmd
->opcode
) {
2896 * The first set of opcodes compares the packet's
2897 * fields with some pattern, setting 'match' if a
2898 * match is found. At the end of the loop there is
2899 * logic to deal with F_NOT and F_OR flags associated
2907 kprintf("ipfw: opcode %d unimplemented\n",
2914 * We only check offset == 0 && proto != 0,
2915 * as this ensures that we have an IPv4
2916 * packet with the ports info.
2921 match
= ipfw_match_uid(&args
->f_id
, oif
,
2923 (uid_t
)((ipfw_insn_u32
*)cmd
)->d
[0]);
2927 match
= iface_match(m
->m_pkthdr
.rcvif
,
2928 (ipfw_insn_if
*)cmd
);
2932 match
= iface_match(oif
, (ipfw_insn_if
*)cmd
);
2936 match
= iface_match(oif
? oif
:
2937 m
->m_pkthdr
.rcvif
, (ipfw_insn_if
*)cmd
);
2941 if (args
->eh
!= NULL
) { /* have MAC header */
2942 uint32_t *want
= (uint32_t *)
2943 ((ipfw_insn_mac
*)cmd
)->addr
;
2944 uint32_t *mask
= (uint32_t *)
2945 ((ipfw_insn_mac
*)cmd
)->mask
;
2946 uint32_t *hdr
= (uint32_t *)args
->eh
;
2949 (want
[0] == (hdr
[0] & mask
[0]) &&
2950 want
[1] == (hdr
[1] & mask
[1]) &&
2951 want
[2] == (hdr
[2] & mask
[2]));
2956 if (args
->eh
!= NULL
) {
2958 ntohs(args
->eh
->ether_type
);
2960 ((ipfw_insn_u16
*)cmd
)->ports
;
2963 /* Special vlan handling */
2964 if (m
->m_flags
& M_VLANTAG
)
2967 for (i
= cmdlen
- 1; !match
&& i
> 0;
2970 (t
>= p
[0] && t
<= p
[1]);
2976 match
= (hlen
> 0 && offset
!= 0);
2979 case O_IN
: /* "out" is "not in" */
2980 match
= (oif
== NULL
);
2984 match
= (args
->eh
!= NULL
);
2989 * We do not allow an arg of 0 so the
2990 * check of "proto" only suffices.
2992 match
= (proto
== cmd
->arg1
);
2996 match
= (hlen
> 0 &&
2997 ((ipfw_insn_ip
*)cmd
)->addr
.s_addr
==
3002 match
= (hlen
> 0 &&
3003 ((ipfw_insn_ip
*)cmd
)->addr
.s_addr
==
3005 ((ipfw_insn_ip
*)cmd
)->mask
.s_addr
));
3012 tif
= INADDR_TO_IFP(&src_ip
);
3013 match
= (tif
!= NULL
);
3020 uint32_t *d
= (uint32_t *)(cmd
+ 1);
3022 cmd
->opcode
== O_IP_DST_SET
?
3028 addr
-= d
[0]; /* subtract base */
3030 (addr
< cmd
->arg1
) &&
3031 (d
[1 + (addr
>> 5)] &
3032 (1 << (addr
& 0x1f)));
3037 match
= (hlen
> 0 &&
3038 ((ipfw_insn_ip
*)cmd
)->addr
.s_addr
==
3043 match
= (hlen
> 0) &&
3044 (((ipfw_insn_ip
*)cmd
)->addr
.s_addr
==
3046 ((ipfw_insn_ip
*)cmd
)->mask
.s_addr
));
3053 tif
= INADDR_TO_IFP(&dst_ip
);
3054 match
= (tif
!= NULL
);
3061 * offset == 0 && proto != 0 is enough
3062 * to guarantee that we have an IPv4
3063 * packet with port info.
3065 if ((proto
==IPPROTO_UDP
|| proto
==IPPROTO_TCP
)
3068 (cmd
->opcode
== O_IP_SRCPORT
) ?
3069 src_port
: dst_port
;
3071 ((ipfw_insn_u16
*)cmd
)->ports
;
3074 for (i
= cmdlen
- 1; !match
&& i
> 0;
3077 (x
>= p
[0] && x
<= p
[1]);
3083 match
= (offset
== 0 && proto
==IPPROTO_ICMP
&&
3084 icmptype_match(ip
, (ipfw_insn_u32
*)cmd
));
3088 match
= (hlen
> 0 && ipopts_match(ip
, cmd
));
3092 match
= (hlen
> 0 && cmd
->arg1
== ip
->ip_v
);
3096 match
= (hlen
> 0 && cmd
->arg1
== ip
->ip_ttl
);
3100 match
= (hlen
> 0 &&
3101 cmd
->arg1
== ntohs(ip
->ip_id
));
3105 match
= (hlen
> 0 && cmd
->arg1
== ip_len
);
3108 case O_IPPRECEDENCE
:
3109 match
= (hlen
> 0 &&
3110 (cmd
->arg1
== (ip
->ip_tos
& 0xe0)));
3114 match
= (hlen
> 0 &&
3115 flags_match(cmd
, ip
->ip_tos
));
3119 match
= (proto
== IPPROTO_TCP
&& offset
== 0 &&
3121 L3HDR(struct tcphdr
,ip
)->th_flags
));
3125 match
= (proto
== IPPROTO_TCP
&& offset
== 0 &&
3126 tcpopts_match(ip
, cmd
));
3130 match
= (proto
== IPPROTO_TCP
&& offset
== 0 &&
3131 ((ipfw_insn_u32
*)cmd
)->d
[0] ==
3132 L3HDR(struct tcphdr
,ip
)->th_seq
);
3136 match
= (proto
== IPPROTO_TCP
&& offset
== 0 &&
3137 ((ipfw_insn_u32
*)cmd
)->d
[0] ==
3138 L3HDR(struct tcphdr
,ip
)->th_ack
);
3142 match
= (proto
== IPPROTO_TCP
&& offset
== 0 &&
3144 L3HDR(struct tcphdr
,ip
)->th_win
);
3148 /* reject packets which have SYN only */
3149 /* XXX should i also check for TH_ACK ? */
3150 match
= (proto
== IPPROTO_TCP
&& offset
== 0 &&
3151 (L3HDR(struct tcphdr
,ip
)->th_flags
&
3152 (TH_RST
| TH_ACK
| TH_SYN
)) != TH_SYN
);
3157 ipfw_log(ctx
, f
, hlen
, args
->eh
, m
,
3164 match
= (krandom() <
3165 ((ipfw_insn_u32
*)cmd
)->d
[0]);
3169 * The second set of opcodes represents 'actions',
3170 * i.e. the terminal part of a rule once the packet
3171 * matches all previous patterns.
3172 * Typically there is only one action for each rule,
3173 * and the opcode is stored at the end of the rule
3174 * (but there are exceptions -- see below).
3176 * In general, here we set retval and terminate the
3177 * outer loop (would be a 'break 3' in some language,
3178 * but we need to do a 'goto done').
3181 * O_COUNT and O_SKIPTO actions:
3182 * instead of terminating, we jump to the next rule
3183 * ('goto next_rule', equivalent to a 'break 2'),
3184 * or to the SKIPTO target ('goto again' after
3185 * having set f, cmd and l), respectively.
3187 * O_LIMIT and O_KEEP_STATE: these opcodes are
3188 * not real 'actions', and are stored right
3189 * before the 'action' part of the rule.
3190 * These opcodes try to install an entry in the
3191 * state tables; if successful, we continue with
3192 * the next opcode (match=1; break;), otherwise
3193 * the packet must be dropped ('goto done' after
3194 * setting retval). If static rules are changed
3195 * during the state installation, the packet will
3196 * be dropped and rule's stats will not beupdated
3197 * ('return IP_FW_DENY').
3199 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
3200 * cause a lookup of the state table, and a jump
3201 * to the 'action' part of the parent rule
3202 * ('goto check_body') if an entry is found, or
3203 * (CHECK_STATE only) a jump to the next rule if
3204 * the entry is not found ('goto next_rule').
3205 * The result of the lookup is cached to make
3206 * further instances of these opcodes are
3207 * effectively NOPs. If static rules are changed
3208 * during the state looking up, the packet will
3209 * be dropped and rule's stats will not be updated
3210 * ('return IP_FW_DENY').
3214 if (ipfw_state_install(ctx
, f
,
3215 (ipfw_insn_limit
*)cmd
, args
,
3216 (offset
== 0 && proto
== IPPROTO_TCP
) ?
3217 L3HDR(struct tcphdr
, ip
) : NULL
)) {
3218 retval
= IP_FW_DENY
;
3219 goto done
; /* error/limit violation */
3227 * States are checked at the first keep-state
3228 * check-state occurrence, with the result
3229 * being stored in dyn_dir. The compiler
3230 * introduces a PROBE_STATE instruction for
3231 * us when we have a KEEP_STATE/LIMIT (because
3232 * PROBE_STATE needs to be run first).
3234 if (dyn_dir
== MATCH_UNKNOWN
) {
3235 dyn_f
= ipfw_state_lookup_rule(ctx
,
3236 &args
->f_id
, &dyn_dir
,
3238 proto
== IPPROTO_TCP
) ?
3239 L3HDR(struct tcphdr
, ip
) : NULL
,
3241 if (dyn_f
!= NULL
) {
3243 * Found a rule from a state;
3244 * jump to the 'action' part
3248 cmd
= ACTION_PTR(f
);
3249 l
= f
->cmd_len
- f
->act_ofs
;
3254 * State not found. If CHECK_STATE, skip to
3255 * next rule, if PROBE_STATE just ignore and
3256 * continue with next opcode.
3258 if (cmd
->opcode
== O_CHECK_STATE
)
3264 retval
= IP_FW_PASS
; /* accept */
3269 args
->rule
= f
; /* report matching rule */
3270 args
->cookie
= cmd
->arg1
;
3271 retval
= IP_FW_DUMMYNET
;
3276 if (args
->eh
) /* not on layer 2 */
3279 mtag
= m_tag_get(PACKET_TAG_IPFW_DIVERT
,
3280 sizeof(*divinfo
), M_NOWAIT
);
3282 retval
= IP_FW_DENY
;
3285 divinfo
= m_tag_data(mtag
);
3287 divinfo
->skipto
= f
->rulenum
;
3288 divinfo
->port
= cmd
->arg1
;
3289 divinfo
->tee
= (cmd
->opcode
== O_TEE
);
3290 m_tag_prepend(m
, mtag
);
3292 args
->cookie
= cmd
->arg1
;
3293 retval
= (cmd
->opcode
== O_DIVERT
) ?
3294 IP_FW_DIVERT
: IP_FW_TEE
;
3299 f
->pcnt
++; /* update stats */
3301 f
->timestamp
= time_second
;
3302 if (cmd
->opcode
== O_COUNT
)
3305 if (f
->next_rule
== NULL
)
3306 lookup_next_rule(f
);
3312 * Drop the packet and send a reject notice
3313 * if the packet is not ICMP (or is an ICMP
3314 * query), and it is not multicast/broadcast.
3317 (proto
!= IPPROTO_ICMP
||
3318 is_icmp_query(ip
)) &&
3319 !(m
->m_flags
& (M_BCAST
|M_MCAST
)) &&
3320 !IN_MULTICAST(ntohl(dst_ip
.s_addr
))) {
3322 * Update statistics before the possible
3323 * blocking 'send_reject'
3327 f
->timestamp
= time_second
;
3329 send_reject(args
, cmd
->arg1
,
3334 * Return directly here, rule stats
3335 * have been updated above.
3341 retval
= IP_FW_DENY
;
3345 if (args
->eh
) /* not valid on layer2 pkts */
3347 if (!dyn_f
|| dyn_dir
== MATCH_FORWARD
) {
3348 struct sockaddr_in
*sin
;
3350 mtag
= m_tag_get(PACKET_TAG_IPFORWARD
,
3351 sizeof(*sin
), M_NOWAIT
);
3353 retval
= IP_FW_DENY
;
3356 sin
= m_tag_data(mtag
);
3358 /* Structure copy */
3359 *sin
= ((ipfw_insn_sa
*)cmd
)->sa
;
3361 m_tag_prepend(m
, mtag
);
3362 m
->m_pkthdr
.fw_flags
|=
3363 IPFORWARD_MBUF_TAGGED
;
3364 m
->m_pkthdr
.fw_flags
&=
3365 ~BRIDGE_MBUF_TAGGED
;
3367 retval
= IP_FW_PASS
;
3371 panic("-- unknown opcode %d", cmd
->opcode
);
3372 } /* end of switch() on opcodes */
3374 if (cmd
->len
& F_NOT
)
3378 if (cmd
->len
& F_OR
)
3381 if (!(cmd
->len
& F_OR
)) /* not an OR block, */
3382 break; /* try next rule */
3385 } /* end of inner for, scan opcodes */
3387 next_rule
:; /* try next rule */
3389 } /* end of outer for, scan rules */
3390 kprintf("+++ ipfw: ouch!, skip past end of rules, denying packet\n");
3394 /* Update statistics */
3397 f
->timestamp
= time_second
;
3402 kprintf("pullup failed\n");
3407 ipfw_dummynet_io(struct mbuf
*m
, int pipe_nr
, int dir
, struct ip_fw_args
*fwa
)
3412 const struct ipfw_flow_id
*id
;
3413 struct dn_flow_id
*fid
;
3417 mtag
= m_tag_get(PACKET_TAG_DUMMYNET
, sizeof(*pkt
), M_NOWAIT
);
3422 m_tag_prepend(m
, mtag
);
3424 pkt
= m_tag_data(mtag
);
3425 bzero(pkt
, sizeof(*pkt
));
3427 cmd
= fwa
->rule
->cmd
+ fwa
->rule
->act_ofs
;
3428 if (cmd
->opcode
== O_LOG
)
3430 KASSERT(cmd
->opcode
== O_PIPE
|| cmd
->opcode
== O_QUEUE
,
3431 ("Rule is not PIPE or QUEUE, opcode %d", cmd
->opcode
));
3434 pkt
->dn_flags
= (dir
& DN_FLAGS_DIR_MASK
);
3435 pkt
->ifp
= fwa
->oif
;
3436 pkt
->pipe_nr
= pipe_nr
;
3438 pkt
->cpuid
= mycpuid
;
3439 pkt
->msgport
= netisr_curport();
3443 fid
->fid_dst_ip
= id
->dst_ip
;
3444 fid
->fid_src_ip
= id
->src_ip
;
3445 fid
->fid_dst_port
= id
->dst_port
;
3446 fid
->fid_src_port
= id
->src_port
;
3447 fid
->fid_proto
= id
->proto
;
3448 fid
->fid_flags
= id
->flags
;
3450 ipfw_ref_rule(fwa
->rule
);
3451 pkt
->dn_priv
= fwa
->rule
;
3452 pkt
->dn_unref_priv
= ipfw_unref_rule
;
3454 if (cmd
->opcode
== O_PIPE
)
3455 pkt
->dn_flags
|= DN_FLAGS_IS_PIPE
;
3457 m
->m_pkthdr
.fw_flags
|= DUMMYNET_MBUF_TAGGED
;
3461 * When a rule is added/deleted, clear the next_rule pointers in all rules.
3462 * These will be reconstructed on the fly as packets are matched.
3465 ipfw_flush_rule_ptrs(struct ipfw_context
*ctx
)
3469 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
)
3470 rule
->next_rule
= NULL
;
3473 static __inline
void
3474 ipfw_inc_static_count(struct ip_fw
*rule
)
3476 /* Static rule's counts are updated only on CPU0 */
3477 KKASSERT(mycpuid
== 0);
3480 static_ioc_len
+= IOC_RULESIZE(rule
);
3483 static __inline
void
3484 ipfw_dec_static_count(struct ip_fw
*rule
)
3486 int l
= IOC_RULESIZE(rule
);
3488 /* Static rule's counts are updated only on CPU0 */
3489 KKASSERT(mycpuid
== 0);
3491 KASSERT(static_count
> 0, ("invalid static count %u", static_count
));
3494 KASSERT(static_ioc_len
>= l
,
3495 ("invalid static len %u", static_ioc_len
));
3496 static_ioc_len
-= l
;
3500 ipfw_link_sibling(struct netmsg_ipfw
*fwmsg
, struct ip_fw
*rule
)
3502 if (fwmsg
->sibling
!= NULL
) {
3503 KKASSERT(mycpuid
> 0 && fwmsg
->sibling
->cpuid
== mycpuid
- 1);
3504 fwmsg
->sibling
->sibling
= rule
;
3506 fwmsg
->sibling
= rule
;
3509 static struct ip_fw
*
3510 ipfw_create_rule(const struct ipfw_ioc_rule
*ioc_rule
, uint32_t rule_flags
)
3514 rule
= kmalloc(RULESIZE(ioc_rule
), M_IPFW
, M_WAITOK
| M_ZERO
);
3516 rule
->act_ofs
= ioc_rule
->act_ofs
;
3517 rule
->cmd_len
= ioc_rule
->cmd_len
;
3518 rule
->rulenum
= ioc_rule
->rulenum
;
3519 rule
->set
= ioc_rule
->set
;
3520 rule
->usr_flags
= ioc_rule
->usr_flags
;
3522 bcopy(ioc_rule
->cmd
, rule
->cmd
, rule
->cmd_len
* 4 /* XXX */);
3525 rule
->cpuid
= mycpuid
;
3526 rule
->rule_flags
= rule_flags
;
3532 ipfw_add_rule_dispatch(netmsg_t nmsg
)
3534 struct netmsg_ipfw
*fwmsg
= (struct netmsg_ipfw
*)nmsg
;
3535 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
3538 ASSERT_NETISR_NCPUS(mycpuid
);
3540 rule
= ipfw_create_rule(fwmsg
->ioc_rule
, fwmsg
->rule_flags
);
3543 * Insert rule into the pre-determined position
3545 if (fwmsg
->prev_rule
!= NULL
) {
3546 struct ip_fw
*prev
, *next
;
3548 prev
= fwmsg
->prev_rule
;
3549 KKASSERT(prev
->cpuid
== mycpuid
);
3551 next
= fwmsg
->next_rule
;
3552 KKASSERT(next
->cpuid
== mycpuid
);
3558 * Move to the position on the next CPU
3559 * before the msg is forwarded.
3561 fwmsg
->prev_rule
= prev
->sibling
;
3562 fwmsg
->next_rule
= next
->sibling
;
3564 KKASSERT(fwmsg
->next_rule
== NULL
);
3565 rule
->next
= ctx
->ipfw_layer3_chain
;
3566 ctx
->ipfw_layer3_chain
= rule
;
3569 /* Link rule CPU sibling */
3570 ipfw_link_sibling(fwmsg
, rule
);
3572 ipfw_flush_rule_ptrs(ctx
);
3575 /* Statistics only need to be updated once */
3576 ipfw_inc_static_count(rule
);
3578 /* Return the rule on CPU0 */
3579 nmsg
->lmsg
.u
.ms_resultp
= rule
;
3582 if (rule
->rule_flags
& IPFW_RULE_F_GENTRACK
)
3583 rule
->track_ruleid
= (uintptr_t)nmsg
->lmsg
.u
.ms_resultp
;
3585 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
3589 * Add a new rule to the list. Copy the rule into a malloc'ed area,
3590 * then possibly create a rule number and add the rule to the list.
3591 * Update the rule_number in the input struct so the caller knows
3595 ipfw_add_rule(struct ipfw_ioc_rule
*ioc_rule
, uint32_t rule_flags
)
3597 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
3598 struct netmsg_ipfw fwmsg
;
3599 struct netmsg_base
*nmsg
;
3600 struct ip_fw
*f
, *prev
, *rule
;
3605 * If rulenum is 0, find highest numbered rule before the
3606 * default rule, and add rule number incremental step.
3608 if (ioc_rule
->rulenum
== 0) {
3609 int step
= autoinc_step
;
3611 KKASSERT(step
>= IPFW_AUTOINC_STEP_MIN
&&
3612 step
<= IPFW_AUTOINC_STEP_MAX
);
3615 * Locate the highest numbered rule before default
3617 for (f
= ctx
->ipfw_layer3_chain
; f
; f
= f
->next
) {
3618 if (f
->rulenum
== IPFW_DEFAULT_RULE
)
3620 ioc_rule
->rulenum
= f
->rulenum
;
3622 if (ioc_rule
->rulenum
< IPFW_DEFAULT_RULE
- step
)
3623 ioc_rule
->rulenum
+= step
;
3625 KASSERT(ioc_rule
->rulenum
!= IPFW_DEFAULT_RULE
&&
3626 ioc_rule
->rulenum
!= 0,
3627 ("invalid rule num %d", ioc_rule
->rulenum
));
3630 * Now find the right place for the new rule in the sorted list.
3632 for (prev
= NULL
, f
= ctx
->ipfw_layer3_chain
; f
;
3633 prev
= f
, f
= f
->next
) {
3634 if (f
->rulenum
> ioc_rule
->rulenum
) {
3635 /* Found the location */
3639 KASSERT(f
!= NULL
, ("no default rule?!"));
3642 * Duplicate the rule onto each CPU.
3643 * The rule duplicated on CPU0 will be returned.
3645 bzero(&fwmsg
, sizeof(fwmsg
));
3647 netmsg_init(nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
3648 ipfw_add_rule_dispatch
);
3649 fwmsg
.ioc_rule
= ioc_rule
;
3650 fwmsg
.prev_rule
= prev
;
3651 fwmsg
.next_rule
= prev
== NULL
? NULL
: f
;
3652 fwmsg
.rule_flags
= rule_flags
;
3654 netisr_domsg_global(nmsg
);
3655 KKASSERT(fwmsg
.prev_rule
== NULL
&& fwmsg
.next_rule
== NULL
);
3657 rule
= nmsg
->lmsg
.u
.ms_resultp
;
3658 KKASSERT(rule
!= NULL
&& rule
->cpuid
== mycpuid
);
3660 DPRINTF("++ installed rule %d, static count now %d\n",
3661 rule
->rulenum
, static_count
);
3665 * Free storage associated with a static rule (including derived
3667 * The caller is in charge of clearing rule pointers to avoid
3668 * dangling pointers.
3669 * @return a pointer to the next entry.
3670 * Arguments are not checked, so they better be correct.
3672 static struct ip_fw
*
3673 ipfw_delete_rule(struct ipfw_context
*ctx
,
3674 struct ip_fw
*prev
, struct ip_fw
*rule
)
3680 ctx
->ipfw_layer3_chain
= n
;
3684 /* Mark the rule as invalid */
3685 rule
->rule_flags
|= IPFW_RULE_F_INVALID
;
3686 rule
->next_rule
= NULL
;
3687 rule
->sibling
= NULL
;
3689 /* Don't reset cpuid here; keep various assertion working */
3693 /* Statistics only need to be updated once */
3695 ipfw_dec_static_count(rule
);
3697 /* Try to free this rule */
3698 ipfw_free_rule(rule
);
3700 /* Return the next rule */
3705 ipfw_flush_dispatch(netmsg_t nmsg
)
3707 int kill_default
= nmsg
->lmsg
.u
.ms_result
;
3708 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
3711 ASSERT_NETISR_NCPUS(mycpuid
);
3716 ipfw_state_flush(ctx
, NULL
);
3717 KASSERT(ctx
->ipfw_state_cnt
== 0,
3718 ("%d pcpu states remain", ctx
->ipfw_state_cnt
));
3719 ctx
->ipfw_state_loosecnt
= 0;
3720 ctx
->ipfw_state_lastexp
= 0;
3725 ipfw_track_flush(ctx
, NULL
);
3726 ctx
->ipfw_track_lastexp
= 0;
3727 if (ctx
->ipfw_trkcnt_spare
!= NULL
) {
3728 kfree(ctx
->ipfw_trkcnt_spare
, M_IPFW
);
3729 ctx
->ipfw_trkcnt_spare
= NULL
;
3732 ipfw_flush_rule_ptrs(ctx
); /* more efficient to do outside the loop */
3734 while ((rule
= ctx
->ipfw_layer3_chain
) != NULL
&&
3735 (kill_default
|| rule
->rulenum
!= IPFW_DEFAULT_RULE
))
3736 ipfw_delete_rule(ctx
, NULL
, rule
);
3738 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
3742 * Deletes all rules from a chain (including the default rule
3743 * if the second argument is set).
3746 ipfw_flush(int kill_default
)
3748 struct netmsg_base nmsg
;
3750 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
3757 * If 'kill_default' then caller has done the necessary
3758 * msgport syncing; unnecessary to do it again.
3760 if (!kill_default
) {
3762 * Let ipfw_chk() know the rules are going to
3763 * be flushed, so it could jump directly to
3767 /* XXX use priority sync */
3768 netmsg_service_sync();
3772 * Press the 'flush' button
3774 bzero(&nmsg
, sizeof(nmsg
));
3775 netmsg_init(&nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
3776 ipfw_flush_dispatch
);
3777 nmsg
.lmsg
.u
.ms_result
= kill_default
;
3778 netisr_domsg_global(&nmsg
);
3779 ipfw_gd
.ipfw_state_loosecnt
= 0;
3780 ipfw_gd
.ipfw_state_globexp
= 0;
3781 ipfw_gd
.ipfw_track_globexp
= 0;
3784 state_cnt
= ipfw_state_cntcoll();
3785 KASSERT(state_cnt
== 0, ("%d states remain", state_cnt
));
3787 KASSERT(ipfw_gd
.ipfw_trkcnt_cnt
== 0,
3788 ("%d trkcnts remain", ipfw_gd
.ipfw_trkcnt_cnt
));
3791 KASSERT(static_count
== 0,
3792 ("%u static rules remain", static_count
));
3793 KASSERT(static_ioc_len
== 0,
3794 ("%u bytes of static rules remain", static_ioc_len
));
3796 KASSERT(static_count
== 1,
3797 ("%u static rules remain", static_count
));
3798 KASSERT(static_ioc_len
== IOC_RULESIZE(ctx
->ipfw_default_rule
),
3799 ("%u bytes of static rules remain, should be %lu",
3801 (u_long
)IOC_RULESIZE(ctx
->ipfw_default_rule
)));
3810 ipfw_alt_delete_rule_dispatch(netmsg_t nmsg
)
3812 struct netmsg_del
*dmsg
= (struct netmsg_del
*)nmsg
;
3813 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
3814 struct ip_fw
*rule
, *prev
;
3816 ASSERT_NETISR_NCPUS(mycpuid
);
3818 rule
= dmsg
->start_rule
;
3819 KKASSERT(rule
->cpuid
== mycpuid
);
3820 dmsg
->start_rule
= rule
->sibling
;
3822 prev
= dmsg
->prev_rule
;
3824 KKASSERT(prev
->cpuid
== mycpuid
);
3827 * Move to the position on the next CPU
3828 * before the msg is forwarded.
3830 dmsg
->prev_rule
= prev
->sibling
;
3834 * flush pointers outside the loop, then delete all matching
3835 * rules. 'prev' remains the same throughout the cycle.
3837 ipfw_flush_rule_ptrs(ctx
);
3838 while (rule
&& rule
->rulenum
== dmsg
->rulenum
) {
3839 if (rule
->rule_flags
& IPFW_RULE_F_GENSTATE
) {
3840 /* Flush states generated by this rule. */
3841 ipfw_state_flush(ctx
, rule
);
3843 if (rule
->rule_flags
& IPFW_RULE_F_GENTRACK
) {
3844 /* Flush tracks generated by this rule. */
3845 ipfw_track_flush(ctx
, rule
);
3847 rule
= ipfw_delete_rule(ctx
, prev
, rule
);
3850 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
3854 ipfw_alt_delete_rule(uint16_t rulenum
)
3856 struct ip_fw
*prev
, *rule
;
3857 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
3858 struct netmsg_del dmsg
;
3863 * Locate first rule to delete
3865 for (prev
= NULL
, rule
= ctx
->ipfw_layer3_chain
;
3866 rule
&& rule
->rulenum
< rulenum
;
3867 prev
= rule
, rule
= rule
->next
)
3869 if (rule
->rulenum
!= rulenum
)
3873 * Get rid of the rule duplications on all CPUs
3875 bzero(&dmsg
, sizeof(dmsg
));
3876 netmsg_init(&dmsg
.base
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
3877 ipfw_alt_delete_rule_dispatch
);
3878 dmsg
.prev_rule
= prev
;
3879 dmsg
.start_rule
= rule
;
3880 dmsg
.rulenum
= rulenum
;
3882 netisr_domsg_global(&dmsg
.base
);
3883 KKASSERT(dmsg
.prev_rule
== NULL
&& dmsg
.start_rule
== NULL
);
3888 ipfw_alt_delete_ruleset_dispatch(netmsg_t nmsg
)
3890 struct netmsg_del
*dmsg
= (struct netmsg_del
*)nmsg
;
3891 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
3892 struct ip_fw
*prev
, *rule
;
3897 ASSERT_NETISR_NCPUS(mycpuid
);
3899 ipfw_flush_rule_ptrs(ctx
);
3902 rule
= ctx
->ipfw_layer3_chain
;
3903 while (rule
!= NULL
) {
3904 if (rule
->set
== dmsg
->from_set
) {
3905 if (rule
->rule_flags
& IPFW_RULE_F_GENSTATE
) {
3906 /* Flush states generated by this rule. */
3907 ipfw_state_flush(ctx
, rule
);
3909 if (rule
->rule_flags
& IPFW_RULE_F_GENTRACK
) {
3910 /* Flush tracks generated by this rule. */
3911 ipfw_track_flush(ctx
, rule
);
3913 rule
= ipfw_delete_rule(ctx
, prev
, rule
);
3922 KASSERT(del
, ("no match set?!"));
3924 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
3928 ipfw_alt_delete_ruleset(uint8_t set
)
3930 struct netmsg_del dmsg
;
3933 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
3938 * Check whether the 'set' exists. If it exists,
3939 * then check whether any rules within the set will
3940 * try to create states.
3943 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
) {
3944 if (rule
->set
== set
)
3948 return 0; /* XXX EINVAL? */
3953 bzero(&dmsg
, sizeof(dmsg
));
3954 netmsg_init(&dmsg
.base
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
3955 ipfw_alt_delete_ruleset_dispatch
);
3956 dmsg
.from_set
= set
;
3957 netisr_domsg_global(&dmsg
.base
);
3963 ipfw_alt_move_rule_dispatch(netmsg_t nmsg
)
3965 struct netmsg_del
*dmsg
= (struct netmsg_del
*)nmsg
;
3968 ASSERT_NETISR_NCPUS(mycpuid
);
3970 rule
= dmsg
->start_rule
;
3971 KKASSERT(rule
->cpuid
== mycpuid
);
3974 * Move to the position on the next CPU
3975 * before the msg is forwarded.
3977 dmsg
->start_rule
= rule
->sibling
;
3979 while (rule
&& rule
->rulenum
<= dmsg
->rulenum
) {
3980 if (rule
->rulenum
== dmsg
->rulenum
)
3981 rule
->set
= dmsg
->to_set
;
3984 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
3988 ipfw_alt_move_rule(uint16_t rulenum
, uint8_t set
)
3990 struct netmsg_del dmsg
;
3991 struct netmsg_base
*nmsg
;
3993 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
3998 * Locate first rule to move
4000 for (rule
= ctx
->ipfw_layer3_chain
; rule
&& rule
->rulenum
<= rulenum
;
4001 rule
= rule
->next
) {
4002 if (rule
->rulenum
== rulenum
&& rule
->set
!= set
)
4005 if (rule
== NULL
|| rule
->rulenum
> rulenum
)
4006 return 0; /* XXX error? */
4008 bzero(&dmsg
, sizeof(dmsg
));
4010 netmsg_init(nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
4011 ipfw_alt_move_rule_dispatch
);
4012 dmsg
.start_rule
= rule
;
4013 dmsg
.rulenum
= rulenum
;
4016 netisr_domsg_global(nmsg
);
4017 KKASSERT(dmsg
.start_rule
== NULL
);
4022 ipfw_alt_move_ruleset_dispatch(netmsg_t nmsg
)
4024 struct netmsg_del
*dmsg
= (struct netmsg_del
*)nmsg
;
4025 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4028 ASSERT_NETISR_NCPUS(mycpuid
);
4030 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
) {
4031 if (rule
->set
== dmsg
->from_set
)
4032 rule
->set
= dmsg
->to_set
;
4034 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
4038 ipfw_alt_move_ruleset(uint8_t from_set
, uint8_t to_set
)
4040 struct netmsg_del dmsg
;
4041 struct netmsg_base
*nmsg
;
4045 bzero(&dmsg
, sizeof(dmsg
));
4047 netmsg_init(nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
4048 ipfw_alt_move_ruleset_dispatch
);
4049 dmsg
.from_set
= from_set
;
4050 dmsg
.to_set
= to_set
;
4052 netisr_domsg_global(nmsg
);
4057 ipfw_alt_swap_ruleset_dispatch(netmsg_t nmsg
)
4059 struct netmsg_del
*dmsg
= (struct netmsg_del
*)nmsg
;
4060 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4063 ASSERT_NETISR_NCPUS(mycpuid
);
4065 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
) {
4066 if (rule
->set
== dmsg
->from_set
)
4067 rule
->set
= dmsg
->to_set
;
4068 else if (rule
->set
== dmsg
->to_set
)
4069 rule
->set
= dmsg
->from_set
;
4071 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
4075 ipfw_alt_swap_ruleset(uint8_t set1
, uint8_t set2
)
4077 struct netmsg_del dmsg
;
4078 struct netmsg_base
*nmsg
;
4082 bzero(&dmsg
, sizeof(dmsg
));
4084 netmsg_init(nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
4085 ipfw_alt_swap_ruleset_dispatch
);
4086 dmsg
.from_set
= set1
;
4089 netisr_domsg_global(nmsg
);
4094 * Remove all rules with given number, and also do set manipulation.
4096 * The argument is an uint32_t. The low 16 bit are the rule or set number,
4097 * the next 8 bits are the new set, the top 8 bits are the command:
4099 * 0 delete rules with given number
4100 * 1 delete rules with given set number
4101 * 2 move rules with given number to new set
4102 * 3 move rules with given set number to new set
4103 * 4 swap sets with given numbers
4106 ipfw_ctl_alter(uint32_t arg
)
4109 uint8_t cmd
, new_set
;
4114 rulenum
= arg
& 0xffff;
4115 cmd
= (arg
>> 24) & 0xff;
4116 new_set
= (arg
>> 16) & 0xff;
4120 if (new_set
>= IPFW_DEFAULT_SET
)
4122 if (cmd
== 0 || cmd
== 2) {
4123 if (rulenum
== IPFW_DEFAULT_RULE
)
4126 if (rulenum
>= IPFW_DEFAULT_SET
)
4131 case 0: /* delete rules with given number */
4132 error
= ipfw_alt_delete_rule(rulenum
);
4135 case 1: /* delete all rules with given set number */
4136 error
= ipfw_alt_delete_ruleset(rulenum
);
4139 case 2: /* move rules with given number to new set */
4140 error
= ipfw_alt_move_rule(rulenum
, new_set
);
4143 case 3: /* move rules with given set number to new set */
4144 error
= ipfw_alt_move_ruleset(rulenum
, new_set
);
4147 case 4: /* swap two sets */
4148 error
= ipfw_alt_swap_ruleset(rulenum
, new_set
);
4155 * Clear counters for a specific rule.
4158 clear_counters(struct ip_fw
*rule
, int log_only
)
4160 ipfw_insn_log
*l
= (ipfw_insn_log
*)ACTION_PTR(rule
);
4162 if (log_only
== 0) {
4163 rule
->bcnt
= rule
->pcnt
= 0;
4164 rule
->timestamp
= 0;
4166 if (l
->o
.opcode
== O_LOG
)
4167 l
->log_left
= l
->max_log
;
4171 ipfw_zero_entry_dispatch(netmsg_t nmsg
)
4173 struct netmsg_zent
*zmsg
= (struct netmsg_zent
*)nmsg
;
4174 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4177 ASSERT_NETISR_NCPUS(mycpuid
);
4179 if (zmsg
->rulenum
== 0) {
4180 KKASSERT(zmsg
->start_rule
== NULL
);
4182 ctx
->ipfw_norule_counter
= 0;
4183 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
)
4184 clear_counters(rule
, zmsg
->log_only
);
4186 struct ip_fw
*start
= zmsg
->start_rule
;
4188 KKASSERT(start
->cpuid
== mycpuid
);
4189 KKASSERT(start
->rulenum
== zmsg
->rulenum
);
4192 * We can have multiple rules with the same number, so we
4193 * need to clear them all.
4195 for (rule
= start
; rule
&& rule
->rulenum
== zmsg
->rulenum
;
4197 clear_counters(rule
, zmsg
->log_only
);
4200 * Move to the position on the next CPU
4201 * before the msg is forwarded.
4203 zmsg
->start_rule
= start
->sibling
;
4205 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
4209 * Reset some or all counters on firewall rules.
4210 * @arg frwl is null to clear all entries, or contains a specific
4212 * @arg log_only is 1 if we only want to reset logs, zero otherwise.
4215 ipfw_ctl_zero_entry(int rulenum
, int log_only
)
4217 struct netmsg_zent zmsg
;
4218 struct netmsg_base
*nmsg
;
4220 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4224 bzero(&zmsg
, sizeof(zmsg
));
4226 netmsg_init(nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
4227 ipfw_zero_entry_dispatch
);
4228 zmsg
.log_only
= log_only
;
4231 msg
= log_only
? "ipfw: All logging counts reset.\n"
4232 : "ipfw: Accounting cleared.\n";
4237 * Locate the first rule with 'rulenum'
4239 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
) {
4240 if (rule
->rulenum
== rulenum
)
4243 if (rule
== NULL
) /* we did not find any matching rules */
4245 zmsg
.start_rule
= rule
;
4246 zmsg
.rulenum
= rulenum
;
4248 msg
= log_only
? "ipfw: Entry %d logging count reset.\n"
4249 : "ipfw: Entry %d cleared.\n";
4251 netisr_domsg_global(nmsg
);
4252 KKASSERT(zmsg
.start_rule
== NULL
);
4255 log(LOG_SECURITY
| LOG_NOTICE
, msg
, rulenum
);
4260 * Check validity of the structure before insert.
4261 * Fortunately rules are simple, so this mostly need to check rule sizes.
4264 ipfw_check_ioc_rule(struct ipfw_ioc_rule
*rule
, int size
, uint32_t *rule_flags
)
4267 int have_action
= 0;
4272 /* Check for valid size */
4273 if (size
< sizeof(*rule
)) {
4274 kprintf("ipfw: rule too short\n");
4277 l
= IOC_RULESIZE(rule
);
4279 kprintf("ipfw: size mismatch (have %d want %d)\n", size
, l
);
4283 /* Check rule number */
4284 if (rule
->rulenum
== IPFW_DEFAULT_RULE
) {
4285 kprintf("ipfw: invalid rule number\n");
4290 * Now go for the individual checks. Very simple ones, basically only
4291 * instruction sizes.
4293 for (l
= rule
->cmd_len
, cmd
= rule
->cmd
; l
> 0;
4294 l
-= cmdlen
, cmd
+= cmdlen
) {
4295 cmdlen
= F_LEN(cmd
);
4297 kprintf("ipfw: opcode %d size truncated\n",
4302 DPRINTF("ipfw: opcode %d\n", cmd
->opcode
);
4304 if (cmd
->opcode
== O_KEEP_STATE
|| cmd
->opcode
== O_LIMIT
) {
4305 /* This rule will generate states. */
4306 *rule_flags
|= IPFW_RULE_F_GENSTATE
;
4307 if (cmd
->opcode
== O_LIMIT
)
4308 *rule_flags
|= IPFW_RULE_F_GENTRACK
;
4311 switch (cmd
->opcode
) {
4325 case O_IPPRECEDENCE
:
4332 if (cmdlen
!= F_INSN_SIZE(ipfw_insn
))
4344 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_u32
))
4349 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_limit
))
4354 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_log
))
4357 ((ipfw_insn_log
*)cmd
)->log_left
=
4358 ((ipfw_insn_log
*)cmd
)->max_log
;
4364 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_ip
))
4366 if (((ipfw_insn_ip
*)cmd
)->mask
.s_addr
== 0) {
4367 kprintf("ipfw: opcode %d, useless rule\n",
4375 if (cmd
->arg1
== 0 || cmd
->arg1
> 256) {
4376 kprintf("ipfw: invalid set size %d\n",
4380 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_u32
) +
4386 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_mac
))
4392 case O_IP_DSTPORT
: /* XXX artificial limit, 30 port pairs */
4393 if (cmdlen
< 2 || cmdlen
> 31)
4400 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_if
))
4406 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_pipe
))
4411 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_sa
)) {
4416 fwd_addr
= ((ipfw_insn_sa
*)cmd
)->
4418 if (IN_MULTICAST(ntohl(fwd_addr
))) {
4419 kprintf("ipfw: try forwarding to "
4420 "multicast address\n");
4426 case O_FORWARD_MAC
: /* XXX not implemented yet */
4435 if (cmdlen
!= F_INSN_SIZE(ipfw_insn
))
4439 kprintf("ipfw: opcode %d, multiple actions"
4446 kprintf("ipfw: opcode %d, action must be"
4453 kprintf("ipfw: opcode %d, unknown opcode\n",
4458 if (have_action
== 0) {
4459 kprintf("ipfw: missing action\n");
4465 kprintf("ipfw: opcode %d size %d wrong\n",
4466 cmd
->opcode
, cmdlen
);
4471 ipfw_ctl_add_rule(struct sockopt
*sopt
)
4473 struct ipfw_ioc_rule
*ioc_rule
;
4475 uint32_t rule_flags
;
4480 size
= sopt
->sopt_valsize
;
4481 if (size
> (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX
) ||
4482 size
< sizeof(*ioc_rule
)) {
4485 if (size
!= (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX
)) {
4486 sopt
->sopt_val
= krealloc(sopt
->sopt_val
, sizeof(uint32_t) *
4487 IPFW_RULE_SIZE_MAX
, M_TEMP
, M_WAITOK
);
4489 ioc_rule
= sopt
->sopt_val
;
4491 error
= ipfw_check_ioc_rule(ioc_rule
, size
, &rule_flags
);
4495 ipfw_add_rule(ioc_rule
, rule_flags
);
4497 if (sopt
->sopt_dir
== SOPT_GET
)
4498 sopt
->sopt_valsize
= IOC_RULESIZE(ioc_rule
);
4503 ipfw_copy_rule(const struct ipfw_context
*ctx
, const struct ip_fw
*rule
,
4504 struct ipfw_ioc_rule
*ioc_rule
)
4506 const struct ip_fw
*sibling
;
4512 KASSERT(rule
->cpuid
== 0, ("rule does not belong to cpu0"));
4514 ioc_rule
->act_ofs
= rule
->act_ofs
;
4515 ioc_rule
->cmd_len
= rule
->cmd_len
;
4516 ioc_rule
->rulenum
= rule
->rulenum
;
4517 ioc_rule
->set
= rule
->set
;
4518 ioc_rule
->usr_flags
= rule
->usr_flags
;
4520 ioc_rule
->set_disable
= ctx
->ipfw_set_disable
;
4521 ioc_rule
->static_count
= static_count
;
4522 ioc_rule
->static_len
= static_ioc_len
;
4525 * Visit (read-only) all of the rule's duplications to get
4526 * the necessary statistics
4533 ioc_rule
->timestamp
= 0;
4534 for (sibling
= rule
; sibling
!= NULL
; sibling
= sibling
->sibling
) {
4535 ioc_rule
->pcnt
+= sibling
->pcnt
;
4536 ioc_rule
->bcnt
+= sibling
->bcnt
;
4537 if (sibling
->timestamp
> ioc_rule
->timestamp
)
4538 ioc_rule
->timestamp
= sibling
->timestamp
;
4543 KASSERT(i
== netisr_ncpus
,
4544 ("static rule is not duplicated on netisr_ncpus %d", netisr_ncpus
));
4546 bcopy(rule
->cmd
, ioc_rule
->cmd
, ioc_rule
->cmd_len
* 4 /* XXX */);
4548 return ((uint8_t *)ioc_rule
+ IOC_RULESIZE(ioc_rule
));
4552 ipfw_track_copy(const struct ipfw_trkcnt
*trk
, struct ipfw_ioc_state
*ioc_state
)
4554 struct ipfw_ioc_flowid
*ioc_id
;
4556 if (trk
->tc_expire
== 0) {
4557 /* Not a scanned one. */
4561 ioc_state
->expire
= TIME_LEQ(trk
->tc_expire
, time_uptime
) ?
4562 0 : trk
->tc_expire
- time_uptime
;
4563 ioc_state
->pcnt
= 0;
4564 ioc_state
->bcnt
= 0;
4566 ioc_state
->dyn_type
= O_LIMIT_PARENT
;
4567 ioc_state
->count
= trk
->tc_count
;
4569 ioc_state
->rulenum
= trk
->tc_rulenum
;
4571 ioc_id
= &ioc_state
->id
;
4572 ioc_id
->type
= ETHERTYPE_IP
;
4573 ioc_id
->u
.ip
.proto
= trk
->tc_proto
;
4574 ioc_id
->u
.ip
.src_ip
= trk
->tc_saddr
;
4575 ioc_id
->u
.ip
.dst_ip
= trk
->tc_daddr
;
4576 ioc_id
->u
.ip
.src_port
= trk
->tc_sport
;
4577 ioc_id
->u
.ip
.dst_port
= trk
->tc_dport
;
4583 ipfw_state_copy(const struct ipfw_state
*s
, struct ipfw_ioc_state
*ioc_state
)
4585 struct ipfw_ioc_flowid
*ioc_id
;
4587 if (s
->st_type
== O_ANCHOR
)
4590 ioc_state
->expire
= TIME_LEQ(s
->st_expire
, time_uptime
) ?
4591 0 : s
->st_expire
- time_uptime
;
4592 ioc_state
->pcnt
= s
->st_pcnt
;
4593 ioc_state
->bcnt
= s
->st_bcnt
;
4595 ioc_state
->dyn_type
= s
->st_type
;
4596 ioc_state
->count
= 0;
4598 ioc_state
->rulenum
= s
->st_rule
->rulenum
;
4600 ioc_id
= &ioc_state
->id
;
4601 ioc_id
->type
= ETHERTYPE_IP
;
4602 ioc_id
->u
.ip
.proto
= s
->st_proto
;
4603 ipfw_key_4tuple(&s
->st_key
,
4604 &ioc_id
->u
.ip
.src_ip
, &ioc_id
->u
.ip
.src_port
,
4605 &ioc_id
->u
.ip
.dst_ip
, &ioc_id
->u
.ip
.dst_port
);
4611 ipfw_state_copy_dispatch(netmsg_t nmsg
)
4613 struct netmsg_cpstate
*nm
= (struct netmsg_cpstate
*)nmsg
;
4614 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4615 const struct ipfw_state
*s
;
4616 const struct ipfw_track
*t
;
4618 ASSERT_NETISR_NCPUS(mycpuid
);
4619 KASSERT(nm
->state_cnt
< nm
->state_cntmax
,
4620 ("invalid state count %d, max %d",
4621 nm
->state_cnt
, nm
->state_cntmax
));
4623 TAILQ_FOREACH(s
, &ctx
->ipfw_state_list
, st_link
) {
4624 if (ipfw_state_copy(s
, nm
->ioc_state
)) {
4627 if (nm
->state_cnt
== nm
->state_cntmax
)
4633 * Prepare tracks in the global track tree for userland.
4635 TAILQ_FOREACH(t
, &ctx
->ipfw_track_list
, t_link
) {
4636 struct ipfw_trkcnt
*trk
;
4638 if (t
->t_count
== NULL
) /* anchor */
4643 * Only one netisr can run this function at
4644 * any time, and only this function accesses
4645 * trkcnt's tc_expire, so this is safe w/o
4646 * ipfw_gd.ipfw_trkcnt_token.
4648 if (trk
->tc_expire
> t
->t_expire
)
4650 trk
->tc_expire
= t
->t_expire
;
4654 * Copy tracks in the global track tree to userland in
4657 if (mycpuid
== netisr_ncpus
- 1) {
4658 struct ipfw_trkcnt
*trk
;
4660 KASSERT(nm
->state_cnt
< nm
->state_cntmax
,
4661 ("invalid state count %d, max %d",
4662 nm
->state_cnt
, nm
->state_cntmax
));
4665 RB_FOREACH(trk
, ipfw_trkcnt_tree
, &ipfw_gd
.ipfw_trkcnt_tree
) {
4666 if (ipfw_track_copy(trk
, nm
->ioc_state
)) {
4669 if (nm
->state_cnt
== nm
->state_cntmax
) {
4678 if (nm
->state_cnt
== nm
->state_cntmax
) {
4679 /* No more space; done. */
4680 netisr_replymsg(&nm
->base
, 0);
4682 netisr_forwardmsg(&nm
->base
, mycpuid
+ 1);
4687 ipfw_ctl_get_rules(struct sockopt
*sopt
)
4689 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4698 * pass up a copy of the current rules. Static rules
4699 * come first (the last of which has number IPFW_DEFAULT_RULE),
4700 * followed by a possibly empty list of states.
4703 size
= static_ioc_len
; /* size of static rules */
4706 * Size of the states.
4707 * XXX take tracks as state for userland compat.
4709 state_cnt
= ipfw_state_cntcoll() + ipfw_gd
.ipfw_trkcnt_cnt
;
4710 state_cnt
= (state_cnt
* 5) / 4; /* leave 25% headroom */
4711 size
+= state_cnt
* sizeof(struct ipfw_ioc_state
);
4713 if (sopt
->sopt_valsize
< size
) {
4714 /* short length, no need to return incomplete rules */
4715 /* XXX: if superuser, no need to zero buffer */
4716 bzero(sopt
->sopt_val
, sopt
->sopt_valsize
);
4719 bp
= sopt
->sopt_val
;
4721 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
)
4722 bp
= ipfw_copy_rule(ctx
, rule
, bp
);
4725 struct netmsg_cpstate nm
;
4727 size_t old_size
= size
;
4730 netmsg_init(&nm
.base
, NULL
, &curthread
->td_msgport
,
4731 MSGF_PRIORITY
, ipfw_state_copy_dispatch
);
4733 nm
.state_cntmax
= state_cnt
;
4735 netisr_domsg_global(&nm
.base
);
4738 * The # of states may be shrinked after the snapshot
4739 * of the state count was taken. To give user a correct
4740 * state count, nm->state_cnt is used to recalculate
4743 size
= static_ioc_len
+
4744 (nm
.state_cnt
* sizeof(struct ipfw_ioc_state
));
4745 KKASSERT(size
<= old_size
);
4748 sopt
->sopt_valsize
= size
;
4753 ipfw_set_disable_dispatch(netmsg_t nmsg
)
4755 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4757 ASSERT_NETISR_NCPUS(mycpuid
);
4759 ctx
->ipfw_set_disable
= nmsg
->lmsg
.u
.ms_result32
;
4760 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
4764 ipfw_ctl_set_disable(uint32_t disable
, uint32_t enable
)
4766 struct netmsg_base nmsg
;
4767 uint32_t set_disable
;
4771 /* IPFW_DEFAULT_SET is always enabled */
4772 enable
|= (1 << IPFW_DEFAULT_SET
);
4773 set_disable
= (ipfw_ctx
[mycpuid
]->ipfw_set_disable
| disable
) & ~enable
;
4775 bzero(&nmsg
, sizeof(nmsg
));
4776 netmsg_init(&nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
4777 ipfw_set_disable_dispatch
);
4778 nmsg
.lmsg
.u
.ms_result32
= set_disable
;
4780 netisr_domsg_global(&nmsg
);
4784 * {set|get}sockopt parser.
4787 ipfw_ctl(struct sockopt
*sopt
)
4797 switch (sopt
->sopt_name
) {
4799 error
= ipfw_ctl_get_rules(sopt
);
4803 ipfw_flush(0 /* keep default rule */);
4807 error
= ipfw_ctl_add_rule(sopt
);
4812 * IP_FW_DEL is used for deleting single rules or sets,
4813 * and (ab)used to atomically manipulate sets.
4814 * Argument size is used to distinguish between the two:
4816 * delete single rule or set of rules,
4817 * or reassign rules (or sets) to a different set.
4818 * 2 * sizeof(uint32_t)
4819 * atomic disable/enable sets.
4820 * first uint32_t contains sets to be disabled,
4821 * second uint32_t contains sets to be enabled.
4823 masks
= sopt
->sopt_val
;
4824 size
= sopt
->sopt_valsize
;
4825 if (size
== sizeof(*masks
)) {
4827 * Delete or reassign static rule
4829 error
= ipfw_ctl_alter(masks
[0]);
4830 } else if (size
== (2 * sizeof(*masks
))) {
4832 * Set enable/disable
4834 ipfw_ctl_set_disable(masks
[0], masks
[1]);
4841 case IP_FW_RESETLOG
: /* argument is an int, the rule number */
4844 if (sopt
->sopt_val
!= 0) {
4845 error
= soopt_to_kbuf(sopt
, &rulenum
,
4846 sizeof(int), sizeof(int));
4850 error
= ipfw_ctl_zero_entry(rulenum
,
4851 sopt
->sopt_name
== IP_FW_RESETLOG
);
4855 kprintf("ipfw_ctl invalid option %d\n", sopt
->sopt_name
);
4862 ipfw_keepalive_done(struct ipfw_context
*ctx
)
4865 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_KEEPALIVE
,
4866 ("keepalive is not in progress"));
4867 ctx
->ipfw_flags
&= ~IPFW_FLAG_KEEPALIVE
;
4868 callout_reset(&ctx
->ipfw_keepalive_ch
, dyn_keepalive_period
* hz
,
4869 ipfw_keepalive
, NULL
);
4873 ipfw_keepalive_more(struct ipfw_context
*ctx
)
4875 struct netmsg_base
*nm
= &ctx
->ipfw_keepalive_more
;
4877 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_KEEPALIVE
,
4878 ("keepalive is not in progress"));
4879 KASSERT(nm
->lmsg
.ms_flags
& MSGF_DONE
,
4880 ("keepalive more did not finish"));
4881 netisr_sendmsg_oncpu(nm
);
4885 ipfw_keepalive_loop(struct ipfw_context
*ctx
, struct ipfw_state
*anchor
)
4887 struct ipfw_state
*s
;
4888 int scanned
= 0, expired
= 0, kept
= 0;
4890 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_KEEPALIVE
,
4891 ("keepalive is not in progress"));
4893 while ((s
= TAILQ_NEXT(anchor
, st_link
)) != NULL
) {
4894 uint32_t ack_rev
, ack_fwd
;
4895 struct ipfw_flow_id id
;
4897 if (scanned
++ >= ipfw_state_scan_max
) {
4898 ipfw_keepalive_more(ctx
);
4902 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
4903 TAILQ_INSERT_AFTER(&ctx
->ipfw_state_list
, s
, anchor
, st_link
);
4905 if (s
->st_type
== O_ANCHOR
)
4908 if (TIME_LEQ(s
->st_expire
, time_uptime
)) {
4909 /* State expired. */
4910 ipfw_state_del(ctx
, s
);
4911 if (++expired
>= ipfw_state_expire_max
) {
4912 ipfw_keepalive_more(ctx
);
4919 * Keep alive processing
4922 if (s
->st_proto
!= IPPROTO_TCP
)
4924 if ((s
->st_state
& IPFW_STATE_TCPSTATES
) != BOTH_SYN
)
4926 if (TIME_LEQ(time_uptime
+ dyn_keepalive_interval
,
4928 continue; /* too early */
4930 ipfw_key_4tuple(&s
->st_key
, &id
.src_ip
, &id
.src_port
,
4931 &id
.dst_ip
, &id
.dst_port
);
4932 ack_rev
= s
->st_ack_rev
;
4933 ack_fwd
= s
->st_ack_fwd
;
4935 send_pkt(&id
, ack_rev
- 1, ack_fwd
, TH_SYN
);
4936 send_pkt(&id
, ack_fwd
- 1, ack_rev
, 0);
4938 if (++kept
>= ipfw_keepalive_max
) {
4939 ipfw_keepalive_more(ctx
);
4943 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
4944 ipfw_keepalive_done(ctx
);
4948 ipfw_keepalive_more_dispatch(netmsg_t nm
)
4950 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4951 struct ipfw_state
*anchor
;
4953 ASSERT_NETISR_NCPUS(mycpuid
);
4954 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_KEEPALIVE
,
4955 ("keepalive is not in progress"));
4958 netisr_replymsg(&nm
->base
, 0);
4960 anchor
= &ctx
->ipfw_keepalive_anch
;
4961 if (!dyn_keepalive
|| ctx
->ipfw_state_cnt
== 0) {
4962 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
4963 ipfw_keepalive_done(ctx
);
4966 ipfw_keepalive_loop(ctx
, anchor
);
4970 * This procedure is only used to handle keepalives. It is invoked
4971 * every dyn_keepalive_period
4974 ipfw_keepalive_dispatch(netmsg_t nm
)
4976 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4977 struct ipfw_state
*anchor
;
4979 ASSERT_NETISR_NCPUS(mycpuid
);
4980 KASSERT((ctx
->ipfw_flags
& IPFW_FLAG_KEEPALIVE
) == 0,
4981 ("keepalive is in progress"));
4982 ctx
->ipfw_flags
|= IPFW_FLAG_KEEPALIVE
;
4986 netisr_replymsg(&nm
->base
, 0);
4989 if (!dyn_keepalive
|| ctx
->ipfw_state_cnt
== 0) {
4990 ipfw_keepalive_done(ctx
);
4994 anchor
= &ctx
->ipfw_keepalive_anch
;
4995 TAILQ_INSERT_HEAD(&ctx
->ipfw_state_list
, anchor
, st_link
);
4996 ipfw_keepalive_loop(ctx
, anchor
);
5000 * This procedure is only used to handle keepalives. It is invoked
5001 * every dyn_keepalive_period
5004 ipfw_keepalive(void *dummy __unused
)
5006 struct netmsg_base
*msg
;
5008 KKASSERT(mycpuid
< netisr_ncpus
);
5009 msg
= &ipfw_ctx
[mycpuid
]->ipfw_keepalive_nm
;
5012 if (msg
->lmsg
.ms_flags
& MSGF_DONE
)
5013 netisr_sendmsg_oncpu(msg
);
5018 ipfw_check_in(void *arg
, struct mbuf
**m0
, struct ifnet
*ifp
, int dir
)
5020 struct ip_fw_args args
;
5021 struct mbuf
*m
= *m0
;
5023 int tee
= 0, error
= 0, ret
;
5025 if (m
->m_pkthdr
.fw_flags
& DUMMYNET_MBUF_TAGGED
) {
5026 /* Extract info from dummynet tag */
5027 mtag
= m_tag_find(m
, PACKET_TAG_DUMMYNET
, NULL
);
5028 KKASSERT(mtag
!= NULL
);
5029 args
.rule
= ((struct dn_pkt
*)m_tag_data(mtag
))->dn_priv
;
5030 KKASSERT(args
.rule
!= NULL
);
5032 m_tag_delete(m
, mtag
);
5033 m
->m_pkthdr
.fw_flags
&= ~DUMMYNET_MBUF_TAGGED
;
5041 ret
= ipfw_chk(&args
);
5059 case IP_FW_DUMMYNET
:
5060 /* Send packet to the appropriate pipe */
5061 ipfw_dummynet_io(m
, args
.cookie
, DN_TO_IP_IN
, &args
);
5070 * Must clear bridge tag when changing
5072 m
->m_pkthdr
.fw_flags
&= ~BRIDGE_MBUF_TAGGED
;
5073 if (ip_divert_p
!= NULL
) {
5074 m
= ip_divert_p(m
, tee
, 1);
5078 /* not sure this is the right error msg */
5084 panic("unknown ipfw return value: %d", ret
);
5092 ipfw_check_out(void *arg
, struct mbuf
**m0
, struct ifnet
*ifp
, int dir
)
5094 struct ip_fw_args args
;
5095 struct mbuf
*m
= *m0
;
5097 int tee
= 0, error
= 0, ret
;
5099 if (m
->m_pkthdr
.fw_flags
& DUMMYNET_MBUF_TAGGED
) {
5100 /* Extract info from dummynet tag */
5101 mtag
= m_tag_find(m
, PACKET_TAG_DUMMYNET
, NULL
);
5102 KKASSERT(mtag
!= NULL
);
5103 args
.rule
= ((struct dn_pkt
*)m_tag_data(mtag
))->dn_priv
;
5104 KKASSERT(args
.rule
!= NULL
);
5106 m_tag_delete(m
, mtag
);
5107 m
->m_pkthdr
.fw_flags
&= ~DUMMYNET_MBUF_TAGGED
;
5115 ret
= ipfw_chk(&args
);
5133 case IP_FW_DUMMYNET
:
5134 ipfw_dummynet_io(m
, args
.cookie
, DN_TO_IP_OUT
, &args
);
5142 if (ip_divert_p
!= NULL
) {
5143 m
= ip_divert_p(m
, tee
, 0);
5147 /* not sure this is the right error msg */
5153 panic("unknown ipfw return value: %d", ret
);
5163 struct pfil_head
*pfh
;
5167 pfh
= pfil_head_get(PFIL_TYPE_AF
, AF_INET
);
5171 pfil_add_hook(ipfw_check_in
, NULL
, PFIL_IN
, pfh
);
5172 pfil_add_hook(ipfw_check_out
, NULL
, PFIL_OUT
, pfh
);
5178 struct pfil_head
*pfh
;
5182 pfh
= pfil_head_get(PFIL_TYPE_AF
, AF_INET
);
5186 pfil_remove_hook(ipfw_check_in
, NULL
, PFIL_IN
, pfh
);
5187 pfil_remove_hook(ipfw_check_out
, NULL
, PFIL_OUT
, pfh
);
5191 ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS
)
5195 dyn_cnt
= ipfw_state_cntcoll();
5196 dyn_cnt
+= ipfw_gd
.ipfw_trkcnt_cnt
;
5198 return (sysctl_handle_int(oidp
, &dyn_cnt
, 0, req
));
5202 ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS
)
5206 state_cnt
= ipfw_state_cntcoll();
5207 return (sysctl_handle_int(oidp
, &state_cnt
, 0, req
));
5211 ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS
)
5213 int state_max
, error
;
5215 state_max
= ipfw_state_max
;
5216 error
= sysctl_handle_int(oidp
, &state_max
, 0, req
);
5217 if (error
|| req
->newptr
== NULL
)
5223 ipfw_state_max_set(state_max
);
5228 ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS
)
5232 dyn_max
= ipfw_state_max
+ ipfw_track_max
;
5234 error
= sysctl_handle_int(oidp
, &dyn_max
, 0, req
);
5235 if (error
|| req
->newptr
== NULL
)
5241 ipfw_state_max_set(dyn_max
/ 2);
5242 ipfw_track_max
= dyn_max
/ 2;
5247 ipfw_sysctl_enable_dispatch(netmsg_t nmsg
)
5249 int enable
= nmsg
->lmsg
.u
.ms_result
;
5253 if (fw_enable
== enable
)
5262 netisr_replymsg(&nmsg
->base
, 0);
5266 ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS
)
5268 struct netmsg_base nmsg
;
5272 error
= sysctl_handle_int(oidp
, &enable
, 0, req
);
5273 if (error
|| req
->newptr
== NULL
)
5276 netmsg_init(&nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5277 ipfw_sysctl_enable_dispatch
);
5278 nmsg
.lmsg
.u
.ms_result
= enable
;
5280 return netisr_domsg(&nmsg
, 0);
5284 ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS
)
5286 return sysctl_int_range(oidp
, arg1
, arg2
, req
,
5287 IPFW_AUTOINC_STEP_MIN
, IPFW_AUTOINC_STEP_MAX
);
5291 ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS
)
5294 return sysctl_int_range(oidp
, arg1
, arg2
, req
, 1, INT_MAX
);
5298 ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS
)
5303 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
)
5304 stat
+= *((u_long
*)((uint8_t *)ipfw_ctx
[cpu
] + arg2
));
5306 error
= sysctl_handle_long(oidp
, &stat
, 0, req
);
5307 if (error
|| req
->newptr
== NULL
)
5310 /* Zero out this stat. */
5311 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
)
5312 *((u_long
*)((uint8_t *)ipfw_ctx
[cpu
] + arg2
)) = 0;
5317 ipfw_ctx_init_dispatch(netmsg_t nmsg
)
5319 struct netmsg_ipfw
*fwmsg
= (struct netmsg_ipfw
*)nmsg
;
5320 struct ipfw_context
*ctx
;
5321 struct ip_fw
*def_rule
;
5323 ASSERT_NETISR_NCPUS(mycpuid
);
5325 ctx
= kmalloc(sizeof(*ctx
), M_IPFW
, M_WAITOK
| M_ZERO
);
5327 RB_INIT(&ctx
->ipfw_state_tree
);
5328 TAILQ_INIT(&ctx
->ipfw_state_list
);
5330 RB_INIT(&ctx
->ipfw_track_tree
);
5331 TAILQ_INIT(&ctx
->ipfw_track_list
);
5333 callout_init_mp(&ctx
->ipfw_stateto_ch
);
5334 netmsg_init(&ctx
->ipfw_stateexp_nm
, NULL
, &netisr_adone_rport
,
5335 MSGF_DROPABLE
| MSGF_PRIORITY
, ipfw_state_expire_dispatch
);
5336 ctx
->ipfw_stateexp_anch
.st_type
= O_ANCHOR
;
5337 netmsg_init(&ctx
->ipfw_stateexp_more
, NULL
, &netisr_adone_rport
,
5338 MSGF_DROPABLE
, ipfw_state_expire_more_dispatch
);
5340 callout_init_mp(&ctx
->ipfw_trackto_ch
);
5341 netmsg_init(&ctx
->ipfw_trackexp_nm
, NULL
, &netisr_adone_rport
,
5342 MSGF_DROPABLE
| MSGF_PRIORITY
, ipfw_track_expire_dispatch
);
5343 netmsg_init(&ctx
->ipfw_trackexp_more
, NULL
, &netisr_adone_rport
,
5344 MSGF_DROPABLE
, ipfw_track_expire_more_dispatch
);
5346 callout_init_mp(&ctx
->ipfw_keepalive_ch
);
5347 netmsg_init(&ctx
->ipfw_keepalive_nm
, NULL
, &netisr_adone_rport
,
5348 MSGF_DROPABLE
| MSGF_PRIORITY
, ipfw_keepalive_dispatch
);
5349 ctx
->ipfw_keepalive_anch
.st_type
= O_ANCHOR
;
5350 netmsg_init(&ctx
->ipfw_keepalive_more
, NULL
, &netisr_adone_rport
,
5351 MSGF_DROPABLE
, ipfw_keepalive_more_dispatch
);
5353 ipfw_ctx
[mycpuid
] = ctx
;
5355 def_rule
= kmalloc(sizeof(*def_rule
), M_IPFW
, M_WAITOK
| M_ZERO
);
5357 def_rule
->act_ofs
= 0;
5358 def_rule
->rulenum
= IPFW_DEFAULT_RULE
;
5359 def_rule
->cmd_len
= 1;
5360 def_rule
->set
= IPFW_DEFAULT_SET
;
5362 def_rule
->cmd
[0].len
= 1;
5363 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
5364 def_rule
->cmd
[0].opcode
= O_ACCEPT
;
5366 if (filters_default_to_accept
)
5367 def_rule
->cmd
[0].opcode
= O_ACCEPT
;
5369 def_rule
->cmd
[0].opcode
= O_DENY
;
5372 def_rule
->refcnt
= 1;
5373 def_rule
->cpuid
= mycpuid
;
5375 /* Install the default rule */
5376 ctx
->ipfw_default_rule
= def_rule
;
5377 ctx
->ipfw_layer3_chain
= def_rule
;
5379 /* Link rule CPU sibling */
5380 ipfw_link_sibling(fwmsg
, def_rule
);
5382 /* Statistics only need to be updated once */
5384 ipfw_inc_static_count(def_rule
);
5386 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
5390 ipfw_init_dispatch(netmsg_t nmsg
)
5392 struct netmsg_ipfw fwmsg
;
5398 kprintf("IP firewall already loaded\n");
5403 /* Initialize global track tree. */
5404 RB_INIT(&ipfw_gd
.ipfw_trkcnt_tree
);
5405 IPFW_TRKCNT_TOKINIT
;
5407 ipfw_state_max_set(ipfw_state_max
);
5408 ipfw_state_headroom
= 8 * netisr_ncpus
;
5410 bzero(&fwmsg
, sizeof(fwmsg
));
5411 netmsg_init(&fwmsg
.base
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5412 ipfw_ctx_init_dispatch
);
5413 netisr_domsg_global(&fwmsg
.base
);
5415 ip_fw_chk_ptr
= ipfw_chk
;
5416 ip_fw_ctl_ptr
= ipfw_ctl
;
5417 ip_fw_dn_io_ptr
= ipfw_dummynet_io
;
5419 kprintf("ipfw2 initialized, default to %s, logging ",
5420 ipfw_ctx
[mycpuid
]->ipfw_default_rule
->cmd
[0].opcode
==
5421 O_ACCEPT
? "accept" : "deny");
5423 #ifdef IPFIREWALL_VERBOSE
5426 #ifdef IPFIREWALL_VERBOSE_LIMIT
5427 verbose_limit
= IPFIREWALL_VERBOSE_LIMIT
;
5429 if (fw_verbose
== 0) {
5430 kprintf("disabled\n");
5431 } else if (verbose_limit
== 0) {
5432 kprintf("unlimited\n");
5434 kprintf("limited to %d packets/entry by default\n",
5439 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
) {
5440 callout_reset_bycpu(&ipfw_ctx
[cpu
]->ipfw_stateto_ch
, hz
,
5441 ipfw_state_expire_ipifunc
, NULL
, cpu
);
5442 callout_reset_bycpu(&ipfw_ctx
[cpu
]->ipfw_trackto_ch
, hz
,
5443 ipfw_track_expire_ipifunc
, NULL
, cpu
);
5444 callout_reset_bycpu(&ipfw_ctx
[cpu
]->ipfw_keepalive_ch
, hz
,
5445 ipfw_keepalive
, NULL
, cpu
);
5451 netisr_replymsg(&nmsg
->base
, error
);
5457 struct netmsg_base smsg
;
5459 netmsg_init(&smsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5460 ipfw_init_dispatch
);
5461 return netisr_domsg(&smsg
, 0);
5467 ipfw_ctx_fini_dispatch(netmsg_t nmsg
)
5469 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5471 ASSERT_NETISR_NCPUS(mycpuid
);
5473 callout_stop_sync(&ctx
->ipfw_stateto_ch
);
5474 callout_stop_sync(&ctx
->ipfw_trackto_ch
);
5475 callout_stop_sync(&ctx
->ipfw_keepalive_ch
);
5478 netisr_dropmsg(&ctx
->ipfw_stateexp_more
);
5479 netisr_dropmsg(&ctx
->ipfw_stateexp_nm
);
5480 netisr_dropmsg(&ctx
->ipfw_trackexp_more
);
5481 netisr_dropmsg(&ctx
->ipfw_trackexp_nm
);
5482 netisr_dropmsg(&ctx
->ipfw_keepalive_more
);
5483 netisr_dropmsg(&ctx
->ipfw_keepalive_nm
);
5486 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
5490 ipfw_fini_dispatch(netmsg_t nmsg
)
5492 struct netmsg_base nm
;
5497 if (ipfw_gd
.ipfw_refcnt
!= 0) {
5505 /* Synchronize any inflight state/track expire IPIs. */
5506 lwkt_synchronize_ipiqs("ipfwfini");
5508 netmsg_init(&nm
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5509 ipfw_ctx_fini_dispatch
);
5510 netisr_domsg_global(&nm
);
5512 ip_fw_chk_ptr
= NULL
;
5513 ip_fw_ctl_ptr
= NULL
;
5514 ip_fw_dn_io_ptr
= NULL
;
5515 ipfw_flush(1 /* kill default rule */);
5517 /* Free pre-cpu context */
5518 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
)
5519 kfree(ipfw_ctx
[cpu
], M_IPFW
);
5521 kprintf("IP firewall unloaded\n");
5523 netisr_replymsg(&nmsg
->base
, error
);
5529 struct netmsg_base smsg
;
5531 netmsg_init(&smsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5532 ipfw_fini_dispatch
);
5533 return netisr_domsg(&smsg
, 0);
5536 #endif /* KLD_MODULE */
5539 ipfw_modevent(module_t mod
, int type
, void *unused
)
5550 kprintf("ipfw statically compiled, cannot unload\n");
5562 static moduledata_t ipfwmod
= {
5567 DECLARE_MODULE(ipfw
, ipfwmod
, SI_SUB_PROTO_END
, SI_ORDER_ANY
);
5568 MODULE_VERSION(ipfw
, 1);