2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $
29 * Implement IP packet firewall (new version)
35 #error IPFIREWALL requires INET.
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/malloc.h>
42 #include <sys/kernel.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/sysctl.h>
47 #include <sys/syslog.h>
48 #include <sys/ucred.h>
49 #include <sys/in_cksum.h>
50 #include <sys/limits.h>
55 #include <net/route.h>
57 #include <net/dummynet/ip_dummynet.h>
59 #include <sys/thread2.h>
60 #include <sys/mplock2.h>
61 #include <net/netmsg2.h>
63 #include <netinet/in.h>
64 #include <netinet/in_systm.h>
65 #include <netinet/in_var.h>
66 #include <netinet/in_pcb.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip_var.h>
69 #include <netinet/ip_icmp.h>
70 #include <netinet/tcp.h>
71 #include <netinet/tcp_seq.h>
72 #include <netinet/tcp_timer.h>
73 #include <netinet/tcp_var.h>
74 #include <netinet/tcpip.h>
75 #include <netinet/udp.h>
76 #include <netinet/udp_var.h>
77 #include <netinet/ip_divert.h>
78 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */
80 #include <net/ipfw/ip_fw2.h>
82 #ifdef IPFIREWALL_DEBUG
83 #define DPRINTF(fmt, ...) \
86 kprintf(fmt, __VA_ARGS__); \
89 #define DPRINTF(fmt, ...) ((void)0)
93 * Description about per-CPU rule duplication:
95 * Module loading/unloading and all ioctl operations are serialized
96 * by netisr0, so we don't have any ordering or locking problems.
98 * Following graph shows how operation on per-CPU rule list is
99 * performed [2 CPU case]:
103 * netisr0 <------------------------------------+
109 * forwardmsg---------->netisr1 |
114 * replymsg--------------+
118 * Rule structure [2 CPU case]
122 * layer3_chain layer3_chain
125 * +-------+ sibling +-------+ sibling
126 * | rule1 |--------->| rule1 |--------->NULL
127 * +-------+ +-------+
131 * +-------+ sibling +-------+ sibling
132 * | rule2 |--------->| rule2 |--------->NULL
133 * +-------+ +-------+
136 * 1) Ease statistics calculation during IP_FW_GET. We only need to
137 * iterate layer3_chain in netisr0; the current rule's duplication
138 * to the other CPUs could safely be read-only accessed through
140 * 2) Accelerate rule insertion and deletion, e.g. rule insertion:
141 * a) In netisr0 rule3 is determined to be inserted between rule1
142 * and rule2. To make this decision we need to iterate the
143 * layer3_chain in netisr0. The netmsg, which is used to insert
144 * the rule, will contain rule1 in netisr0 as prev_rule and rule2
145 * in netisr0 as next_rule.
146 * b) After the insertion in netisr0 is done, we will move on to
147 * netisr1. But instead of relocating the rule3's position in
148 * netisr1 by iterating the layer3_chain in netisr1, we set the
149 * netmsg's prev_rule to rule1->sibling and next_rule to
150 * rule2->sibling before the netmsg is forwarded to netisr1 from
155 * Description of states and tracks.
157 * Both states and tracks are stored in per-cpu RB trees instead of
158 * per-cpu hash tables to avoid the worst case hash degeneration.
160 * The lifetimes of states and tracks are regulated by dyn_*_lifetime,
161 * measured in seconds and depending on the flags.
163 * When a packet is received, its address fields are first masked with
164 * the mask defined for the rule, then matched against the entries in
165 * the per-cpu state RB tree. States are generated by 'keep-state'
166 * and 'limit' options.
168 * The max number of states is ipfw_state_max. When we reach the
169 * maximum number of states we do not create any more. This is done to
170 * avoid consuming too much memory, but also too much time when
171 * searching on each packet.
173 * Each state holds a pointer to the parent ipfw rule of the current
174 * CPU so we know what action to perform. States are removed when the
175 * parent rule is deleted. XXX we should make them survive.
177 * There are some limitations with states -- we do not obey the
178 * 'randomized match', and we do not do multiple passes through the
179 * firewall. XXX check the latter!!!
181 * States grow independently on each CPU, e.g. 2 CPU case:
184 * ................... ...................
185 * : state RB tree : : state RB tree :
187 * : state1 state2 : : state3 :
189 * :.....|....|......: :........|........:
194 * +-------+ +-------+
195 * | rule1 | | rule1 |
196 * +-------+ +-------+
198 * Tracks are used to enforce limits on the number of sessions. Tracks
199 * are generated by 'limit' option.
201 * The max number of tracks is ipfw_track_max. When we reach the
202 * maximum number of tracks we do not create any more. This is done to
203 * avoid consuming too much memory.
205 * Tracks are organized into two layers, track counter RB tree is
206 * shared between CPUs, track RB tree is per-cpu. States generated by
207 * 'limit' option are linked to the track in addition to the per-cpu
208 * state RB tree; mainly to ease expiration. e.g. 2 CPU case:
210 * ..............................
211 * : track counter RB tree :
216 * : +--->counter<----+ :
218 * : | +-----------+ | :
219 * :......|................|....:
222 * ................. |t_count | .................
223 * : track RB tree : | | : track RB tree :
225 * : +-->track1-------+ +--------track2 :
228 * :.|.....|.......: :...............:
229 * | +----------------+
230 * | .................... |
231 * | : state RB tree : |st_track
233 * +---state1 state2---+
235 * :.....|.......|....:
244 #define IPFW_AUTOINC_STEP_MIN 1
245 #define IPFW_AUTOINC_STEP_MAX 1000
246 #define IPFW_AUTOINC_STEP_DEF 100
248 #define IPFW_TABLE_MAX_DEF 64
250 #define IPFW_DEFAULT_RULE 65535 /* rulenum for the default rule */
251 #define IPFW_DEFAULT_SET 31 /* set number for the default rule */
253 #define MATCH_REVERSE 0
254 #define MATCH_FORWARD 1
256 #define MATCH_UNKNOWN 3
258 #define TIME_LEQ(a, b) ((a) - (b) <= 0)
/*
 * TCP flag masks applied to a state's st_state word.  Each flag is kept
 * twice: once in the low byte and once shifted left by 8.
 * NOTE(review): the two bytes presumably hold the forward and reverse
 * directions of the connection -- confirm against the code that updates
 * st_state.
 */
260 #define IPFW_STATE_TCPFLAGS (TH_SYN | TH_FIN | TH_RST)
261 #define IPFW_STATE_TCPSTATES (IPFW_STATE_TCPFLAGS | \
262 (IPFW_STATE_TCPFLAGS << 8))
/* BOTH_xxx selects flag xxx in both bytes of st_state. */
264 #define BOTH_SYN (TH_SYN | (TH_SYN << 8))
265 #define BOTH_FIN (TH_FIN | (TH_FIN << 8))
266 #define BOTH_RST (TH_RST | (TH_RST << 8))
267 /* TH_ACK here means FIN was ACKed. */
268 #define BOTH_FINACK (TH_ACK | (TH_ACK << 8))
/*
 * True for a TCP state (st_proto == IPPROTO_TCP) that has an RST bit set
 * in either byte of st_state, or the "FIN was ACKed" bit set in both
 * bytes.  Evaluates its argument more than once.
 */
270 #define IPFW_STATE_TCPCLOSED(s) ((s)->st_proto == IPPROTO_TCP && \
271 (((s)->st_state & BOTH_RST) || \
272 ((s)->st_state & BOTH_FINACK) == BOTH_FINACK))
274 #define O_ANCHOR O_NOP
276 #define IPFW_ISXLAT(type) ((type) == O_REDIRECT)
277 #define IPFW_XLAT_INVALID(s) (IPFW_ISXLAT((s)->st_type) && \
278 ((struct ipfw_xlat *)(s))->xlat_invalid)
280 #define IPFW_MBUF_XLATINS FW_MBUF_PRIVATE1
281 #define IPFW_MBUF_XLATFWD FW_MBUF_PRIVATE2
283 #define IPFW_XLATE_INSERT 0x0001
284 #define IPFW_XLATE_FORWARD 0x0002
285 #define IPFW_XLATE_OUTPUT 0x0004
288 struct netmsg_base base
;
289 const struct ipfw_ioc_rule
*ioc_rule
;
290 struct ip_fw
*next_rule
;
291 struct ip_fw
*prev_rule
;
292 struct ip_fw
*sibling
;
294 struct ip_fw
**cross_rules
;
298 struct netmsg_base base
;
299 struct ip_fw
*start_rule
;
300 struct ip_fw
*prev_rule
;
307 struct netmsg_base base
;
308 struct ip_fw
*start_rule
;
313 struct netmsg_cpstate
{
314 struct netmsg_base base
;
315 struct ipfw_ioc_state
*ioc_state
;
320 struct netmsg_tblent
{
321 struct netmsg_base base
;
322 struct sockaddr
*key
;
323 struct sockaddr
*netmask
;
324 struct ipfw_tblent
*sibling
;
328 struct netmsg_tblflush
{
329 struct netmsg_base base
;
334 struct netmsg_tblexp
{
335 struct netmsg_base base
;
340 struct radix_node_head
*rnh
;
343 struct ipfw_table_cp
{
344 struct ipfw_ioc_tblent
*te
;
351 * offset The offset of a fragment. offset != 0 means that
352 * we have a fragment at this offset of an IPv4 packet.
353 * offset == 0 means that (if this is an IPv4 packet)
354 * this is the first or only fragment.
359 * Local copies of addresses. They are only valid if we have
362 * proto The protocol. Set to 0 for non-ip packets,
363 * or to the protocol read from the packet otherwise.
364 * proto != 0 means that we have an IPv4 packet.
366 * src_port, dst_port port numbers, in HOST format. Only
367 * valid for TCP and UDP packets.
369 * src_ip, dst_ip ip addresses, in NETWORK format.
370 * Only valid for IPv4 packets.
373 uint16_t src_port
; /* NOTE: host format */
374 uint16_t dst_port
; /* NOTE: host format */
375 struct in_addr src_ip
; /* NOTE: network format */
376 struct in_addr dst_ip
; /* NOTE: network format */
382 uint32_t addr1
; /* host byte order */
383 uint32_t addr2
; /* host byte order */
387 uint16_t port1
; /* host byte order */
388 uint16_t port2
; /* host byte order */
393 struct ipfw_addrs addrs
;
397 struct ipfw_ports ports
;
401 uint8_t swap
; /* IPFW_KEY_SWAP_ */
405 #define IPFW_KEY_SWAP_ADDRS 0x1
406 #define IPFW_KEY_SWAP_PORTS 0x2
407 #define IPFW_KEY_SWAP_ALL (IPFW_KEY_SWAP_ADDRS | IPFW_KEY_SWAP_PORTS)
410 RB_ENTRY(ipfw_trkcnt
) tc_rblink
;
411 struct ipfw_key tc_key
;
415 time_t tc_expire
; /* userland get-only */
416 uint16_t tc_rulenum
; /* userland get-only */
419 #define tc_addrs tc_key.addr_u.value
420 #define tc_ports tc_key.port_u.value
421 #define tc_proto tc_key.proto
422 #define tc_saddr tc_key.addr_u.addrs.addr1
423 #define tc_daddr tc_key.addr_u.addrs.addr2
424 #define tc_sport tc_key.port_u.ports.port1
425 #define tc_dport tc_key.port_u.ports.port2
427 RB_HEAD(ipfw_trkcnt_tree
, ipfw_trkcnt
);
432 RB_ENTRY(ipfw_track
) t_rblink
;
433 struct ipfw_key t_key
;
434 struct ip_fw
*t_rule
;
436 LIST_HEAD(, ipfw_state
) t_state_list
;
438 volatile int *t_count
;
439 struct ipfw_trkcnt
*t_trkcnt
;
440 TAILQ_ENTRY(ipfw_track
) t_link
;
443 #define t_addrs t_key.addr_u.value
444 #define t_ports t_key.port_u.value
445 #define t_proto t_key.proto
446 #define t_saddr t_key.addr_u.addrs.addr1
447 #define t_daddr t_key.addr_u.addrs.addr2
448 #define t_sport t_key.port_u.ports.port1
449 #define t_dport t_key.port_u.ports.port2
451 RB_HEAD(ipfw_track_tree
, ipfw_track
);
452 TAILQ_HEAD(ipfw_track_list
, ipfw_track
);
455 RB_ENTRY(ipfw_state
) st_rblink
;
456 struct ipfw_key st_key
;
458 time_t st_expire
; /* expire time */
459 struct ip_fw
*st_rule
;
461 uint64_t st_pcnt
; /* packets */
462 uint64_t st_bcnt
; /* bytes */
466 * State of this rule, typically a combination of TCP flags.
468 * st_ack_fwd/st_ack_rev:
469 * Most recent ACKs in forward and reverse direction. They
470 * are used to generate keepalives.
473 uint32_t st_ack_fwd
; /* host byte order */
474 uint32_t st_seq_fwd
; /* host byte order */
475 uint32_t st_ack_rev
; /* host byte order */
476 uint32_t st_seq_rev
; /* host byte order */
478 uint16_t st_flags
; /* IPFW_STATE_F_ */
479 uint16_t st_type
; /* KEEP_STATE/LIMIT/RDR */
480 struct ipfw_track
*st_track
;
482 LIST_ENTRY(ipfw_state
) st_trklink
;
483 TAILQ_ENTRY(ipfw_state
) st_link
;
486 #define st_addrs st_key.addr_u.value
487 #define st_ports st_key.port_u.value
488 #define st_proto st_key.proto
489 #define st_swap st_key.swap
491 #define IPFW_STATE_F_ACKFWD 0x0001
492 #define IPFW_STATE_F_SEQFWD 0x0002
493 #define IPFW_STATE_F_ACKREV 0x0004
494 #define IPFW_STATE_F_SEQREV 0x0008
495 #define IPFW_STATE_F_XLATSRC 0x0010
496 #define IPFW_STATE_F_XLATSLAVE 0x0020
497 #define IPFW_STATE_F_LINKED 0x0040
499 #define IPFW_STATE_SCANSKIP(s) ((s)->st_type == O_ANCHOR || \
500 ((s)->st_flags & IPFW_STATE_F_XLATSLAVE))
502 /* Expired or being deleted. */
503 #define IPFW_STATE_ISDEAD(s) (TIME_LEQ((s)->st_expire, time_uptime) || \
504 IPFW_XLAT_INVALID((s)))
506 TAILQ_HEAD(ipfw_state_list
, ipfw_state
);
507 RB_HEAD(ipfw_state_tree
, ipfw_state
);
510 struct ipfw_state xlat_st
; /* MUST be the first field */
511 uint32_t xlat_addr
; /* network byte order */
512 uint16_t xlat_port
; /* network byte order */
513 uint16_t xlat_dir
; /* MATCH_ */
514 struct ifnet
*xlat_ifp
; /* matching ifnet */
515 struct ipfw_xlat
*xlat_pair
; /* paired state */
516 int xlat_pcpu
; /* paired cpu */
517 volatile int xlat_invalid
; /* invalid, but not dtor yet */
518 volatile uint64_t xlat_crefs
; /* cross references */
519 struct netmsg_base xlat_freenm
; /* for remote free */
522 #define xlat_type xlat_st.st_type
523 #define xlat_flags xlat_st.st_flags
524 #define xlat_rule xlat_st.st_rule
525 #define xlat_bcnt xlat_st.st_bcnt
526 #define xlat_pcnt xlat_st.st_pcnt
529 struct radix_node te_nodes
[2];
530 struct sockaddr_in te_key
;
533 struct ipfw_tblent
*te_sibling
;
534 volatile int te_expired
;
537 struct ipfw_context
{
538 struct ip_fw
*ipfw_layer3_chain
; /* rules for layer3 */
539 struct ip_fw
*ipfw_default_rule
; /* default rule */
540 uint64_t ipfw_norule_counter
; /* ipfw_log(NULL) stat*/
543 * ipfw_set_disable contains one bit per set value (0..31).
544 * If the bit is set, all rules with the corresponding set
545 * are disabled. Set IPFW_DEFAULT_SET is reserved for the
546 * default rule and CANNOT be disabled.
548 uint32_t ipfw_set_disable
;
550 uint8_t ipfw_flags
; /* IPFW_FLAG_ */
552 struct ip_fw
*ipfw_cont_rule
;
553 struct ipfw_xlat
*ipfw_cont_xlat
;
555 struct ipfw_state_tree ipfw_state_tree
;
556 struct ipfw_state_list ipfw_state_list
;
557 int ipfw_state_loosecnt
;
561 struct ipfw_state state
;
562 struct ipfw_track track
;
563 struct ipfw_trkcnt trkcnt
;
566 struct ipfw_track_tree ipfw_track_tree
;
567 struct ipfw_track_list ipfw_track_list
;
568 struct ipfw_trkcnt
*ipfw_trkcnt_spare
;
570 struct callout ipfw_stateto_ch
;
571 time_t ipfw_state_lastexp
;
572 struct netmsg_base ipfw_stateexp_nm
;
573 struct netmsg_base ipfw_stateexp_more
;
574 struct ipfw_state ipfw_stateexp_anch
;
576 struct callout ipfw_trackto_ch
;
577 time_t ipfw_track_lastexp
;
578 struct netmsg_base ipfw_trackexp_nm
;
579 struct netmsg_base ipfw_trackexp_more
;
580 struct ipfw_track ipfw_trackexp_anch
;
582 struct callout ipfw_keepalive_ch
;
583 struct netmsg_base ipfw_keepalive_nm
;
584 struct netmsg_base ipfw_keepalive_more
;
585 struct ipfw_state ipfw_keepalive_anch
;
587 struct callout ipfw_xlatreap_ch
;
588 struct netmsg_base ipfw_xlatreap_nm
;
589 struct ipfw_state_list ipfw_xlatreap
;
594 u_long ipfw_sts_reap
;
595 u_long ipfw_sts_reapfailed
;
596 u_long ipfw_sts_overflow
;
597 u_long ipfw_sts_nomem
;
598 u_long ipfw_sts_tcprecycled
;
600 u_long ipfw_tks_nomem
;
601 u_long ipfw_tks_reap
;
602 u_long ipfw_tks_reapfailed
;
603 u_long ipfw_tks_overflow
;
604 u_long ipfw_tks_cntnomem
;
607 u_long ipfw_defraged
;
608 u_long ipfw_defrag_remote
;
611 u_long ipfw_xlate_split
;
612 u_long ipfw_xlate_conflicts
;
613 u_long ipfw_xlate_cresolved
;
616 struct radix_node_head
*ipfw_tables
[];
619 #define IPFW_FLAG_KEEPALIVE 0x01
620 #define IPFW_FLAG_STATEEXP 0x02
621 #define IPFW_FLAG_TRACKEXP 0x04
622 #define IPFW_FLAG_STATEREAP 0x08
623 #define IPFW_FLAG_TRACKREAP 0x10
625 #define ipfw_state_tmpkey ipfw_tmpkey.state
626 #define ipfw_track_tmpkey ipfw_tmpkey.track
627 #define ipfw_trkcnt_tmpkey ipfw_tmpkey.trkcnt
630 int ipfw_state_loosecnt
; /* cache aligned */
631 time_t ipfw_state_globexp __cachealign
;
633 struct lwkt_token ipfw_trkcnt_token __cachealign
;
634 struct ipfw_trkcnt_tree ipfw_trkcnt_tree
;
636 time_t ipfw_track_globexp
;
638 /* Accessed in netisr0. */
639 struct ip_fw
*ipfw_crossref_free __cachealign
;
640 struct callout ipfw_crossref_ch
;
641 struct netmsg_base ipfw_crossref_nm
;
645 * Module can not be unloaded, if there are references to
646 * certain rules of ipfw(4), e.g. dummynet(4)
648 int ipfw_refcnt __cachealign
;
652 static struct ipfw_context
*ipfw_ctx
[MAXCPU
];
654 MALLOC_DEFINE(M_IPFW
, "IpFw/IpAcct", "IpFw/IpAcct chain's");
657 * Following two global variables are accessed and updated only
660 static uint32_t static_count
; /* # of static rules */
661 static uint32_t static_ioc_len
; /* bytes of static rules */
664 * If 1, then ipfw static rules are being flushed,
665 * ipfw_chk() will skip to the default rule.
667 static int ipfw_flushing
;
669 static int fw_verbose
;
670 static int verbose_limit
;
673 static int autoinc_step
= IPFW_AUTOINC_STEP_DEF
;
675 static int ipfw_table_max
= IPFW_TABLE_MAX_DEF
;
677 static int ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS
);
678 static int ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS
);
680 TUNABLE_INT("net.inet.ip.fw.table_max", &ipfw_table_max
);
682 SYSCTL_NODE(_net_inet_ip
, OID_AUTO
, fw
, CTLFLAG_RW
, 0, "Firewall");
683 SYSCTL_NODE(_net_inet_ip_fw
, OID_AUTO
, stats
, CTLFLAG_RW
, 0,
684 "Firewall statistics");
686 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, enable
, CTLTYPE_INT
| CTLFLAG_RW
,
687 &fw_enable
, 0, ipfw_sysctl_enable
, "I", "Enable ipfw");
688 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, autoinc_step
, CTLTYPE_INT
| CTLFLAG_RW
,
689 &autoinc_step
, 0, ipfw_sysctl_autoinc_step
, "I",
690 "Rule number autincrement step");
691 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
,one_pass
,CTLFLAG_RW
,
693 "Only do a single pass through ipfw when using dummynet(4)");
694 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, debug
, CTLFLAG_RW
,
695 &fw_debug
, 0, "Enable printing of debug ip_fw statements");
696 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, verbose
, CTLFLAG_RW
,
697 &fw_verbose
, 0, "Log matches to ipfw rules");
698 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, verbose_limit
, CTLFLAG_RW
,
699 &verbose_limit
, 0, "Set upper limit of matches of ipfw rules logged");
700 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, table_max
, CTLFLAG_RD
,
701 &ipfw_table_max
, 0, "Max # of tables");
703 static int ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS
);
704 static int ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS
);
705 static int ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS
);
706 static int ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS
);
707 static int ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS
);
708 static int ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS
);
711 * Timeouts for various events in handling states.
715 * 2 == 1~2 second(s).
717 * We use 2 seconds for FIN lifetime, so that the states will not be
718 * ripped prematurely.
720 static uint32_t dyn_ack_lifetime
= 300;
721 static uint32_t dyn_syn_lifetime
= 20;
722 static uint32_t dyn_finwait_lifetime
= 20;
723 static uint32_t dyn_fin_lifetime
= 2;
724 static uint32_t dyn_rst_lifetime
= 2;
725 static uint32_t dyn_udp_lifetime
= 10;
726 static uint32_t dyn_short_lifetime
= 5; /* used by tracks too */
729 * Keepalives are sent if dyn_keepalive is set. They are sent every
730 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
731 * seconds of lifetime of a rule.
733 static uint32_t dyn_keepalive_interval
= 20;
734 static uint32_t dyn_keepalive_period
= 5;
735 static uint32_t dyn_keepalive
= 1; /* do send keepalives */
737 static struct ipfw_global ipfw_gd
;
738 static int ipfw_state_loosecnt_updthr
;
739 static int ipfw_state_max
= 4096; /* max # of states */
740 static int ipfw_track_max
= 4096; /* max # of tracks */
742 static int ipfw_state_headroom
; /* setup at module load time */
743 static int ipfw_state_reap_min
= 8;
744 static int ipfw_state_expire_max
= 32;
745 static int ipfw_state_scan_max
= 256;
746 static int ipfw_keepalive_max
= 8;
747 static int ipfw_track_reap_max
= 4;
748 static int ipfw_track_expire_max
= 16;
749 static int ipfw_track_scan_max
= 128;
751 static eventhandler_tag ipfw_ifaddr_event
;
754 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, dyn_count
,
755 CTLTYPE_INT
| CTLFLAG_RD
, NULL
, 0, ipfw_sysctl_dyncnt
, "I",
756 "Number of states and tracks");
757 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, dyn_max
,
758 CTLTYPE_INT
| CTLFLAG_RW
, NULL
, 0, ipfw_sysctl_dynmax
, "I",
759 "Max number of states and tracks");
761 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, state_cnt
,
762 CTLTYPE_INT
| CTLFLAG_RD
, NULL
, 0, ipfw_sysctl_statecnt
, "I",
764 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, state_max
,
765 CTLTYPE_INT
| CTLFLAG_RW
, NULL
, 0, ipfw_sysctl_statemax
, "I",
766 "Max number of states");
767 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, state_headroom
, CTLFLAG_RW
,
768 &ipfw_state_headroom
, 0, "headroom for state reap");
769 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, track_cnt
, CTLFLAG_RD
,
770 &ipfw_gd
.ipfw_trkcnt_cnt
, 0, "Number of tracks");
771 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, track_max
, CTLFLAG_RW
,
772 &ipfw_track_max
, 0, "Max number of tracks");
773 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, static_count
, CTLFLAG_RD
,
774 &static_count
, 0, "Number of static rules");
775 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_ack_lifetime
, CTLFLAG_RW
,
776 &dyn_ack_lifetime
, 0, "Lifetime of dyn. rules for acks");
777 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_syn_lifetime
, CTLFLAG_RW
,
778 &dyn_syn_lifetime
, 0, "Lifetime of dyn. rules for syn");
779 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_fin_lifetime
, CTLFLAG_RW
,
780 &dyn_fin_lifetime
, 0, "Lifetime of dyn. rules for fin");
781 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_finwait_lifetime
, CTLFLAG_RW
,
782 &dyn_finwait_lifetime
, 0, "Lifetime of dyn. rules for fin wait");
783 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_rst_lifetime
, CTLFLAG_RW
,
784 &dyn_rst_lifetime
, 0, "Lifetime of dyn. rules for rst");
785 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_udp_lifetime
, CTLFLAG_RW
,
786 &dyn_udp_lifetime
, 0, "Lifetime of dyn. rules for UDP");
787 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_short_lifetime
, CTLFLAG_RW
,
788 &dyn_short_lifetime
, 0, "Lifetime of dyn. rules for other situations");
789 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_keepalive
, CTLFLAG_RW
,
790 &dyn_keepalive
, 0, "Enable keepalives for dyn. rules");
791 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, state_scan_max
,
792 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_state_scan_max
, 0, ipfw_sysctl_scancnt
,
793 "I", "# of states to scan for each expire iteration");
794 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, state_expire_max
,
795 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_state_expire_max
, 0, ipfw_sysctl_scancnt
,
796 "I", "# of states to expire for each expire iteration");
797 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, keepalive_max
,
798 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_keepalive_max
, 0, ipfw_sysctl_scancnt
,
799 "I", "# of states to expire for each expire iteration");
800 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, state_reap_min
,
801 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_state_reap_min
, 0, ipfw_sysctl_scancnt
,
802 "I", "# of states to reap for state shortage");
803 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, track_scan_max
,
804 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_track_scan_max
, 0, ipfw_sysctl_scancnt
,
805 "I", "# of tracks to scan for each expire iteration");
806 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, track_expire_max
,
807 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_track_expire_max
, 0, ipfw_sysctl_scancnt
,
808 "I", "# of tracks to expire for each expire iteration");
809 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, track_reap_max
,
810 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_track_reap_max
, 0, ipfw_sysctl_scancnt
,
811 "I", "# of tracks to reap for track shortage");
813 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, state_reap
,
814 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
815 __offsetof(struct ipfw_context
, ipfw_sts_reap
), ipfw_sysctl_stat
,
816 "LU", "# of state reaps due to states shortage");
817 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, state_reapfailed
,
818 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
819 __offsetof(struct ipfw_context
, ipfw_sts_reapfailed
), ipfw_sysctl_stat
,
820 "LU", "# of state reap failure");
821 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, state_overflow
,
822 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
823 __offsetof(struct ipfw_context
, ipfw_sts_overflow
), ipfw_sysctl_stat
,
824 "LU", "# of state overflow");
825 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, state_nomem
,
826 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
827 __offsetof(struct ipfw_context
, ipfw_sts_nomem
), ipfw_sysctl_stat
,
828 "LU", "# of state allocation failure");
829 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, state_tcprecycled
,
830 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
831 __offsetof(struct ipfw_context
, ipfw_sts_tcprecycled
), ipfw_sysctl_stat
,
832 "LU", "# of state deleted due to fast TCP port recycling");
834 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, track_nomem
,
835 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
836 __offsetof(struct ipfw_context
, ipfw_tks_nomem
), ipfw_sysctl_stat
,
837 "LU", "# of track allocation failure");
838 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, track_reap
,
839 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
840 __offsetof(struct ipfw_context
, ipfw_tks_reap
), ipfw_sysctl_stat
,
841 "LU", "# of track reap due to tracks shortage");
842 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, track_reapfailed
,
843 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
844 __offsetof(struct ipfw_context
, ipfw_tks_reapfailed
), ipfw_sysctl_stat
,
845 "LU", "# of track reap failure");
846 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, track_overflow
,
847 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
848 __offsetof(struct ipfw_context
, ipfw_tks_overflow
), ipfw_sysctl_stat
,
849 "LU", "# of track overflow");
850 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, track_cntnomem
,
851 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
852 __offsetof(struct ipfw_context
, ipfw_tks_cntnomem
), ipfw_sysctl_stat
,
853 "LU", "# of track counter allocation failure");
854 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, frags
,
855 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
856 __offsetof(struct ipfw_context
, ipfw_frags
), ipfw_sysctl_stat
,
857 "LU", "# of IP fragements defraged");
858 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, defraged
,
859 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
860 __offsetof(struct ipfw_context
, ipfw_defraged
), ipfw_sysctl_stat
,
861 "LU", "# of IP packets after defrag");
862 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, defrag_remote
,
863 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
864 __offsetof(struct ipfw_context
, ipfw_defrag_remote
), ipfw_sysctl_stat
,
865 "LU", "# of IP packets after defrag dispatched to remote cpus");
866 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, xlated
,
867 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
868 __offsetof(struct ipfw_context
, ipfw_xlated
), ipfw_sysctl_stat
,
869 "LU", "# address/port translations");
870 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, xlate_split
,
871 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
872 __offsetof(struct ipfw_context
, ipfw_xlate_split
), ipfw_sysctl_stat
,
873 "LU", "# address/port translations split between different cpus");
874 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, xlate_conflicts
,
875 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
876 __offsetof(struct ipfw_context
, ipfw_xlate_conflicts
), ipfw_sysctl_stat
,
877 "LU", "# address/port translations conflicts on remote cpu");
878 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, xlate_cresolved
,
879 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
880 __offsetof(struct ipfw_context
, ipfw_xlate_cresolved
), ipfw_sysctl_stat
,
881 "LU", "# address/port translations conflicts resolved on remote cpu");
883 static int ipfw_state_cmp(struct ipfw_state
*,
884 struct ipfw_state
*);
885 static int ipfw_trkcnt_cmp(struct ipfw_trkcnt
*,
886 struct ipfw_trkcnt
*);
887 static int ipfw_track_cmp(struct ipfw_track
*,
888 struct ipfw_track
*);
890 RB_PROTOTYPE(ipfw_state_tree
, ipfw_state
, st_rblink
, ipfw_state_cmp
);
891 RB_GENERATE(ipfw_state_tree
, ipfw_state
, st_rblink
, ipfw_state_cmp
);
893 RB_PROTOTYPE(ipfw_trkcnt_tree
, ipfw_trkcnt
, tc_rblink
, ipfw_trkcnt_cmp
);
894 RB_GENERATE(ipfw_trkcnt_tree
, ipfw_trkcnt
, tc_rblink
, ipfw_trkcnt_cmp
);
896 RB_PROTOTYPE(ipfw_track_tree
, ipfw_track
, t_rblink
, ipfw_track_cmp
);
897 RB_GENERATE(ipfw_track_tree
, ipfw_track
, t_rblink
, ipfw_track_cmp
);
899 static int ipfw_chk(struct ip_fw_args
*);
900 static void ipfw_track_expire_ipifunc(void *);
901 static void ipfw_state_expire_ipifunc(void *);
902 static void ipfw_keepalive(void *);
903 static int ipfw_state_expire_start(struct ipfw_context
*,
905 static void ipfw_crossref_timeo(void *);
906 static void ipfw_state_remove(struct ipfw_context
*,
907 struct ipfw_state
*);
908 static void ipfw_xlat_reap_timeo(void *);
909 static void ipfw_defrag_redispatch(struct mbuf
*, int,
912 #define IPFW_TRKCNT_TOKGET lwkt_gettoken(&ipfw_gd.ipfw_trkcnt_token)
913 #define IPFW_TRKCNT_TOKREL lwkt_reltoken(&ipfw_gd.ipfw_trkcnt_token)
914 #define IPFW_TRKCNT_TOKINIT \
915 lwkt_token_init(&ipfw_gd.ipfw_trkcnt_token, "ipfw_trkcnt");
918 sa_maskedcopy(const struct sockaddr
*src
, struct sockaddr
*dst
,
919 const struct sockaddr
*netmask
)
921 const u_char
*cp1
= (const u_char
*)src
;
922 u_char
*cp2
= (u_char
*)dst
;
923 const u_char
*cp3
= (const u_char
*)netmask
;
924 u_char
*cplim
= cp2
+ *cp3
;
925 u_char
*cplim2
= cp2
+ *cp1
;
927 *cp2
++ = *cp1
++; *cp2
++ = *cp1
++; /* copies sa_len & sa_family */
932 *cp2
++ = *cp1
++ & *cp3
++;
934 bzero(cp2
, cplim2
- cp2
);
937 static __inline
uint16_t
938 pfil_cksum_fixup(uint16_t cksum
, uint16_t old
, uint16_t new, uint8_t udp
)
944 l
= cksum
+ old
- new;
945 l
= (l
>> 16) + (l
& 65535);
953 ipfw_key_build(struct ipfw_key
*key
, in_addr_t saddr
, uint16_t sport
,
954 in_addr_t daddr
, uint16_t dport
, uint8_t proto
)
961 key
->addr_u
.addrs
.addr1
= daddr
;
962 key
->addr_u
.addrs
.addr2
= saddr
;
963 key
->swap
|= IPFW_KEY_SWAP_ADDRS
;
965 key
->addr_u
.addrs
.addr1
= saddr
;
966 key
->addr_u
.addrs
.addr2
= daddr
;
970 key
->port_u
.ports
.port1
= dport
;
971 key
->port_u
.ports
.port2
= sport
;
972 key
->swap
|= IPFW_KEY_SWAP_PORTS
;
974 key
->port_u
.ports
.port1
= sport
;
975 key
->port_u
.ports
.port2
= dport
;
978 if (sport
== dport
&& (key
->swap
& IPFW_KEY_SWAP_ADDRS
))
979 key
->swap
|= IPFW_KEY_SWAP_PORTS
;
980 if (saddr
== daddr
&& (key
->swap
& IPFW_KEY_SWAP_PORTS
))
981 key
->swap
|= IPFW_KEY_SWAP_ADDRS
;
985 ipfw_key_4tuple(const struct ipfw_key
*key
, in_addr_t
*saddr
, uint16_t *sport
,
986 in_addr_t
*daddr
, uint16_t *dport
)
989 if (key
->swap
& IPFW_KEY_SWAP_ADDRS
) {
990 *saddr
= key
->addr_u
.addrs
.addr2
;
991 *daddr
= key
->addr_u
.addrs
.addr1
;
993 *saddr
= key
->addr_u
.addrs
.addr1
;
994 *daddr
= key
->addr_u
.addrs
.addr2
;
997 if (key
->swap
& IPFW_KEY_SWAP_PORTS
) {
998 *sport
= key
->port_u
.ports
.port2
;
999 *dport
= key
->port_u
.ports
.port1
;
1001 *sport
= key
->port_u
.ports
.port1
;
1002 *dport
= key
->port_u
.ports
.port2
;
1007 ipfw_state_cmp(struct ipfw_state
*s1
, struct ipfw_state
*s2
)
1010 if (s1
->st_proto
> s2
->st_proto
)
1012 if (s1
->st_proto
< s2
->st_proto
)
1015 if (s1
->st_addrs
> s2
->st_addrs
)
1017 if (s1
->st_addrs
< s2
->st_addrs
)
1020 if (s1
->st_ports
> s2
->st_ports
)
1022 if (s1
->st_ports
< s2
->st_ports
)
1025 if (s1
->st_swap
== s2
->st_swap
||
1026 (s1
->st_swap
^ s2
->st_swap
) == IPFW_KEY_SWAP_ALL
)
1029 if (s1
->st_swap
> s2
->st_swap
)
1036 ipfw_trkcnt_cmp(struct ipfw_trkcnt
*t1
, struct ipfw_trkcnt
*t2
)
1039 if (t1
->tc_proto
> t2
->tc_proto
)
1041 if (t1
->tc_proto
< t2
->tc_proto
)
1044 if (t1
->tc_addrs
> t2
->tc_addrs
)
1046 if (t1
->tc_addrs
< t2
->tc_addrs
)
1049 if (t1
->tc_ports
> t2
->tc_ports
)
1051 if (t1
->tc_ports
< t2
->tc_ports
)
1054 if (t1
->tc_ruleid
> t2
->tc_ruleid
)
1056 if (t1
->tc_ruleid
< t2
->tc_ruleid
)
1063 ipfw_track_cmp(struct ipfw_track
*t1
, struct ipfw_track
*t2
)
1066 if (t1
->t_proto
> t2
->t_proto
)
1068 if (t1
->t_proto
< t2
->t_proto
)
1071 if (t1
->t_addrs
> t2
->t_addrs
)
1073 if (t1
->t_addrs
< t2
->t_addrs
)
1076 if (t1
->t_ports
> t2
->t_ports
)
1078 if (t1
->t_ports
< t2
->t_ports
)
1081 if ((uintptr_t)t1
->t_rule
> (uintptr_t)t2
->t_rule
)
1083 if ((uintptr_t)t1
->t_rule
< (uintptr_t)t2
->t_rule
)
/*
 * Link state `s' into the per-context state containers of `ctx'.
 *
 * The state must not already carry IPFW_STATE_F_LINKED (asserted).  It is
 * inserted into ctx->ipfw_state_tree, and the visible code also appends it
 * to ctx->ipfw_state_list and sets IPFW_STATE_F_LINKED.
 *
 * NOTE(review): the lines between the RB_INSERT and the list insertion,
 * and everything after the flag update (including the return statement),
 * are not visible here.  Presumably the list insertion and flag update
 * happen only when RB_INSERT reports no duplicate, and `dup' is what gets
 * returned -- confirm against the full source.
 */
1089 static __inline
struct ipfw_state
*
1090 ipfw_state_link(struct ipfw_context
*ctx
, struct ipfw_state
*s
)
1092 struct ipfw_state
*dup
;
1094 KASSERT((s
->st_flags
& IPFW_STATE_F_LINKED
) == 0,
1095 ("state %p was linked", s
));
/* Result of the tree insertion is kept in dup. */
1096 dup
= RB_INSERT(ipfw_state_tree
, &ctx
->ipfw_state_tree
, s
);
1098 TAILQ_INSERT_TAIL(&ctx
->ipfw_state_list
, s
, st_link
);
1099 s
->st_flags
|= IPFW_STATE_F_LINKED
;
1104 static __inline
void
1105 ipfw_state_unlink(struct ipfw_context
*ctx
, struct ipfw_state
*s
)
1108 KASSERT(s
->st_flags
& IPFW_STATE_F_LINKED
,
1109 ("state %p was not linked", s
));
1110 RB_REMOVE(ipfw_state_tree
, &ctx
->ipfw_state_tree
, s
);
1111 TAILQ_REMOVE(&ctx
->ipfw_state_list
, s
, st_link
);
1112 s
->st_flags
&= ~IPFW_STATE_F_LINKED
;
1116 ipfw_state_max_set(int state_max
)
1119 ipfw_state_max
= state_max
;
1120 /* Allow 5% states over-allocation. */
1121 ipfw_state_loosecnt_updthr
= (state_max
/ 20) / netisr_ncpus
;
1125 ipfw_state_cntcoll(void)
1127 int cpu
, state_cnt
= 0;
1129 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
)
1130 state_cnt
+= ipfw_ctx
[cpu
]->ipfw_state_cnt
;
1135 ipfw_state_cntsync(void)
1139 state_cnt
= ipfw_state_cntcoll();
1140 ipfw_gd
.ipfw_state_loosecnt
= state_cnt
;
1145 ipfw_free_rule(struct ip_fw
*rule
)
1147 KASSERT(rule
->cpuid
== mycpuid
, ("rule freed on cpu%d", mycpuid
));
1148 KASSERT(rule
->refcnt
> 0, ("invalid refcnt %u", rule
->refcnt
));
1150 if (rule
->refcnt
== 0) {
1151 if (rule
->cross_rules
!= NULL
)
1152 kfree(rule
->cross_rules
, M_IPFW
);
1153 kfree(rule
, M_IPFW
);
1160 ipfw_unref_rule(void *priv
)
1162 ipfw_free_rule(priv
);
1164 KASSERT(ipfw_gd
.ipfw_refcnt
> 0,
1165 ("invalid ipfw_refcnt %d", ipfw_gd
.ipfw_refcnt
));
1166 atomic_subtract_int(&ipfw_gd
.ipfw_refcnt
, 1);
1170 static __inline
void
1171 ipfw_ref_rule(struct ip_fw
*rule
)
1173 KASSERT(rule
->cpuid
== mycpuid
, ("rule used on cpu%d", mycpuid
));
1175 atomic_add_int(&ipfw_gd
.ipfw_refcnt
, 1);
1181 * This macro maps an ip pointer into a layer3 header pointer of type T
1183 #define L3HDR(T, ip) ((T *)((uint32_t *)(ip) + (ip)->ip_hl))
1186 icmptype_match(struct ip
*ip
, ipfw_insn_u32
*cmd
)
1188 int type
= L3HDR(struct icmp
,ip
)->icmp_type
;
1189 int idx_max
= F_LEN(&cmd
->o
) - F_INSN_SIZE(ipfw_insn
);
1190 int idx
= type
/ 32;
1194 return (cmd
->d
[idx
] & (1 << (type
% 32)));
1198 icmpcode_match(struct ip
*ip
, ipfw_insn_u32
*cmd
)
1200 int code
= L3HDR(struct icmp
,ip
)->icmp_code
;
1201 int idx_max
= F_LEN(&cmd
->o
) - F_INSN_SIZE(ipfw_insn
);
1202 int idx
= code
/ 32;
1206 return (cmd
->d
[idx
] & (1 << (code
% 32)));
1209 #define TT ((1 << ICMP_ECHO) | \
1210 (1 << ICMP_ROUTERSOLICIT) | \
1211 (1 << ICMP_TSTAMP) | \
1212 (1 << ICMP_IREQ) | \
1213 (1 << ICMP_MASKREQ))
1216 is_icmp_query(struct ip
*ip
)
1218 int type
= L3HDR(struct icmp
, ip
)->icmp_type
;
1220 return (type
< 32 && (TT
& (1 << type
)));
1226 * The following checks use two arrays of 8 or 16 bits to store the
1227 * bits that we want set or clear, respectively. They are in the
1228 * low and high half of cmd->arg1 or cmd->d[0].
1230 * We scan options and store the bits we find set. We succeed if
1232 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
1234 * The code is sometimes optimized not to store additional variables.
1237 flags_match(ipfw_insn
*cmd
, uint8_t bits
)
1242 if (((cmd
->arg1
& 0xff) & bits
) != 0)
1243 return 0; /* some bits we want set were clear */
1245 want_clear
= (cmd
->arg1
>> 8) & 0xff;
1246 if ((want_clear
& bits
) != want_clear
)
1247 return 0; /* some bits we want clear were set */
1252 ipopts_match(struct ip
*ip
, ipfw_insn
*cmd
)
1254 int optlen
, bits
= 0;
1255 u_char
*cp
= (u_char
*)(ip
+ 1);
1256 int x
= (ip
->ip_hl
<< 2) - sizeof(struct ip
);
1258 for (; x
> 0; x
-= optlen
, cp
+= optlen
) {
1259 int opt
= cp
[IPOPT_OPTVAL
];
1261 if (opt
== IPOPT_EOL
)
1264 if (opt
== IPOPT_NOP
) {
1267 optlen
= cp
[IPOPT_OLEN
];
1268 if (optlen
<= 0 || optlen
> x
)
1269 return 0; /* invalid or truncated */
1274 bits
|= IP_FW_IPOPT_LSRR
;
1278 bits
|= IP_FW_IPOPT_SSRR
;
1282 bits
|= IP_FW_IPOPT_RR
;
1286 bits
|= IP_FW_IPOPT_TS
;
1293 return (flags_match(cmd
, bits
));
1297 tcpopts_match(struct ip
*ip
, ipfw_insn
*cmd
)
1299 int optlen
, bits
= 0;
1300 struct tcphdr
*tcp
= L3HDR(struct tcphdr
,ip
);
1301 u_char
*cp
= (u_char
*)(tcp
+ 1);
1302 int x
= (tcp
->th_off
<< 2) - sizeof(struct tcphdr
);
1304 for (; x
> 0; x
-= optlen
, cp
+= optlen
) {
1307 if (opt
== TCPOPT_EOL
)
1310 if (opt
== TCPOPT_NOP
) {
1320 bits
|= IP_FW_TCPOPT_MSS
;
1324 bits
|= IP_FW_TCPOPT_WINDOW
;
1327 case TCPOPT_SACK_PERMITTED
:
1329 bits
|= IP_FW_TCPOPT_SACK
;
1332 case TCPOPT_TIMESTAMP
:
1333 bits
|= IP_FW_TCPOPT_TS
;
1339 bits
|= IP_FW_TCPOPT_CC
;
1346 return (flags_match(cmd
, bits
));
1350 iface_match(struct ifnet
*ifp
, ipfw_insn_if
*cmd
)
1352 if (ifp
== NULL
) /* no iface with this packet, match fails */
1355 /* Check by name or by IP address */
1356 if (cmd
->name
[0] != '\0') { /* match by name */
1359 if (kfnmatch(cmd
->name
, ifp
->if_xname
, 0) == 0)
1362 if (strncmp(ifp
->if_xname
, cmd
->name
, IFNAMSIZ
) == 0)
1366 struct ifaddr_container
*ifac
;
1368 TAILQ_FOREACH(ifac
, &ifp
->if_addrheads
[mycpuid
], ifa_link
) {
1369 struct ifaddr
*ia
= ifac
->ifa
;
1371 if (ia
->ifa_addr
== NULL
)
1373 if (ia
->ifa_addr
->sa_family
!= AF_INET
)
1375 if (cmd
->p
.ip
.s_addr
== ((struct sockaddr_in
*)
1376 (ia
->ifa_addr
))->sin_addr
.s_addr
)
1377 return(1); /* match */
1380 return(0); /* no match, fail ... */
1383 #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0
1386 * We enter here when we have a rule with O_LOG.
1387 * XXX this function alone takes about 2Kbytes of code!
1390 ipfw_log(struct ipfw_context
*ctx
, struct ip_fw
*f
, u_int hlen
,
1391 struct ether_header
*eh
, struct mbuf
*m
, struct ifnet
*oif
)
1394 int limit_reached
= 0;
1395 char action2
[40], proto
[48], fragment
[28], abuf
[INET_ADDRSTRLEN
];
1400 if (f
== NULL
) { /* bogus pkt */
1401 if (verbose_limit
!= 0 &&
1402 ctx
->ipfw_norule_counter
>= verbose_limit
)
1404 ctx
->ipfw_norule_counter
++;
1405 if (ctx
->ipfw_norule_counter
== verbose_limit
)
1406 limit_reached
= verbose_limit
;
1408 } else { /* O_LOG is the first action, find the real one */
1409 ipfw_insn
*cmd
= ACTION_PTR(f
);
1410 ipfw_insn_log
*l
= (ipfw_insn_log
*)cmd
;
1412 if (l
->max_log
!= 0 && l
->log_left
== 0)
1415 if (l
->log_left
== 0)
1416 limit_reached
= l
->max_log
;
1417 cmd
+= F_LEN(cmd
); /* point to first action */
1418 if (cmd
->opcode
== O_PROB
)
1422 switch (cmd
->opcode
) {
1428 if (cmd
->arg1
==ICMP_REJECT_RST
) {
1430 } else if (cmd
->arg1
==ICMP_UNREACH_HOST
) {
1433 ksnprintf(SNPARGS(action2
, 0), "Unreach %d",
1447 ksnprintf(SNPARGS(action2
, 0), "Divert %d", cmd
->arg1
);
1451 ksnprintf(SNPARGS(action2
, 0), "Tee %d", cmd
->arg1
);
1455 ksnprintf(SNPARGS(action2
, 0), "SkipTo %d", cmd
->arg1
);
1459 ksnprintf(SNPARGS(action2
, 0), "Pipe %d", cmd
->arg1
);
1463 ksnprintf(SNPARGS(action2
, 0), "Queue %d", cmd
->arg1
);
1468 ipfw_insn_sa
*sa
= (ipfw_insn_sa
*)cmd
;
1471 len
= ksnprintf(SNPARGS(action2
, 0),
1473 kinet_ntoa(sa
->sa
.sin_addr
, abuf
));
1474 if (sa
->sa
.sin_port
) {
1475 ksnprintf(SNPARGS(action2
, len
), ":%d",
1487 if (hlen
== 0) { /* non-ip */
1488 ksnprintf(SNPARGS(proto
, 0), "MAC");
1490 struct ip
*ip
= mtod(m
, struct ip
*);
1491 /* these three are all aliases to the same thing */
1492 struct icmp
*const icmp
= L3HDR(struct icmp
, ip
);
1493 struct tcphdr
*const tcp
= (struct tcphdr
*)icmp
;
1494 struct udphdr
*const udp
= (struct udphdr
*)icmp
;
1496 int ip_off
, offset
, ip_len
;
1499 if (eh
!= NULL
) { /* layer 2 packets are as on the wire */
1500 ip_off
= ntohs(ip
->ip_off
);
1501 ip_len
= ntohs(ip
->ip_len
);
1503 ip_off
= ip
->ip_off
;
1504 ip_len
= ip
->ip_len
;
1506 offset
= ip_off
& IP_OFFMASK
;
1509 len
= ksnprintf(SNPARGS(proto
, 0), "TCP %s",
1510 kinet_ntoa(ip
->ip_src
, abuf
));
1512 ksnprintf(SNPARGS(proto
, len
), ":%d %s:%d",
1513 ntohs(tcp
->th_sport
),
1514 kinet_ntoa(ip
->ip_dst
, abuf
),
1515 ntohs(tcp
->th_dport
));
1517 ksnprintf(SNPARGS(proto
, len
), " %s",
1518 kinet_ntoa(ip
->ip_dst
, abuf
));
1523 len
= ksnprintf(SNPARGS(proto
, 0), "UDP %s",
1524 kinet_ntoa(ip
->ip_src
, abuf
));
1526 ksnprintf(SNPARGS(proto
, len
), ":%d %s:%d",
1527 ntohs(udp
->uh_sport
),
1528 kinet_ntoa(ip
->ip_dst
, abuf
),
1529 ntohs(udp
->uh_dport
));
1531 ksnprintf(SNPARGS(proto
, len
), " %s",
1532 kinet_ntoa(ip
->ip_dst
, abuf
));
1538 len
= ksnprintf(SNPARGS(proto
, 0),
1543 len
= ksnprintf(SNPARGS(proto
, 0), "ICMP ");
1545 len
+= ksnprintf(SNPARGS(proto
, len
), "%s",
1546 kinet_ntoa(ip
->ip_src
, abuf
));
1547 ksnprintf(SNPARGS(proto
, len
), " %s",
1548 kinet_ntoa(ip
->ip_dst
, abuf
));
1552 len
= ksnprintf(SNPARGS(proto
, 0), "P:%d %s", ip
->ip_p
,
1553 kinet_ntoa(ip
->ip_src
, abuf
));
1554 ksnprintf(SNPARGS(proto
, len
), " %s",
1555 kinet_ntoa(ip
->ip_dst
, abuf
));
1559 if (ip_off
& (IP_MF
| IP_OFFMASK
)) {
1560 ksnprintf(SNPARGS(fragment
, 0), " (frag %d:%d@%d%s)",
1561 ntohs(ip
->ip_id
), ip_len
- (ip
->ip_hl
<< 2),
1562 offset
<< 3, (ip_off
& IP_MF
) ? "+" : "");
1566 if (oif
|| m
->m_pkthdr
.rcvif
) {
1567 log(LOG_SECURITY
| LOG_INFO
,
1568 "ipfw: %d %s %s %s via %s%s\n",
1569 f
? f
->rulenum
: -1,
1570 action
, proto
, oif
? "out" : "in",
1571 oif
? oif
->if_xname
: m
->m_pkthdr
.rcvif
->if_xname
,
1574 log(LOG_SECURITY
| LOG_INFO
,
1575 "ipfw: %d %s %s [no if info]%s\n",
1576 f
? f
->rulenum
: -1,
1577 action
, proto
, fragment
);
1580 if (limit_reached
) {
1581 log(LOG_SECURITY
| LOG_NOTICE
,
1582 "ipfw: limit %d reached on entry %d\n",
1583 limit_reached
, f
? f
->rulenum
: -1);
1590 ipfw_xlat_reap(struct ipfw_xlat
*x
, struct ipfw_xlat
*slave_x
)
1592 struct ip_fw
*rule
= slave_x
->xlat_rule
;
1594 KKASSERT(rule
->cpuid
== mycpuid
);
1596 /* No more cross references; free this pair now. */
1598 kfree(slave_x
, M_IPFW
);
1600 /* See the comment in ipfw_ip_xlate_dispatch(). */
1605 ipfw_xlat_reap_dispatch(netmsg_t nm
)
1607 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
1608 struct ipfw_state
*s
, *ns
;
1610 ASSERT_NETISR_NCPUS(mycpuid
);
1614 netisr_replymsg(&ctx
->ipfw_xlatreap_nm
, 0);
1617 /* TODO: limit scanning depth */
1618 TAILQ_FOREACH_MUTABLE(s
, &ctx
->ipfw_xlatreap
, st_link
, ns
) {
1619 struct ipfw_xlat
*x
= (struct ipfw_xlat
*)s
;
1620 struct ipfw_xlat
*slave_x
= x
->xlat_pair
;
1623 crefs
= slave_x
->xlat_crefs
+ x
->xlat_crefs
;
1625 TAILQ_REMOVE(&ctx
->ipfw_xlatreap
, &x
->xlat_st
, st_link
);
1626 ipfw_xlat_reap(x
, slave_x
);
1629 if (!TAILQ_EMPTY(&ctx
->ipfw_xlatreap
)) {
1630 callout_reset(&ctx
->ipfw_xlatreap_ch
, 2, ipfw_xlat_reap_timeo
,
1631 &ctx
->ipfw_xlatreap_nm
);
1636 ipfw_xlat_reap_timeo(void *xnm
)
1638 struct netmsg_base
*nm
= xnm
;
1640 KKASSERT(mycpuid
< netisr_ncpus
);
1643 if (nm
->lmsg
.ms_flags
& MSGF_DONE
)
1644 netisr_sendmsg_oncpu(nm
);
1649 ipfw_xlat_free_dispatch(netmsg_t nmsg
)
1651 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
1652 struct ipfw_xlat
*x
= nmsg
->lmsg
.u
.ms_resultp
;
1653 struct ipfw_xlat
*slave_x
= x
->xlat_pair
;
1656 ASSERT_NETISR_NCPUS(mycpuid
);
1658 KKASSERT(slave_x
!= NULL
);
1659 KKASSERT(slave_x
->xlat_invalid
&& x
->xlat_invalid
);
1661 KASSERT((x
->xlat_flags
& IPFW_STATE_F_LINKED
) == 0,
1662 ("master xlat is still linked"));
1663 if (slave_x
->xlat_flags
& IPFW_STATE_F_LINKED
)
1664 ipfw_state_unlink(ctx
, &slave_x
->xlat_st
);
1666 /* See the comment in ipfw_ip_xlate_dispatch(). */
1667 slave_x
->xlat_crefs
--;
1669 crefs
= slave_x
->xlat_crefs
+ x
->xlat_crefs
;
1671 ipfw_xlat_reap(x
, slave_x
);
1675 if (TAILQ_EMPTY(&ctx
->ipfw_xlatreap
)) {
1676 callout_reset(&ctx
->ipfw_xlatreap_ch
, 2, ipfw_xlat_reap_timeo
,
1677 &ctx
->ipfw_xlatreap_nm
);
1681 * This pair is still referenced; defer its destruction.
1682 * YYY reuse st_link.
1684 TAILQ_INSERT_TAIL(&ctx
->ipfw_xlatreap
, &x
->xlat_st
, st_link
);
1687 static __inline
void
1688 ipfw_xlat_invalidate(struct ipfw_xlat
*x
)
1691 x
->xlat_invalid
= 1;
1692 x
->xlat_pair
->xlat_invalid
= 1;
1696 ipfw_state_del(struct ipfw_context
*ctx
, struct ipfw_state
*s
)
1698 struct ipfw_xlat
*x
, *slave_x
;
1699 struct netmsg_base
*nm
;
1701 KASSERT(s
->st_type
== O_KEEP_STATE
|| s
->st_type
== O_LIMIT
||
1702 IPFW_ISXLAT(s
->st_type
), ("invalid state type %u", s
->st_type
));
1703 KASSERT((s
->st_flags
& IPFW_STATE_F_XLATSLAVE
) == 0,
1704 ("delete slave xlat"));
1706 KASSERT(ctx
->ipfw_state_cnt
> 0,
1707 ("invalid state count %d", ctx
->ipfw_state_cnt
));
1708 ctx
->ipfw_state_cnt
--;
1709 if (ctx
->ipfw_state_loosecnt
> 0)
1710 ctx
->ipfw_state_loosecnt
--;
1713 * Unhook this state.
1715 if (s
->st_track
!= NULL
) {
1716 struct ipfw_track
*t
= s
->st_track
;
1718 KASSERT(!LIST_EMPTY(&t
->t_state_list
),
1719 ("track state list is empty"));
1720 LIST_REMOVE(s
, st_trklink
);
1722 KASSERT(*t
->t_count
> 0,
1723 ("invalid track count %d", *t
->t_count
));
1724 atomic_subtract_int(t
->t_count
, 1);
1726 ipfw_state_unlink(ctx
, s
);
1729 * Free this state. Xlat requires special processing,
1730 * since xlat are paired state and they could be on
1734 if (!IPFW_ISXLAT(s
->st_type
)) {
1735 /* Not xlat; free now. */
1740 x
= (struct ipfw_xlat
*)s
;
1742 if (x
->xlat_pair
== NULL
) {
1743 /* Not setup yet; free now. */
1748 slave_x
= x
->xlat_pair
;
1749 KKASSERT(slave_x
->xlat_flags
& IPFW_STATE_F_XLATSLAVE
);
1751 if (x
->xlat_pcpu
== mycpuid
) {
1753 * Paired states are on the same cpu; delete this
1756 KKASSERT(x
->xlat_crefs
== 0);
1757 KKASSERT(slave_x
->xlat_crefs
== 0);
1758 if (slave_x
->xlat_flags
& IPFW_STATE_F_LINKED
)
1759 ipfw_state_unlink(ctx
, &slave_x
->xlat_st
);
1761 kfree(slave_x
, M_IPFW
);
1766 * Free the paired states on the cpu owning the slave xlat.
1770 * Mark the state pair invalid; completely deleting them
1771 * may take some time.
1773 ipfw_xlat_invalidate(x
);
1775 nm
= &x
->xlat_freenm
;
1776 netmsg_init(nm
, NULL
, &netisr_apanic_rport
, MSGF_PRIORITY
,
1777 ipfw_xlat_free_dispatch
);
1778 nm
->lmsg
.u
.ms_resultp
= x
;
1780 /* See the comment in ipfw_xlate_redispatch(). */
1781 x
->xlat_rule
->cross_refs
++;
1784 netisr_sendmsg(nm
, x
->xlat_pcpu
);
1788 ipfw_state_remove(struct ipfw_context
*ctx
, struct ipfw_state
*s
)
1791 if (s
->st_flags
& IPFW_STATE_F_XLATSLAVE
) {
1792 KKASSERT(IPFW_ISXLAT(s
->st_type
));
1793 ipfw_xlat_invalidate((struct ipfw_xlat
*)s
);
1794 ipfw_state_unlink(ctx
, s
);
1797 ipfw_state_del(ctx
, s
);
1801 ipfw_state_reap(struct ipfw_context
*ctx
, int reap_max
)
1803 struct ipfw_state
*s
, *anchor
;
1806 if (reap_max
< ipfw_state_reap_min
)
1807 reap_max
= ipfw_state_reap_min
;
1809 if ((ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
) == 0) {
1811 * Kick start state expiring. Ignore scan limit,
1812 * we are short of states.
1814 ctx
->ipfw_flags
|= IPFW_FLAG_STATEREAP
;
1815 expired
= ipfw_state_expire_start(ctx
, INT_MAX
, reap_max
);
1816 ctx
->ipfw_flags
&= ~IPFW_FLAG_STATEREAP
;
1821 * States are being expired.
1824 if (ctx
->ipfw_state_cnt
== 0)
1828 anchor
= &ctx
->ipfw_stateexp_anch
;
1829 while ((s
= TAILQ_NEXT(anchor
, st_link
)) != NULL
) {
1831 * Ignore scan limit; we are short of states.
1834 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
1835 TAILQ_INSERT_AFTER(&ctx
->ipfw_state_list
, s
, anchor
, st_link
);
1837 if (IPFW_STATE_SCANSKIP(s
))
1840 if (IPFW_STATE_ISDEAD(s
) || IPFW_STATE_TCPCLOSED(s
)) {
1841 ipfw_state_del(ctx
, s
);
1842 if (++expired
>= reap_max
)
1844 if ((expired
& 0xff) == 0 &&
1845 ipfw_state_cntcoll() + ipfw_state_headroom
<=
1852 * Leave the anchor on the list, even if the end of the list has
1853 * been reached. ipfw_state_expire_more_dispatch() will handle
1860 ipfw_state_flush(struct ipfw_context
*ctx
, const struct ip_fw
*rule
)
1862 struct ipfw_state
*s
, *sn
;
1864 TAILQ_FOREACH_MUTABLE(s
, &ctx
->ipfw_state_list
, st_link
, sn
) {
1865 if (IPFW_STATE_SCANSKIP(s
))
1867 if (rule
!= NULL
&& s
->st_rule
!= rule
)
1869 ipfw_state_del(ctx
, s
);
1874 ipfw_state_expire_done(struct ipfw_context
*ctx
)
1877 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
,
1878 ("stateexp is not in progress"));
1879 ctx
->ipfw_flags
&= ~IPFW_FLAG_STATEEXP
;
1880 callout_reset(&ctx
->ipfw_stateto_ch
, hz
,
1881 ipfw_state_expire_ipifunc
, NULL
);
1885 ipfw_state_expire_more(struct ipfw_context
*ctx
)
1887 struct netmsg_base
*nm
= &ctx
->ipfw_stateexp_more
;
1889 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
,
1890 ("stateexp is not in progress"));
1891 KASSERT(nm
->lmsg
.ms_flags
& MSGF_DONE
,
1892 ("stateexp more did not finish"));
1893 netisr_sendmsg_oncpu(nm
);
1897 ipfw_state_expire_loop(struct ipfw_context
*ctx
, struct ipfw_state
*anchor
,
1898 int scan_max
, int expire_max
)
1900 struct ipfw_state
*s
;
1901 int scanned
= 0, expired
= 0;
1903 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
,
1904 ("stateexp is not in progress"));
1906 while ((s
= TAILQ_NEXT(anchor
, st_link
)) != NULL
) {
1907 if (scanned
++ >= scan_max
) {
1908 ipfw_state_expire_more(ctx
);
1912 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
1913 TAILQ_INSERT_AFTER(&ctx
->ipfw_state_list
, s
, anchor
, st_link
);
1915 if (IPFW_STATE_SCANSKIP(s
))
1918 if (IPFW_STATE_ISDEAD(s
) ||
1919 ((ctx
->ipfw_flags
& IPFW_FLAG_STATEREAP
) &&
1920 IPFW_STATE_TCPCLOSED(s
))) {
1921 ipfw_state_del(ctx
, s
);
1922 if (++expired
>= expire_max
) {
1923 ipfw_state_expire_more(ctx
);
1926 if ((ctx
->ipfw_flags
& IPFW_FLAG_STATEREAP
) &&
1927 (expired
& 0xff) == 0 &&
1928 ipfw_state_cntcoll() + ipfw_state_headroom
<=
1930 ipfw_state_expire_more(ctx
);
1935 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
1936 ipfw_state_expire_done(ctx
);
1941 ipfw_state_expire_more_dispatch(netmsg_t nm
)
1943 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
1944 struct ipfw_state
*anchor
;
1946 ASSERT_NETISR_NCPUS(mycpuid
);
1947 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
,
1948 ("statexp is not in progress"));
1951 netisr_replymsg(&nm
->base
, 0);
1953 anchor
= &ctx
->ipfw_stateexp_anch
;
1954 if (ctx
->ipfw_state_cnt
== 0) {
1955 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
1956 ipfw_state_expire_done(ctx
);
1959 ipfw_state_expire_loop(ctx
, anchor
,
1960 ipfw_state_scan_max
, ipfw_state_expire_max
);
1964 ipfw_state_expire_start(struct ipfw_context
*ctx
, int scan_max
, int expire_max
)
1966 struct ipfw_state
*anchor
;
1968 KASSERT((ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
) == 0,
1969 ("stateexp is in progress"));
1970 ctx
->ipfw_flags
|= IPFW_FLAG_STATEEXP
;
1972 if (ctx
->ipfw_state_cnt
== 0) {
1973 ipfw_state_expire_done(ctx
);
1978 * Do not expire more than once per second, it is useless.
1980 if ((ctx
->ipfw_flags
& IPFW_FLAG_STATEREAP
) == 0 &&
1981 ctx
->ipfw_state_lastexp
== time_uptime
) {
1982 ipfw_state_expire_done(ctx
);
1985 ctx
->ipfw_state_lastexp
= time_uptime
;
1987 anchor
= &ctx
->ipfw_stateexp_anch
;
1988 TAILQ_INSERT_HEAD(&ctx
->ipfw_state_list
, anchor
, st_link
);
1989 return (ipfw_state_expire_loop(ctx
, anchor
, scan_max
, expire_max
));
1993 ipfw_state_expire_dispatch(netmsg_t nm
)
1995 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
1997 ASSERT_NETISR_NCPUS(mycpuid
);
2001 netisr_replymsg(&nm
->base
, 0);
2004 if (ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
) {
2005 /* Running; done. */
2008 ipfw_state_expire_start(ctx
,
2009 ipfw_state_scan_max
, ipfw_state_expire_max
);
2013 ipfw_state_expire_ipifunc(void *dummy __unused
)
2015 struct netmsg_base
*msg
;
2017 KKASSERT(mycpuid
< netisr_ncpus
);
2018 msg
= &ipfw_ctx
[mycpuid
]->ipfw_stateexp_nm
;
2021 if (msg
->lmsg
.ms_flags
& MSGF_DONE
)
2022 netisr_sendmsg_oncpu(msg
);
2027 ipfw_state_update_tcp(struct ipfw_state
*s
, int dir
, const struct tcphdr
*tcp
)
2029 uint32_t seq
= ntohl(tcp
->th_seq
);
2030 uint32_t ack
= ntohl(tcp
->th_ack
);
2032 if (tcp
->th_flags
& TH_RST
)
2035 if (dir
== MATCH_FORWARD
) {
2036 if ((s
->st_flags
& IPFW_STATE_F_SEQFWD
) == 0) {
2037 s
->st_flags
|= IPFW_STATE_F_SEQFWD
;
2038 s
->st_seq_fwd
= seq
;
2039 } else if (SEQ_GEQ(seq
, s
->st_seq_fwd
)) {
2040 s
->st_seq_fwd
= seq
;
2042 /* Out-of-sequence; done. */
2045 if (tcp
->th_flags
& TH_ACK
) {
2046 if ((s
->st_flags
& IPFW_STATE_F_ACKFWD
) == 0) {
2047 s
->st_flags
|= IPFW_STATE_F_ACKFWD
;
2048 s
->st_ack_fwd
= ack
;
2049 } else if (SEQ_GEQ(ack
, s
->st_ack_fwd
)) {
2050 s
->st_ack_fwd
= ack
;
2052 /* Out-of-sequence; done. */
2056 if ((s
->st_state
& ((TH_FIN
| TH_ACK
) << 8)) ==
2057 (TH_FIN
<< 8) && s
->st_ack_fwd
== s
->st_seq_rev
+ 1)
2058 s
->st_state
|= (TH_ACK
<< 8);
2061 if ((s
->st_flags
& IPFW_STATE_F_SEQREV
) == 0) {
2062 s
->st_flags
|= IPFW_STATE_F_SEQREV
;
2063 s
->st_seq_rev
= seq
;
2064 } else if (SEQ_GEQ(seq
, s
->st_seq_rev
)) {
2065 s
->st_seq_rev
= seq
;
2067 /* Out-of-sequence; done. */
2070 if (tcp
->th_flags
& TH_ACK
) {
2071 if ((s
->st_flags
& IPFW_STATE_F_ACKREV
) == 0) {
2072 s
->st_flags
|= IPFW_STATE_F_ACKREV
;
2074 } else if (SEQ_GEQ(ack
, s
->st_ack_rev
)) {
2075 s
->st_ack_rev
= ack
;
2077 /* Out-of-sequence; done. */
2081 if ((s
->st_state
& (TH_FIN
| TH_ACK
)) == TH_FIN
&&
2082 s
->st_ack_rev
== s
->st_seq_fwd
+ 1)
2083 s
->st_state
|= TH_ACK
;
2090 ipfw_state_update(const struct ipfw_flow_id
*pkt
, int dir
,
2091 const struct tcphdr
*tcp
, struct ipfw_state
*s
)
2094 if (pkt
->proto
== IPPROTO_TCP
) { /* update state according to flags */
2095 u_char flags
= pkt
->flags
& IPFW_STATE_TCPFLAGS
;
2097 if (tcp
!= NULL
&& !ipfw_state_update_tcp(s
, dir
, tcp
))
2100 s
->st_state
|= (dir
== MATCH_FORWARD
) ? flags
: (flags
<< 8);
2101 switch (s
->st_state
& IPFW_STATE_TCPSTATES
) {
2102 case TH_SYN
: /* opening */
2103 s
->st_expire
= time_uptime
+ dyn_syn_lifetime
;
2106 case BOTH_SYN
: /* move to established */
2107 case BOTH_SYN
| TH_FIN
: /* one side tries to close */
2108 case BOTH_SYN
| (TH_FIN
<< 8):
2109 s
->st_expire
= time_uptime
+ dyn_ack_lifetime
;
2112 case BOTH_SYN
| BOTH_FIN
: /* both sides closed */
2113 if ((s
->st_state
& BOTH_FINACK
) == BOTH_FINACK
) {
2114 /* And both FINs were ACKed. */
2115 s
->st_expire
= time_uptime
+ dyn_fin_lifetime
;
2117 s
->st_expire
= time_uptime
+
2118 dyn_finwait_lifetime
;
2125 * reset or some invalid combination, but can also
2126 * occur if we use keep-state the wrong way.
2128 if ((s
->st_state
& ((TH_RST
<< 8) | TH_RST
)) == 0)
2129 kprintf("invalid state: 0x%x\n", s
->st_state
);
2131 s
->st_expire
= time_uptime
+ dyn_rst_lifetime
;
2134 } else if (pkt
->proto
== IPPROTO_UDP
) {
2135 s
->st_expire
= time_uptime
+ dyn_udp_lifetime
;
2137 /* other protocols */
2138 s
->st_expire
= time_uptime
+ dyn_short_lifetime
;
2145 static struct ipfw_state
*
2146 ipfw_state_lookup(struct ipfw_context
*ctx
, const struct ipfw_flow_id
*pkt
,
2147 int *match_direction
, const struct tcphdr
*tcp
)
2149 struct ipfw_state
*key
, *s
;
2150 int dir
= MATCH_NONE
;
2152 key
= &ctx
->ipfw_state_tmpkey
;
2153 ipfw_key_build(&key
->st_key
, pkt
->src_ip
, pkt
->src_port
,
2154 pkt
->dst_ip
, pkt
->dst_port
, pkt
->proto
);
2155 s
= RB_FIND(ipfw_state_tree
, &ctx
->ipfw_state_tree
, key
);
2157 goto done
; /* not found. */
2158 if (IPFW_STATE_ISDEAD(s
)) {
2159 ipfw_state_remove(ctx
, s
);
2163 if ((pkt
->flags
& TH_SYN
) && IPFW_STATE_TCPCLOSED(s
)) {
2164 /* TCP ports recycling is too fast. */
2165 ctx
->ipfw_sts_tcprecycled
++;
2166 ipfw_state_remove(ctx
, s
);
2171 if (s
->st_swap
== key
->st_swap
) {
2172 dir
= MATCH_FORWARD
;
2174 KASSERT((s
->st_swap
& key
->st_swap
) == 0,
2175 ("found mismatch state"));
2176 dir
= MATCH_REVERSE
;
2179 /* Update this state. */
2180 ipfw_state_update(pkt
, dir
, tcp
, s
);
2182 if (s
->st_track
!= NULL
) {
2183 /* This track has been used. */
2184 s
->st_track
->t_expire
= time_uptime
+ dyn_short_lifetime
;
2187 if (match_direction
)
2188 *match_direction
= dir
;
2192 static struct ipfw_state
*
2193 ipfw_state_alloc(struct ipfw_context
*ctx
, const struct ipfw_flow_id
*id
,
2194 uint16_t type
, struct ip_fw
*rule
, const struct tcphdr
*tcp
)
2196 struct ipfw_state
*s
;
2199 KASSERT(type
== O_KEEP_STATE
|| type
== O_LIMIT
|| IPFW_ISXLAT(type
),
2200 ("invalid state type %u", type
));
2202 sz
= sizeof(struct ipfw_state
);
2203 if (IPFW_ISXLAT(type
))
2204 sz
= sizeof(struct ipfw_xlat
);
2206 s
= kmalloc(sz
, M_IPFW
, M_INTWAIT
| M_NULLOK
| M_ZERO
);
2208 ctx
->ipfw_sts_nomem
++;
2212 ipfw_key_build(&s
->st_key
, id
->src_ip
, id
->src_port
,
2213 id
->dst_ip
, id
->dst_port
, id
->proto
);
2217 if (IPFW_ISXLAT(type
)) {
2218 struct ipfw_xlat
*x
= (struct ipfw_xlat
*)s
;
2220 x
->xlat_dir
= MATCH_NONE
;
2225 * Update this state:
2226 * Set st_expire and st_state.
2228 ipfw_state_update(id
, MATCH_FORWARD
, tcp
, s
);
2233 static struct ipfw_state
*
2234 ipfw_state_add(struct ipfw_context
*ctx
, const struct ipfw_flow_id
*id
,
2235 uint16_t type
, struct ip_fw
*rule
, struct ipfw_track
*t
,
2236 const struct tcphdr
*tcp
)
2238 struct ipfw_state
*s
, *dup
;
2240 s
= ipfw_state_alloc(ctx
, id
, type
, rule
, tcp
);
2244 ctx
->ipfw_state_cnt
++;
2245 ctx
->ipfw_state_loosecnt
++;
2246 if (ctx
->ipfw_state_loosecnt
>= ipfw_state_loosecnt_updthr
) {
2247 ipfw_gd
.ipfw_state_loosecnt
+= ctx
->ipfw_state_loosecnt
;
2248 ctx
->ipfw_state_loosecnt
= 0;
2251 dup
= ipfw_state_link(ctx
, s
);
2253 panic("ipfw: %u state exists %p", type
, dup
);
2256 /* Keep the track referenced. */
2257 LIST_INSERT_HEAD(&t
->t_state_list
, s
, st_trklink
);
2264 ipfw_track_free(struct ipfw_context
*ctx
, struct ipfw_track
*t
)
2266 struct ipfw_trkcnt
*trk
;
2267 boolean_t trk_freed
= FALSE
;
2269 KASSERT(t
->t_count
!= NULL
, ("track anchor"));
2270 KASSERT(LIST_EMPTY(&t
->t_state_list
),
2271 ("invalid track is still referenced"));
2274 KASSERT(trk
!= NULL
, ("track has no trkcnt"));
2276 RB_REMOVE(ipfw_track_tree
, &ctx
->ipfw_track_tree
, t
);
2277 TAILQ_REMOVE(&ctx
->ipfw_track_list
, t
, t_link
);
2281 * fdrop() style reference counting.
2282 * See kern/kern_descrip.c fdrop().
2285 int refs
= trk
->tc_refs
;
2288 KASSERT(refs
> 0, ("invalid trkcnt refs %d", refs
));
2291 if (atomic_cmpset_int(&trk
->tc_refs
, refs
, 0)) {
2292 KASSERT(trk
->tc_count
== 0,
2293 ("%d states reference this trkcnt",
2295 RB_REMOVE(ipfw_trkcnt_tree
,
2296 &ipfw_gd
.ipfw_trkcnt_tree
, trk
);
2298 KASSERT(ipfw_gd
.ipfw_trkcnt_cnt
> 0,
2299 ("invalid trkcnt cnt %d",
2300 ipfw_gd
.ipfw_trkcnt_cnt
));
2301 ipfw_gd
.ipfw_trkcnt_cnt
--;
2304 if (ctx
->ipfw_trkcnt_spare
== NULL
)
2305 ctx
->ipfw_trkcnt_spare
= trk
;
2313 } else if (atomic_cmpset_int(&trk
->tc_refs
, refs
, refs
- 1)) {
2322 ipfw_track_flush(struct ipfw_context
*ctx
, struct ip_fw
*rule
)
2324 struct ipfw_track
*t
, *tn
;
2326 TAILQ_FOREACH_MUTABLE(t
, &ctx
->ipfw_track_list
, t_link
, tn
) {
2327 if (t
->t_count
== NULL
) /* anchor */
2329 if (rule
!= NULL
&& t
->t_rule
!= rule
)
2331 ipfw_track_free(ctx
, t
);
2336 ipfw_track_state_expire(struct ipfw_context
*ctx
, struct ipfw_track
*t
,
2339 struct ipfw_state
*s
, *sn
;
2340 boolean_t ret
= FALSE
;
2342 KASSERT(t
->t_count
!= NULL
, ("track anchor"));
2344 if (LIST_EMPTY(&t
->t_state_list
))
2348 * Do not expire more than once per second, it is useless.
2350 if (t
->t_lastexp
== time_uptime
)
2352 t
->t_lastexp
= time_uptime
;
2354 LIST_FOREACH_MUTABLE(s
, &t
->t_state_list
, st_trklink
, sn
) {
2355 if (IPFW_STATE_ISDEAD(s
) || (reap
&& IPFW_STATE_TCPCLOSED(s
))) {
2356 KASSERT(s
->st_track
== t
,
2357 ("state track %p does not match %p",
2359 ipfw_state_del(ctx
, s
);
2366 static __inline
struct ipfw_trkcnt
*
2367 ipfw_trkcnt_alloc(struct ipfw_context
*ctx
)
2369 struct ipfw_trkcnt
*trk
;
2371 if (ctx
->ipfw_trkcnt_spare
!= NULL
) {
2372 trk
= ctx
->ipfw_trkcnt_spare
;
2373 ctx
->ipfw_trkcnt_spare
= NULL
;
2375 trk
= kmalloc_cachealign(sizeof(*trk
), M_IPFW
,
2376 M_INTWAIT
| M_NULLOK
);
2382 ipfw_track_expire_done(struct ipfw_context
*ctx
)
2385 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
,
2386 ("trackexp is not in progress"));
2387 ctx
->ipfw_flags
&= ~IPFW_FLAG_TRACKEXP
;
2388 callout_reset(&ctx
->ipfw_trackto_ch
, hz
,
2389 ipfw_track_expire_ipifunc
, NULL
);
2393 ipfw_track_expire_more(struct ipfw_context
*ctx
)
2395 struct netmsg_base
*nm
= &ctx
->ipfw_trackexp_more
;
2397 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
,
2398 ("trackexp is not in progress"));
2399 KASSERT(nm
->lmsg
.ms_flags
& MSGF_DONE
,
2400 ("trackexp more did not finish"));
2401 netisr_sendmsg_oncpu(nm
);
2405 ipfw_track_expire_loop(struct ipfw_context
*ctx
, struct ipfw_track
*anchor
,
2406 int scan_max
, int expire_max
)
2408 struct ipfw_track
*t
;
2409 int scanned
= 0, expired
= 0;
2410 boolean_t reap
= FALSE
;
2412 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
,
2413 ("trackexp is not in progress"));
2415 if (ctx
->ipfw_flags
& IPFW_FLAG_TRACKREAP
)
2418 while ((t
= TAILQ_NEXT(anchor
, t_link
)) != NULL
) {
2419 if (scanned
++ >= scan_max
) {
2420 ipfw_track_expire_more(ctx
);
2424 TAILQ_REMOVE(&ctx
->ipfw_track_list
, anchor
, t_link
);
2425 TAILQ_INSERT_AFTER(&ctx
->ipfw_track_list
, t
, anchor
, t_link
);
2427 if (t
->t_count
== NULL
) /* anchor */
2430 ipfw_track_state_expire(ctx
, t
, reap
);
2431 if (!LIST_EMPTY(&t
->t_state_list
)) {
2432 /* There are states referencing this track. */
2436 if (TIME_LEQ(t
->t_expire
, time_uptime
) || reap
) {
2438 if (ipfw_track_free(ctx
, t
)) {
2439 if (++expired
>= expire_max
) {
2440 ipfw_track_expire_more(ctx
);
2446 TAILQ_REMOVE(&ctx
->ipfw_track_list
, anchor
, t_link
);
2447 ipfw_track_expire_done(ctx
);
2452 ipfw_track_expire_start(struct ipfw_context
*ctx
, int scan_max
, int expire_max
)
2454 struct ipfw_track
*anchor
;
2456 KASSERT((ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
) == 0,
2457 ("trackexp is in progress"));
2458 ctx
->ipfw_flags
|= IPFW_FLAG_TRACKEXP
;
2460 if (RB_EMPTY(&ctx
->ipfw_track_tree
)) {
2461 ipfw_track_expire_done(ctx
);
2466 * Do not expire more than once per second, it is useless.
2468 if ((ctx
->ipfw_flags
& IPFW_FLAG_TRACKREAP
) == 0 &&
2469 ctx
->ipfw_track_lastexp
== time_uptime
) {
2470 ipfw_track_expire_done(ctx
);
2473 ctx
->ipfw_track_lastexp
= time_uptime
;
2475 anchor
= &ctx
->ipfw_trackexp_anch
;
2476 TAILQ_INSERT_HEAD(&ctx
->ipfw_track_list
, anchor
, t_link
);
2477 return (ipfw_track_expire_loop(ctx
, anchor
, scan_max
, expire_max
));
2481 ipfw_track_expire_more_dispatch(netmsg_t nm
)
2483 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
2484 struct ipfw_track
*anchor
;
2486 ASSERT_NETISR_NCPUS(mycpuid
);
2487 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
,
2488 ("trackexp is not in progress"));
2491 netisr_replymsg(&nm
->base
, 0);
2493 anchor
= &ctx
->ipfw_trackexp_anch
;
2494 if (RB_EMPTY(&ctx
->ipfw_track_tree
)) {
2495 TAILQ_REMOVE(&ctx
->ipfw_track_list
, anchor
, t_link
);
2496 ipfw_track_expire_done(ctx
);
2499 ipfw_track_expire_loop(ctx
, anchor
,
2500 ipfw_track_scan_max
, ipfw_track_expire_max
);
2504 ipfw_track_expire_dispatch(netmsg_t nm
)
2506 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
2508 ASSERT_NETISR_NCPUS(mycpuid
);
2512 netisr_replymsg(&nm
->base
, 0);
2515 if (ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
) {
2516 /* Running; done. */
2519 ipfw_track_expire_start(ctx
,
2520 ipfw_track_scan_max
, ipfw_track_expire_max
);
2524 ipfw_track_expire_ipifunc(void *dummy __unused
)
2526 struct netmsg_base
*msg
;
2528 KKASSERT(mycpuid
< netisr_ncpus
);
2529 msg
= &ipfw_ctx
[mycpuid
]->ipfw_trackexp_nm
;
2532 if (msg
->lmsg
.ms_flags
& MSGF_DONE
)
2533 netisr_sendmsg_oncpu(msg
);
2538 ipfw_track_reap(struct ipfw_context
*ctx
)
2540 struct ipfw_track
*t
, *anchor
;
2543 if ((ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
) == 0) {
2545 * Kick start track expiring. Ignore scan limit,
2546 * we are short of tracks.
2548 ctx
->ipfw_flags
|= IPFW_FLAG_TRACKREAP
;
2549 expired
= ipfw_track_expire_start(ctx
, INT_MAX
,
2550 ipfw_track_reap_max
);
2551 ctx
->ipfw_flags
&= ~IPFW_FLAG_TRACKREAP
;
2556 * Tracks are being expired.
2559 if (RB_EMPTY(&ctx
->ipfw_track_tree
))
2563 anchor
= &ctx
->ipfw_trackexp_anch
;
2564 while ((t
= TAILQ_NEXT(anchor
, t_link
)) != NULL
) {
2566 * Ignore scan limit; we are short of tracks.
2569 TAILQ_REMOVE(&ctx
->ipfw_track_list
, anchor
, t_link
);
2570 TAILQ_INSERT_AFTER(&ctx
->ipfw_track_list
, t
, anchor
, t_link
);
2572 if (t
->t_count
== NULL
) /* anchor */
2575 ipfw_track_state_expire(ctx
, t
, TRUE
);
2576 if (!LIST_EMPTY(&t
->t_state_list
)) {
2577 /* There are states referencing this track. */
2581 if (ipfw_track_free(ctx
, t
)) {
2582 if (++expired
>= ipfw_track_reap_max
) {
2583 ipfw_track_expire_more(ctx
);
2590 * Leave the anchor on the list, even if the end of the list has
2591 * been reached. ipfw_track_expire_more_dispatch() will handle
2597 static struct ipfw_track
*
2598 ipfw_track_alloc(struct ipfw_context
*ctx
, const struct ipfw_flow_id
*id
,
2599 uint16_t limit_mask
, struct ip_fw
*rule
)
2601 struct ipfw_track
*key
, *t
, *dup
;
2602 struct ipfw_trkcnt
*trk
, *ret
;
2603 boolean_t do_expire
= FALSE
;
2605 KASSERT(rule
->track_ruleid
!= 0,
2606 ("rule %u has no track ruleid", rule
->rulenum
));
2608 key
= &ctx
->ipfw_track_tmpkey
;
2609 key
->t_proto
= id
->proto
;
2613 if (limit_mask
& DYN_SRC_ADDR
)
2614 key
->t_saddr
= id
->src_ip
;
2615 if (limit_mask
& DYN_DST_ADDR
)
2616 key
->t_daddr
= id
->dst_ip
;
2617 if (limit_mask
& DYN_SRC_PORT
)
2618 key
->t_sport
= id
->src_port
;
2619 if (limit_mask
& DYN_DST_PORT
)
2620 key
->t_dport
= id
->dst_port
;
2622 t
= RB_FIND(ipfw_track_tree
, &ctx
->ipfw_track_tree
, key
);
2626 t
= kmalloc(sizeof(*t
), M_IPFW
, M_INTWAIT
| M_NULLOK
);
2628 ctx
->ipfw_tks_nomem
++;
2632 t
->t_key
= key
->t_key
;
2635 LIST_INIT(&t
->t_state_list
);
2637 if (ipfw_gd
.ipfw_trkcnt_cnt
>= ipfw_track_max
) {
2638 time_t globexp
, uptime
;
2644 * Do not expire globally more than once per second,
2647 uptime
= time_uptime
;
2648 globexp
= ipfw_gd
.ipfw_track_globexp
;
2649 if (globexp
!= uptime
&&
2650 atomic_cmpset_long(&ipfw_gd
.ipfw_track_globexp
,
2654 /* Expire tracks on other CPUs. */
2655 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
) {
2658 lwkt_send_ipiq(globaldata_find(cpu
),
2659 ipfw_track_expire_ipifunc
, NULL
);
2663 trk
= ipfw_trkcnt_alloc(ctx
);
2666 struct ipfw_trkcnt
*tkey
;
2668 tkey
= &ctx
->ipfw_trkcnt_tmpkey
;
2669 key
= NULL
; /* tkey overlaps key */
2671 tkey
->tc_key
= t
->t_key
;
2672 tkey
->tc_ruleid
= rule
->track_ruleid
;
2675 trk
= RB_FIND(ipfw_trkcnt_tree
, &ipfw_gd
.ipfw_trkcnt_tree
,
2680 ctx
->ipfw_tks_reap
++;
2681 if (ipfw_track_reap(ctx
) > 0) {
2682 if (ipfw_gd
.ipfw_trkcnt_cnt
<
2684 trk
= ipfw_trkcnt_alloc(ctx
);
2687 ctx
->ipfw_tks_cntnomem
++;
2689 ctx
->ipfw_tks_overflow
++;
2692 ctx
->ipfw_tks_reapfailed
++;
2693 ctx
->ipfw_tks_overflow
++;
2696 ctx
->ipfw_tks_cntnomem
++;
2701 KASSERT(trk
->tc_refs
> 0 && trk
->tc_refs
< netisr_ncpus
,
2702 ("invalid trkcnt refs %d", trk
->tc_refs
));
2703 atomic_add_int(&trk
->tc_refs
, 1);
2707 trk
->tc_key
= t
->t_key
;
2708 trk
->tc_ruleid
= rule
->track_ruleid
;
2712 trk
->tc_rulenum
= rule
->rulenum
;
2715 ret
= RB_INSERT(ipfw_trkcnt_tree
, &ipfw_gd
.ipfw_trkcnt_tree
,
2718 KASSERT(ret
->tc_refs
> 0 &&
2719 ret
->tc_refs
< netisr_ncpus
,
2720 ("invalid trkcnt refs %d", ret
->tc_refs
));
2721 KASSERT(ctx
->ipfw_trkcnt_spare
== NULL
,
2722 ("trkcnt spare was installed"));
2723 ctx
->ipfw_trkcnt_spare
= trk
;
2726 ipfw_gd
.ipfw_trkcnt_cnt
++;
2728 atomic_add_int(&trk
->tc_refs
, 1);
2731 t
->t_count
= &trk
->tc_count
;
2734 dup
= RB_INSERT(ipfw_track_tree
, &ctx
->ipfw_track_tree
, t
);
2736 panic("ipfw: track exists");
2737 TAILQ_INSERT_TAIL(&ctx
->ipfw_track_list
, t
, t_link
);
2739 t
->t_expire
= time_uptime
+ dyn_short_lifetime
;
2744 * Install state for rule type cmd->o.opcode
2746 * Returns NULL if state is not installed because of errors or because
2747 * states limitations are enforced.
2749 static struct ipfw_state
*
2750 ipfw_state_install(struct ipfw_context
*ctx
, struct ip_fw
*rule
,
2751 ipfw_insn_limit
*cmd
, struct ip_fw_args
*args
, const struct tcphdr
*tcp
)
2753 struct ipfw_state
*s
;
2754 struct ipfw_track
*t
;
2757 if (ipfw_gd
.ipfw_state_loosecnt
>= ipfw_state_max
&&
2758 (diff
= (ipfw_state_cntsync() - ipfw_state_max
)) >= 0) {
2759 boolean_t overflow
= TRUE
;
2761 ctx
->ipfw_sts_reap
++;
2762 if (ipfw_state_reap(ctx
, diff
) == 0)
2763 ctx
->ipfw_sts_reapfailed
++;
2764 if (ipfw_state_cntsync() < ipfw_state_max
)
2768 time_t globexp
, uptime
;
2772 * Do not expire globally more than once per second,
2775 uptime
= time_uptime
;
2776 globexp
= ipfw_gd
.ipfw_state_globexp
;
2777 if (globexp
== uptime
||
2778 !atomic_cmpset_long(&ipfw_gd
.ipfw_state_globexp
,
2780 ctx
->ipfw_sts_overflow
++;
2784 /* Expire states on other CPUs. */
2785 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
) {
2788 lwkt_send_ipiq(globaldata_find(cpu
),
2789 ipfw_state_expire_ipifunc
, NULL
);
2791 ctx
->ipfw_sts_overflow
++;
2796 switch (cmd
->o
.opcode
) {
2797 case O_KEEP_STATE
: /* bidir rule */
2799 s
= ipfw_state_add(ctx
, &args
->f_id
, cmd
->o
.opcode
, rule
, NULL
,
2805 case O_LIMIT
: /* limit number of sessions */
2806 t
= ipfw_track_alloc(ctx
, &args
->f_id
, cmd
->limit_mask
, rule
);
2810 if (*t
->t_count
>= cmd
->conn_limit
) {
2811 if (!ipfw_track_state_expire(ctx
, t
, TRUE
))
2815 count
= *t
->t_count
;
2816 if (count
>= cmd
->conn_limit
)
2818 if (atomic_cmpset_int(t
->t_count
, count
, count
+ 1))
2822 s
= ipfw_state_add(ctx
, &args
->f_id
, O_LIMIT
, rule
, t
, tcp
);
2825 atomic_subtract_int(t
->t_count
, 1);
2831 panic("unknown state type %u\n", cmd
->o
.opcode
);
2834 if (s
->st_type
== O_REDIRECT
) {
2835 struct ipfw_xlat
*x
= (struct ipfw_xlat
*)s
;
2836 ipfw_insn_rdr
*r
= (ipfw_insn_rdr
*)cmd
;
2838 x
->xlat_addr
= r
->addr
.s_addr
;
2839 x
->xlat_port
= r
->port
;
2840 x
->xlat_ifp
= args
->m
->m_pkthdr
.rcvif
;
2841 x
->xlat_dir
= MATCH_FORWARD
;
2842 KKASSERT(x
->xlat_ifp
!= NULL
);
2848 ipfw_table_lookup(struct ipfw_context
*ctx
, uint16_t tableid
,
2849 const struct in_addr
*in
)
2851 struct radix_node_head
*rnh
;
2852 struct sockaddr_in sin
;
2853 struct ipfw_tblent
*te
;
2855 KASSERT(tableid
< ipfw_table_max
, ("invalid tableid %u", tableid
));
2856 rnh
= ctx
->ipfw_tables
[tableid
];
2858 return (0); /* no match */
2860 memset(&sin
, 0, sizeof(sin
));
2861 sin
.sin_family
= AF_INET
;
2862 sin
.sin_len
= sizeof(sin
);
2865 te
= (struct ipfw_tblent
*)rnh
->rnh_matchaddr((char *)&sin
, rnh
);
2867 return (0); /* no match */
2870 te
->te_lastuse
= time_second
;
2871 return (1); /* match */
2875 * Transmit a TCP packet, containing either a RST or a keepalive.
2876 * When flags & TH_RST, we are sending a RST packet, because of a
2877 * "reset" action matched the packet.
2878 * Otherwise we are sending a keepalive, and flags & TH_
2880 * Only {src,dst}_{ip,port} of "id" are used.
2883 send_pkt(const struct ipfw_flow_id
*id
, uint32_t seq
, uint32_t ack
, int flags
)
2888 struct route sro
; /* fake route */
2890 MGETHDR(m
, M_NOWAIT
, MT_HEADER
);
2893 m
->m_pkthdr
.rcvif
= NULL
;
2894 m
->m_pkthdr
.len
= m
->m_len
= sizeof(struct ip
) + sizeof(struct tcphdr
);
2895 m
->m_data
+= max_linkhdr
;
2897 ip
= mtod(m
, struct ip
*);
2898 bzero(ip
, m
->m_len
);
2899 tcp
= (struct tcphdr
*)(ip
+ 1); /* no IP options */
2900 ip
->ip_p
= IPPROTO_TCP
;
2904 * Assume we are sending a RST (or a keepalive in the reverse
2905 * direction), swap src and destination addresses and ports.
2907 ip
->ip_src
.s_addr
= htonl(id
->dst_ip
);
2908 ip
->ip_dst
.s_addr
= htonl(id
->src_ip
);
2909 tcp
->th_sport
= htons(id
->dst_port
);
2910 tcp
->th_dport
= htons(id
->src_port
);
2911 if (flags
& TH_RST
) { /* we are sending a RST */
2912 if (flags
& TH_ACK
) {
2913 tcp
->th_seq
= htonl(ack
);
2914 tcp
->th_ack
= htonl(0);
2915 tcp
->th_flags
= TH_RST
;
2919 tcp
->th_seq
= htonl(0);
2920 tcp
->th_ack
= htonl(seq
);
2921 tcp
->th_flags
= TH_RST
| TH_ACK
;
2925 * We are sending a keepalive. flags & TH_SYN determines
2926 * the direction, forward if set, reverse if clear.
2927 * NOTE: seq and ack are always assumed to be correct
2928 * as set by the caller. This may be confusing...
2930 if (flags
& TH_SYN
) {
2932 * we have to rewrite the correct addresses!
2934 ip
->ip_dst
.s_addr
= htonl(id
->dst_ip
);
2935 ip
->ip_src
.s_addr
= htonl(id
->src_ip
);
2936 tcp
->th_dport
= htons(id
->dst_port
);
2937 tcp
->th_sport
= htons(id
->src_port
);
2939 tcp
->th_seq
= htonl(seq
);
2940 tcp
->th_ack
= htonl(ack
);
2941 tcp
->th_flags
= TH_ACK
;
2945 * set ip_len to the payload size so we can compute
2946 * the tcp checksum on the pseudoheader
2947 * XXX check this, could save a couple of words ?
2949 ip
->ip_len
= htons(sizeof(struct tcphdr
));
2950 tcp
->th_sum
= in_cksum(m
, m
->m_pkthdr
.len
);
2953 * now fill fields left out earlier
2955 ip
->ip_ttl
= ip_defttl
;
2956 ip
->ip_len
= m
->m_pkthdr
.len
;
2958 bzero(&sro
, sizeof(sro
));
2959 ip_rtaddr(ip
->ip_dst
, &sro
);
2961 m
->m_pkthdr
.fw_flags
|= IPFW_MBUF_GENERATED
;
2962 ip_output(m
, NULL
, &sro
, 0, NULL
, NULL
);
2968 * Send a reject message, consuming the mbuf passed as an argument.
2971 send_reject(struct ip_fw_args
*args
, int code
, int offset
, int ip_len
)
2973 if (code
!= ICMP_REJECT_RST
) { /* Send an ICMP unreach */
2974 /* We need the IP header in host order for icmp_error(). */
2975 if (args
->eh
!= NULL
) {
2976 struct ip
*ip
= mtod(args
->m
, struct ip
*);
2978 ip
->ip_len
= ntohs(ip
->ip_len
);
2979 ip
->ip_off
= ntohs(ip
->ip_off
);
2981 icmp_error(args
->m
, ICMP_UNREACH
, code
, 0L, 0);
2982 } else if (offset
== 0 && args
->f_id
.proto
== IPPROTO_TCP
) {
2983 struct tcphdr
*const tcp
=
2984 L3HDR(struct tcphdr
, mtod(args
->m
, struct ip
*));
2986 if ((tcp
->th_flags
& TH_RST
) == 0) {
2987 send_pkt(&args
->f_id
, ntohl(tcp
->th_seq
),
2988 ntohl(tcp
->th_ack
), tcp
->th_flags
| TH_RST
);
2998 * Given an ip_fw *, lookup_next_rule will return a pointer
2999 * to the next rule, which can be either the jump
3000 * target (for skipto instructions) or the next one in the list (in
3001 * all other cases including a missing jump target).
3002 * The result is also written in the "next_rule" field of the rule.
3003 * Backward jumps are not allowed, so start looking from the next
3006 * This never returns NULL -- in case we do not have an exact match,
3007 * the next rule is returned. When the ruleset is changed,
3008 * pointers are flushed so we are always correct.
3010 static struct ip_fw
*
3011 lookup_next_rule(struct ip_fw
*me
)
3013 struct ip_fw
*rule
= NULL
;
3016 /* look for action, in case it is a skipto */
3017 cmd
= ACTION_PTR(me
);
3018 if (cmd
->opcode
== O_LOG
)
3020 if (cmd
->opcode
== O_SKIPTO
) {
3021 for (rule
= me
->next
; rule
; rule
= rule
->next
) {
3022 if (rule
->rulenum
>= cmd
->arg1
)
3026 if (rule
== NULL
) /* failure or not a skipto */
3028 me
->next_rule
= rule
;
3033 ipfw_match_uid(const struct ipfw_flow_id
*fid
, struct ifnet
*oif
,
3034 enum ipfw_opcodes opcode
, uid_t uid
)
3036 struct in_addr src_ip
, dst_ip
;
3037 struct inpcbinfo
*pi
;
3041 if (fid
->proto
== IPPROTO_TCP
) {
3043 pi
= &tcbinfo
[mycpuid
];
3044 } else if (fid
->proto
== IPPROTO_UDP
) {
3046 pi
= &udbinfo
[mycpuid
];
3052 * Values in 'fid' are in host byte order
3054 dst_ip
.s_addr
= htonl(fid
->dst_ip
);
3055 src_ip
.s_addr
= htonl(fid
->src_ip
);
3057 pcb
= in_pcblookup_hash(pi
,
3058 dst_ip
, htons(fid
->dst_port
),
3059 src_ip
, htons(fid
->src_port
),
3062 pcb
= in_pcblookup_hash(pi
,
3063 src_ip
, htons(fid
->src_port
),
3064 dst_ip
, htons(fid
->dst_port
),
3067 if (pcb
== NULL
|| pcb
->inp_socket
== NULL
)
3070 if (opcode
== O_UID
) {
3071 #define socheckuid(a,b) ((a)->so_cred->cr_uid != (b))
3072 return !socheckuid(pcb
->inp_socket
, uid
);
3075 return groupmember(uid
, pcb
->inp_socket
->so_cred
);
3080 ipfw_match_ifip(ipfw_insn_ifip
*cmd
, const struct in_addr
*ip
)
3083 if (__predict_false((cmd
->o
.arg1
& IPFW_IFIP_VALID
) == 0)) {
3084 struct ifaddr_container
*ifac
;
3087 ifp
= ifunit_netisr(cmd
->ifname
);
3091 TAILQ_FOREACH(ifac
, &ifp
->if_addrheads
[mycpuid
], ifa_link
) {
3092 struct ifaddr
*ia
= ifac
->ifa
;
3094 if (ia
->ifa_addr
== NULL
)
3096 if (ia
->ifa_addr
->sa_family
!= AF_INET
)
3099 cmd
->mask
.s_addr
= INADDR_ANY
;
3100 if (cmd
->o
.arg1
& IPFW_IFIP_NET
) {
3101 cmd
->mask
= ((struct sockaddr_in
*)
3102 ia
->ifa_netmask
)->sin_addr
;
3104 if (cmd
->mask
.s_addr
== INADDR_ANY
)
3105 cmd
->mask
.s_addr
= INADDR_BROADCAST
;
3108 ((struct sockaddr_in
*)ia
->ifa_addr
)->sin_addr
;
3109 cmd
->addr
.s_addr
&= cmd
->mask
.s_addr
;
3111 cmd
->o
.arg1
|= IPFW_IFIP_VALID
;
3114 if ((cmd
->o
.arg1
& IPFW_IFIP_VALID
) == 0)
3117 return ((ip
->s_addr
& cmd
->mask
.s_addr
) == cmd
->addr
.s_addr
);
3121 ipfw_xlate(const struct ipfw_xlat
*x
, struct mbuf
*m
,
3122 struct in_addr
*old_addr
, uint16_t *old_port
)
3124 struct ip
*ip
= mtod(m
, struct ip
*);
3125 struct in_addr
*addr
;
3126 uint16_t *port
, *csum
, dlen
= 0;
3128 boolean_t pseudo
= FALSE
;
3130 if (x
->xlat_flags
& IPFW_STATE_F_XLATSRC
) {
3134 port
= &L3HDR(struct tcphdr
, ip
)->th_sport
;
3135 csum
= &L3HDR(struct tcphdr
, ip
)->th_sum
;
3138 port
= &L3HDR(struct udphdr
, ip
)->uh_sport
;
3139 csum
= &L3HDR(struct udphdr
, ip
)->uh_sum
;
3143 panic("ipfw: unsupported src xlate proto %u", ip
->ip_p
);
3149 port
= &L3HDR(struct tcphdr
, ip
)->th_dport
;
3150 csum
= &L3HDR(struct tcphdr
, ip
)->th_sum
;
3153 port
= &L3HDR(struct udphdr
, ip
)->uh_dport
;
3154 csum
= &L3HDR(struct udphdr
, ip
)->uh_sum
;
3158 panic("ipfw: unsupported dst xlate proto %u", ip
->ip_p
);
3161 if (old_addr
!= NULL
)
3163 if (old_port
!= NULL
) {
3164 if (x
->xlat_port
!= 0)
3170 if (m
->m_pkthdr
.csum_flags
& (CSUM_UDP
| CSUM_TCP
| CSUM_TSO
)) {
3171 if ((m
->m_pkthdr
.csum_flags
& CSUM_TSO
) == 0)
3172 dlen
= ip
->ip_len
- (ip
->ip_hl
<< 2);
3177 const uint16_t *oaddr
, *naddr
;
3179 oaddr
= (const uint16_t *)&addr
->s_addr
;
3180 naddr
= (const uint16_t *)&x
->xlat_addr
;
3182 ip
->ip_sum
= pfil_cksum_fixup(pfil_cksum_fixup(ip
->ip_sum
,
3183 oaddr
[0], naddr
[0], 0), oaddr
[1], naddr
[1], 0);
3184 *csum
= pfil_cksum_fixup(pfil_cksum_fixup(*csum
,
3185 oaddr
[0], naddr
[0], udp
), oaddr
[1], naddr
[1], udp
);
3187 addr
->s_addr
= x
->xlat_addr
;
3189 if (x
->xlat_port
!= 0) {
3191 *csum
= pfil_cksum_fixup(*csum
, *port
, x
->xlat_port
,
3194 *port
= x
->xlat_port
;
3198 *csum
= in_pseudo(ip
->ip_src
.s_addr
, ip
->ip_dst
.s_addr
,
3199 htons(dlen
+ ip
->ip_p
));
3204 ipfw_ip_xlate_dispatch(netmsg_t nmsg
)
3206 struct netmsg_genpkt
*nm
= (struct netmsg_genpkt
*)nmsg
;
3207 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
3208 struct mbuf
*m
= nm
->m
;
3209 struct ipfw_xlat
*x
= nm
->arg1
;
3210 struct ip_fw
*rule
= x
->xlat_rule
;
3212 ASSERT_NETISR_NCPUS(mycpuid
);
3213 KASSERT(rule
->cpuid
== mycpuid
,
3214 ("rule does not belong to cpu%d", mycpuid
));
3215 KASSERT(m
->m_pkthdr
.fw_flags
& IPFW_MBUF_CONTINUE
,
3216 ("mbuf does not have ipfw continue rule"));
3218 KASSERT(ctx
->ipfw_cont_rule
== NULL
,
3219 ("pending ipfw continue rule"));
3220 KASSERT(ctx
->ipfw_cont_xlat
== NULL
,
3221 ("pending ipfw continue xlat"));
3222 ctx
->ipfw_cont_rule
= rule
;
3223 ctx
->ipfw_cont_xlat
= x
;
3228 ip_output(m
, NULL
, NULL
, IP_FORWARDING
, NULL
, NULL
);
3230 /* May not be cleared, if ipfw was unload/disabled. */
3231 ctx
->ipfw_cont_rule
= NULL
;
3232 ctx
->ipfw_cont_xlat
= NULL
;
3235 * This state is no longer used; decrement its xlat_crefs,
3236 * so this state can be deleted.
3240 * This rule is no longer used; decrement its cross_refs,
3241 * so this rule can be deleted.
3244 * Decrement cross_refs in the last step of this function,
3245 * so that the module could be unloaded safely.
3251 ipfw_xlate_redispatch(struct mbuf
*m
, int cpuid
, struct ipfw_xlat
*x
,
3254 struct netmsg_genpkt
*nm
;
3256 KASSERT(x
->xlat_pcpu
== cpuid
, ("xlat paired cpu%d, target cpu%d",
3257 x
->xlat_pcpu
, cpuid
));
3260 * Bump cross_refs to prevent this rule and its siblings
3261 * from being deleted, while this mbuf is inflight. The
3262 * cross_refs of the sibling rule on the target cpu will
3263 * be decremented, once this mbuf is going to be filtered
3264 * on the target cpu.
3266 x
->xlat_rule
->cross_refs
++;
3268 * Bump xlat_crefs to prevent this state and its paired
3269 * state from being deleted, while this mbuf is inflight.
3270 * The xlat_crefs of the paired state on the target cpu
3271 * will be decremented, once this mbuf is going to be
3272 * filtered on the target cpu.
3276 m
->m_pkthdr
.fw_flags
|= IPFW_MBUF_CONTINUE
;
3277 if (flags
& IPFW_XLATE_INSERT
)
3278 m
->m_pkthdr
.fw_flags
|= IPFW_MBUF_XLATINS
;
3279 if (flags
& IPFW_XLATE_FORWARD
)
3280 m
->m_pkthdr
.fw_flags
|= IPFW_MBUF_XLATFWD
;
3282 if ((flags
& IPFW_XLATE_OUTPUT
) == 0) {
3283 struct ip
*ip
= mtod(m
, struct ip
*);
3287 * ip_input() expects ip_len/ip_off are in network
3290 ip
->ip_len
= htons(ip
->ip_len
);
3291 ip
->ip_off
= htons(ip
->ip_off
);
3294 nm
= &m
->m_hdr
.mh_genmsg
;
3295 netmsg_init(&nm
->base
, NULL
, &netisr_apanic_rport
, 0,
3296 ipfw_ip_xlate_dispatch
);
3298 nm
->arg1
= x
->xlat_pair
;
3300 if (flags
& IPFW_XLATE_OUTPUT
)
3302 netisr_sendmsg(&nm
->base
, cpuid
);
3305 static struct mbuf
*
3306 ipfw_setup_local(struct mbuf
*m
, const int hlen
, struct ip_fw_args
*args
,
3307 struct ip_fw_local
*local
, struct ip
**ip0
)
3309 struct ip
*ip
= mtod(m
, struct ip
*);
3314 * Collect parameters into local variables for faster matching.
3316 if (hlen
== 0) { /* do not grab addresses for non-ip pkts */
3317 local
->proto
= args
->f_id
.proto
= 0; /* mark f_id invalid */
3321 local
->proto
= args
->f_id
.proto
= ip
->ip_p
;
3322 local
->src_ip
= ip
->ip_src
;
3323 local
->dst_ip
= ip
->ip_dst
;
3324 if (args
->eh
!= NULL
) { /* layer 2 packets are as on the wire */
3325 local
->offset
= ntohs(ip
->ip_off
) & IP_OFFMASK
;
3326 local
->ip_len
= ntohs(ip
->ip_len
);
3328 local
->offset
= ip
->ip_off
& IP_OFFMASK
;
3329 local
->ip_len
= ip
->ip_len
;
3332 #define PULLUP_TO(len) \
3334 if (m->m_len < (len)) { \
3335 args->m = m = m_pullup(m, (len)); \
3340 ip = mtod(m, struct ip *); \
3344 if (local
->offset
== 0) {
3345 switch (local
->proto
) {
3347 PULLUP_TO(hlen
+ sizeof(struct tcphdr
));
3348 local
->tcp
= tcp
= L3HDR(struct tcphdr
, ip
);
3349 local
->dst_port
= tcp
->th_dport
;
3350 local
->src_port
= tcp
->th_sport
;
3351 args
->f_id
.flags
= tcp
->th_flags
;
3355 PULLUP_TO(hlen
+ sizeof(struct udphdr
));
3356 udp
= L3HDR(struct udphdr
, ip
);
3357 local
->dst_port
= udp
->uh_dport
;
3358 local
->src_port
= udp
->uh_sport
;
3362 PULLUP_TO(hlen
+ 4); /* type, code and checksum. */
3363 args
->f_id
.flags
= L3HDR(struct icmp
, ip
)->icmp_type
;
3373 args
->f_id
.src_ip
= ntohl(local
->src_ip
.s_addr
);
3374 args
->f_id
.dst_ip
= ntohl(local
->dst_ip
.s_addr
);
3375 args
->f_id
.src_port
= local
->src_port
= ntohs(local
->src_port
);
3376 args
->f_id
.dst_port
= local
->dst_port
= ntohs(local
->dst_port
);
3382 static struct mbuf
*
3383 ipfw_rehashm(struct mbuf
*m
, const int hlen
, struct ip_fw_args
*args
,
3384 struct ip_fw_local
*local
, struct ip
**ip0
)
3386 struct ip
*ip
= mtod(m
, struct ip
*);
3388 ip
->ip_len
= htons(ip
->ip_len
);
3389 ip
->ip_off
= htons(ip
->ip_off
);
3391 m
->m_flags
&= ~M_HASH
;
3398 KASSERT(m
->m_flags
& M_HASH
, ("no hash"));
3400 /* 'm' might be changed by ip_hashfn(). */
3401 ip
= mtod(m
, struct ip
*);
3402 ip
->ip_len
= ntohs(ip
->ip_len
);
3403 ip
->ip_off
= ntohs(ip
->ip_off
);
3405 return (ipfw_setup_local(m
, hlen
, args
, local
, ip0
));
3409 * The main check routine for the firewall.
3411 * All arguments are in args so we can modify them and return them
3412 * back to the caller.
3416 * args->m (in/out) The packet; we set to NULL when/if we nuke it.
3417 * Starts with the IP header.
3418 * args->eh (in) Mac header if present, or NULL for layer3 packet.
3419 * args->oif Outgoing interface, or NULL if packet is incoming.
3420 * The incoming interface is in the mbuf. (in)
3422 * args->rule Pointer to the last matching rule (in/out)
3423 * args->f_id Addresses grabbed from the packet (out)
3427 * If the packet was denied/rejected and has been dropped, *m is equal
3428 * to NULL upon return.
3430 * IP_FW_DENY the packet must be dropped.
3431 * IP_FW_PASS The packet is to be accepted and routed normally.
3432 * IP_FW_DIVERT Divert the packet to port (args->cookie)
3433 * IP_FW_TEE Tee the packet to port (args->cookie)
3434 * IP_FW_DUMMYNET Send the packet to pipe/queue (args->cookie)
3435 * IP_FW_CONTINUE Continue processing on another cpu.
3438 ipfw_chk(struct ip_fw_args
*args
)
3441 * Local variables hold state during the processing of a packet.
3443 * IMPORTANT NOTE: to speed up the processing of rules, there
3444 * are some assumption on the values of the variables, which
3445 * are documented here. Should you change them, please check
3446 * the implementation of the various instructions to make sure
3447 * that they still work.
3449 * args->eh The MAC header. It is non-null for a layer2
3450 * packet, it is NULL for a layer-3 packet.
3452 * m | args->m Pointer to the mbuf, as received from the caller.
3453 * It may change if ipfw_chk() does an m_pullup, or if it
3454 * consumes the packet because it calls send_reject().
3455 * XXX This has to change, so that ipfw_chk() never modifies
3456 * or consumes the buffer.
3457 * ip is simply an alias of the value of m, and it is kept
3458 * in sync with it (the packet is supposed to start with
3461 struct mbuf
*m
= args
->m
;
3462 struct ip
*ip
= mtod(m
, struct ip
*);
3465 * oif | args->oif If NULL, ipfw_chk has been called on the
3466 * inbound path (ether_input, ip_input).
3467 * If non-NULL, ipfw_chk has been called on the outbound path
3468 * (ether_output, ip_output).
3470 struct ifnet
*oif
= args
->oif
;
3472 struct ip_fw
*f
= NULL
; /* matching rule */
3473 int retval
= IP_FW_PASS
;
3475 struct divert_info
*divinfo
;
3476 struct ipfw_state
*s
;
3479 * hlen The length of the IPv4 header.
3480 * hlen >0 means we have an IPv4 packet.
3482 u_int hlen
= 0; /* hlen >0 means we have an IP pkt */
3484 struct ip_fw_local lc
;
3487 * dyn_dir = MATCH_UNKNOWN when rules unchecked,
3488 * MATCH_NONE when checked and not matched (dyn_f = NULL),
3489 * MATCH_FORWARD or MATCH_REVERSE otherwise (dyn_f != NULL)
3491 int dyn_dir
= MATCH_UNKNOWN
;
3492 struct ip_fw
*dyn_f
= NULL
;
3493 int cpuid
= mycpuid
;
3494 struct ipfw_context
*ctx
;
3496 ASSERT_NETISR_NCPUS(cpuid
);
3497 ctx
= ipfw_ctx
[cpuid
];
3499 if (m
->m_pkthdr
.fw_flags
& IPFW_MBUF_GENERATED
)
3500 return IP_FW_PASS
; /* accept */
3502 if (args
->eh
== NULL
|| /* layer 3 packet */
3503 (m
->m_pkthdr
.len
>= sizeof(struct ip
) &&
3504 ntohs(args
->eh
->ether_type
) == ETHERTYPE_IP
))
3505 hlen
= ip
->ip_hl
<< 2;
3507 memset(&lc
, 0, sizeof(lc
));
3509 m
= ipfw_setup_local(m
, hlen
, args
, &lc
, &ip
);
3515 * Packet has already been tagged. Look for the next rule
3516 * to restart processing.
3518 * If fw_one_pass != 0 then just accept it.
3519 * XXX should not happen here, but optimized out in
3522 if (fw_one_pass
&& (args
->flags
& IP_FWARG_F_CONT
) == 0)
3524 args
->flags
&= ~IP_FWARG_F_CONT
;
3526 /* This rule is being/has been flushed */
3530 KASSERT(args
->rule
->cpuid
== cpuid
,
3531 ("rule used on cpu%d", cpuid
));
3533 /* This rule was deleted */
3534 if (args
->rule
->rule_flags
& IPFW_RULE_F_INVALID
)
3537 if (args
->xlat
!= NULL
) {
3538 struct ipfw_xlat
*x
= args
->xlat
;
3540 /* This xlat is being deleted. */
3541 if (x
->xlat_invalid
)
3547 dyn_dir
= (args
->flags
& IP_FWARG_F_XLATFWD
) ?
3548 MATCH_FORWARD
: MATCH_REVERSE
;
3550 if (args
->flags
& IP_FWARG_F_XLATINS
) {
3551 KASSERT(x
->xlat_flags
& IPFW_STATE_F_XLATSLAVE
,
3552 ("not slave %u state", x
->xlat_type
));
3553 s
= ipfw_state_link(ctx
, &x
->xlat_st
);
3555 ctx
->ipfw_xlate_conflicts
++;
3556 if (IPFW_STATE_ISDEAD(s
)) {
3557 ipfw_state_remove(ctx
, s
);
3558 s
= ipfw_state_link(ctx
,
3565 "conflicts %u state\n",
3569 ipfw_xlat_invalidate(x
);
3572 ctx
->ipfw_xlate_cresolved
++;
3575 ipfw_state_update(&args
->f_id
, dyn_dir
,
3576 lc
.tcp
, &x
->xlat_st
);
3579 /* TODO: setup dyn_f, dyn_dir */
3581 f
= args
->rule
->next_rule
;
3583 f
= lookup_next_rule(args
->rule
);
3587 * Find the starting rule. It can be either the first
3588 * one, or the one after divert_rule if asked so.
3592 KKASSERT((args
->flags
&
3593 (IP_FWARG_F_XLATINS
| IP_FWARG_F_CONT
)) == 0);
3594 KKASSERT(args
->xlat
== NULL
);
3596 mtag
= m_tag_find(m
, PACKET_TAG_IPFW_DIVERT
, NULL
);
3598 divinfo
= m_tag_data(mtag
);
3599 skipto
= divinfo
->skipto
;
3604 f
= ctx
->ipfw_layer3_chain
;
3605 if (args
->eh
== NULL
&& skipto
!= 0) {
3606 /* No skipto during rule flushing */
3610 if (skipto
>= IPFW_DEFAULT_RULE
)
3611 return IP_FW_DENY
; /* invalid */
3613 while (f
&& f
->rulenum
<= skipto
)
3615 if (f
== NULL
) /* drop packet */
3617 } else if (ipfw_flushing
) {
3618 /* Rules are being flushed; skip to default rule */
3619 f
= ctx
->ipfw_default_rule
;
3622 if ((mtag
= m_tag_find(m
, PACKET_TAG_IPFW_DIVERT
, NULL
)) != NULL
)
3623 m_tag_delete(m
, mtag
);
3626 * Now scan the rules, and parse microinstructions for each rule.
3628 for (; f
; f
= f
->next
) {
3631 int skip_or
; /* skip rest of OR block */
3634 if (ctx
->ipfw_set_disable
& (1 << f
->set
)) {
3639 if (args
->xlat
!= NULL
) {
3641 l
= f
->cmd_len
- f
->act_ofs
;
3642 cmd
= ACTION_PTR(f
);
3649 for (; l
> 0; l
-= cmdlen
, cmd
+= cmdlen
) {
3653 * check_body is a jump target used when we find a
3654 * CHECK_STATE, and need to jump to the body of
3658 cmdlen
= F_LEN(cmd
);
3660 * An OR block (insn_1 || .. || insn_n) has the
3661 * F_OR bit set in all but the last instruction.
3662 * The first match will set "skip_or", and cause
3663 * the following instructions to be skipped until
3664 * past the one with the F_OR bit clear.
3666 if (skip_or
) { /* skip this instruction */
3667 if ((cmd
->len
& F_OR
) == 0)
3668 skip_or
= 0; /* next one is good */
3671 match
= 0; /* set to 1 if we succeed */
3673 switch (cmd
->opcode
) {
3675 * The first set of opcodes compares the packet's
3676 * fields with some pattern, setting 'match' if a
3677 * match is found. At the end of the loop there is
3678 * logic to deal with F_NOT and F_OR flags associated
3686 kprintf("ipfw: opcode %d unimplemented\n",
3693 * We only check offset == 0 && proto != 0,
3694 * as this ensures that we have an IPv4
3695 * packet with the ports info.
3700 match
= ipfw_match_uid(&args
->f_id
, oif
,
3702 (uid_t
)((ipfw_insn_u32
*)cmd
)->d
[0]);
3706 match
= iface_match(m
->m_pkthdr
.rcvif
,
3707 (ipfw_insn_if
*)cmd
);
3711 match
= iface_match(oif
, (ipfw_insn_if
*)cmd
);
3715 match
= iface_match(oif
? oif
:
3716 m
->m_pkthdr
.rcvif
, (ipfw_insn_if
*)cmd
);
3720 if (args
->eh
!= NULL
) { /* have MAC header */
3721 uint32_t *want
= (uint32_t *)
3722 ((ipfw_insn_mac
*)cmd
)->addr
;
3723 uint32_t *mask
= (uint32_t *)
3724 ((ipfw_insn_mac
*)cmd
)->mask
;
3725 uint32_t *hdr
= (uint32_t *)args
->eh
;
3728 (want
[0] == (hdr
[0] & mask
[0]) &&
3729 want
[1] == (hdr
[1] & mask
[1]) &&
3730 want
[2] == (hdr
[2] & mask
[2]));
3735 if (args
->eh
!= NULL
) {
3737 ntohs(args
->eh
->ether_type
);
3739 ((ipfw_insn_u16
*)cmd
)->ports
;
3742 /* Special vlan handling */
3743 if (m
->m_flags
& M_VLANTAG
)
3746 for (i
= cmdlen
- 1; !match
&& i
> 0;
3749 (t
>= p
[0] && t
<= p
[1]);
3755 match
= (hlen
> 0 && lc
.offset
!= 0);
3762 if (args
->eh
!= NULL
)
3763 off
= ntohs(ip
->ip_off
);
3766 if (off
& (IP_MF
| IP_OFFMASK
))
3771 case O_IN
: /* "out" is "not in" */
3772 match
= (oif
== NULL
);
3776 match
= (args
->eh
!= NULL
);
3781 * We do not allow an arg of 0 so the
3782 * check of "proto" only suffices.
3784 match
= (lc
.proto
== cmd
->arg1
);
3788 match
= (hlen
> 0 &&
3789 ((ipfw_insn_ip
*)cmd
)->addr
.s_addr
==
3794 match
= (hlen
> 0 &&
3795 ((ipfw_insn_ip
*)cmd
)->addr
.s_addr
==
3797 ((ipfw_insn_ip
*)cmd
)->mask
.s_addr
));
3804 tif
= INADDR_TO_IFP(&lc
.src_ip
);
3805 match
= (tif
!= NULL
);
3809 case O_IP_SRC_TABLE
:
3810 match
= ipfw_table_lookup(ctx
, cmd
->arg1
,
3815 match
= ipfw_match_ifip((ipfw_insn_ifip
*)cmd
,
3822 uint32_t *d
= (uint32_t *)(cmd
+ 1);
3824 cmd
->opcode
== O_IP_DST_SET
?
3830 addr
-= d
[0]; /* subtract base */
3832 (addr
< cmd
->arg1
) &&
3833 (d
[1 + (addr
>> 5)] &
3834 (1 << (addr
& 0x1f)));
3839 match
= (hlen
> 0 &&
3840 ((ipfw_insn_ip
*)cmd
)->addr
.s_addr
==
3845 match
= (hlen
> 0) &&
3846 (((ipfw_insn_ip
*)cmd
)->addr
.s_addr
==
3848 ((ipfw_insn_ip
*)cmd
)->mask
.s_addr
));
3855 tif
= INADDR_TO_IFP(&lc
.dst_ip
);
3856 match
= (tif
!= NULL
);
3860 case O_IP_DST_TABLE
:
3861 match
= ipfw_table_lookup(ctx
, cmd
->arg1
,
3866 match
= ipfw_match_ifip((ipfw_insn_ifip
*)cmd
,
3873 * offset == 0 && proto != 0 is enough
3874 * to guarantee that we have an IPv4
3875 * packet with port info.
3877 if ((lc
.proto
==IPPROTO_UDP
||
3878 lc
.proto
==IPPROTO_TCP
)
3879 && lc
.offset
== 0) {
3881 (cmd
->opcode
== O_IP_SRCPORT
) ?
3882 lc
.src_port
: lc
.dst_port
;
3884 ((ipfw_insn_u16
*)cmd
)->ports
;
3887 for (i
= cmdlen
- 1; !match
&& i
> 0;
3890 (x
>= p
[0] && x
<= p
[1]);
3896 match
= (lc
.offset
== 0 &&
3897 lc
.proto
==IPPROTO_ICMP
&&
3898 icmpcode_match(ip
, (ipfw_insn_u32
*)cmd
));
3902 match
= (lc
.offset
== 0 &&
3903 lc
.proto
==IPPROTO_ICMP
&&
3904 icmptype_match(ip
, (ipfw_insn_u32
*)cmd
));
3908 match
= (hlen
> 0 && ipopts_match(ip
, cmd
));
3912 match
= (hlen
> 0 && cmd
->arg1
== ip
->ip_v
);
3916 match
= (hlen
> 0 && cmd
->arg1
== ip
->ip_ttl
);
3920 match
= (hlen
> 0 &&
3921 cmd
->arg1
== ntohs(ip
->ip_id
));
3925 match
= (hlen
> 0 && cmd
->arg1
== lc
.ip_len
);
3928 case O_IPPRECEDENCE
:
3929 match
= (hlen
> 0 &&
3930 (cmd
->arg1
== (ip
->ip_tos
& 0xe0)));
3934 match
= (hlen
> 0 &&
3935 flags_match(cmd
, ip
->ip_tos
));
3939 match
= (lc
.proto
== IPPROTO_TCP
&&
3942 L3HDR(struct tcphdr
,ip
)->th_flags
));
3946 match
= (lc
.proto
== IPPROTO_TCP
&&
3947 lc
.offset
== 0 && tcpopts_match(ip
, cmd
));
3951 match
= (lc
.proto
== IPPROTO_TCP
&&
3953 ((ipfw_insn_u32
*)cmd
)->d
[0] ==
3954 L3HDR(struct tcphdr
,ip
)->th_seq
);
3958 match
= (lc
.proto
== IPPROTO_TCP
&&
3960 ((ipfw_insn_u32
*)cmd
)->d
[0] ==
3961 L3HDR(struct tcphdr
,ip
)->th_ack
);
3965 match
= (lc
.proto
== IPPROTO_TCP
&&
3968 L3HDR(struct tcphdr
,ip
)->th_win
);
3972 /* reject packets which have SYN only */
3973 /* XXX should i also check for TH_ACK ? */
3974 match
= (lc
.proto
== IPPROTO_TCP
&&
3976 (L3HDR(struct tcphdr
,ip
)->th_flags
&
3977 (TH_RST
| TH_ACK
| TH_SYN
)) != TH_SYN
);
3982 ipfw_log(ctx
, f
, hlen
, args
->eh
, m
,
3989 match
= (krandom() <
3990 ((ipfw_insn_u32
*)cmd
)->d
[0]);
3994 * The second set of opcodes represents 'actions',
3995 * i.e. the terminal part of a rule once the packet
3996 * matches all previous patterns.
3997 * Typically there is only one action for each rule,
3998 * and the opcode is stored at the end of the rule
3999 * (but there are exceptions -- see below).
4001 * In general, here we set retval and terminate the
4002 * outer loop (would be a 'break 3' in some language,
4003 * but we need to do a 'goto done').
4006 * O_COUNT and O_SKIPTO actions:
4007 * instead of terminating, we jump to the next rule
4008 * ('goto next_rule', equivalent to a 'break 2'),
4009 * or to the SKIPTO target ('goto again' after
4010 * having set f, cmd and l), respectively.
4012 * O_LIMIT and O_KEEP_STATE, O_REDIRECT: these opcodes
4013 * are not real 'actions', and are stored right
4014 * before the 'action' part of the rule.
4015 * These opcodes try to install an entry in the
4016 * state tables; if successful, we continue with
4017 * the next opcode (match=1; break;), otherwise
4018 * the packet must be dropped ('goto done' after
4019 * setting retval). If static rules are changed
4020 * during the state installation, the packet will
4021 * be dropped and rule's stats will not be updated
4022 * ('return IP_FW_DENY').
4024 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
4025 * cause a lookup of the state table, and a jump
4026 * to the 'action' part of the parent rule
4027 * ('goto check_body') if an entry is found, or
4028 * (CHECK_STATE only) a jump to the next rule if
4029 * the entry is not found ('goto next_rule').
4030 * The result of the lookup is cached so that
4031 * further instances of these opcodes are
4032 * effectively NOPs. If static rules are changed
4033 * during the state lookup, the packet will
4034 * be dropped and rule's stats will not be updated
4035 * ('return IP_FW_DENY').
4038 if (f
->cross_rules
== NULL
) {
4040 * This rule was not completely set up;
4041 * move on to the next rule.
4046 * Apply redirect only on input path and
4047 * only to non-fragment TCP segments or
4050 * Does _not_ work with layer2 filtering.
4052 if (oif
!= NULL
|| args
->eh
!= NULL
||
4053 (ip
->ip_off
& (IP_MF
| IP_OFFMASK
)) ||
4054 (lc
.proto
!= IPPROTO_TCP
&&
4055 lc
.proto
!= IPPROTO_UDP
))
4062 s
= ipfw_state_install(ctx
, f
,
4063 (ipfw_insn_limit
*)cmd
, args
, lc
.tcp
);
4065 retval
= IP_FW_DENY
;
4066 goto done
; /* error/limit violation */
4069 s
->st_bcnt
+= lc
.ip_len
;
4071 if (s
->st_type
== O_REDIRECT
) {
4072 struct in_addr oaddr
;
4074 struct ipfw_xlat
*slave_x
, *x
;
4075 struct ipfw_state
*dup
;
4077 x
= (struct ipfw_xlat
*)s
;
4078 ipfw_xlate(x
, m
, &oaddr
, &oport
);
4079 m
= ipfw_rehashm(m
, hlen
, args
, &lc
,
4082 ipfw_state_del(ctx
, s
);
4086 cpuid
= netisr_hashcpu(
4089 slave_x
= (struct ipfw_xlat
*)
4090 ipfw_state_alloc(ctx
, &args
->f_id
,
4091 O_REDIRECT
, f
->cross_rules
[cpuid
],
4093 if (slave_x
== NULL
) {
4094 ipfw_state_del(ctx
, s
);
4095 retval
= IP_FW_DENY
;
4098 slave_x
->xlat_addr
= oaddr
.s_addr
;
4099 slave_x
->xlat_port
= oport
;
4100 slave_x
->xlat_dir
= MATCH_REVERSE
;
4101 slave_x
->xlat_flags
|=
4102 IPFW_STATE_F_XLATSRC
|
4103 IPFW_STATE_F_XLATSLAVE
;
4105 slave_x
->xlat_pair
= x
;
4106 slave_x
->xlat_pcpu
= mycpuid
;
4107 x
->xlat_pair
= slave_x
;
4108 x
->xlat_pcpu
= cpuid
;
4111 if (cpuid
!= mycpuid
) {
4112 ctx
->ipfw_xlate_split
++;
4113 ipfw_xlate_redispatch(
4116 IPFW_XLATE_FORWARD
);
4118 return (IP_FW_REDISPATCH
);
4121 dup
= ipfw_state_link(ctx
,
4124 ctx
->ipfw_xlate_conflicts
++;
4125 if (IPFW_STATE_ISDEAD(dup
)) {
4126 ipfw_state_remove(ctx
,
4128 dup
= ipfw_state_link(
4129 ctx
, &slave_x
->xlat_st
);
4140 ipfw_state_del(ctx
, s
);
4141 return (IP_FW_DENY
);
4143 ctx
->ipfw_xlate_cresolved
++;
4152 * States are checked at the first keep-state
4153 * or check-state occurrence, with the result
4154 * being stored in dyn_dir. The compiler
4155 * introduces a PROBE_STATE instruction for
4156 * us when we have a KEEP_STATE/LIMIT/RDR
4157 * (because PROBE_STATE needs to be run first).
4160 if (dyn_dir
== MATCH_UNKNOWN
) {
4161 s
= ipfw_state_lookup(ctx
,
4162 &args
->f_id
, &dyn_dir
, lc
.tcp
);
4165 (s
->st_type
== O_REDIRECT
&&
4166 (args
->eh
!= NULL
||
4167 (ip
->ip_off
& (IP_MF
| IP_OFFMASK
)) ||
4168 (lc
.proto
!= IPPROTO_TCP
&&
4169 lc
.proto
!= IPPROTO_UDP
)))) {
4171 * State not found. If CHECK_STATE,
4172 * skip to next rule, if PROBE_STATE
4173 * just ignore and continue with next
4176 if (cmd
->opcode
== O_CHECK_STATE
)
4183 s
->st_bcnt
+= lc
.ip_len
;
4185 if (s
->st_type
== O_REDIRECT
) {
4186 struct ipfw_xlat
*x
=
4187 (struct ipfw_xlat
*)s
;
4190 x
->xlat_ifp
== NULL
) {
4191 KASSERT(x
->xlat_flags
&
4192 IPFW_STATE_F_XLATSLAVE
,
4193 ("master rdr state "
4197 (oif
!= NULL
&& x
->xlat_ifp
!=oif
) ||
4199 x
->xlat_ifp
!=m
->m_pkthdr
.rcvif
)) {
4200 retval
= IP_FW_DENY
;
4203 if (x
->xlat_dir
!= dyn_dir
)
4206 ipfw_xlate(x
, m
, NULL
, NULL
);
4207 m
= ipfw_rehashm(m
, hlen
, args
, &lc
,
4212 cpuid
= netisr_hashcpu(
4214 if (cpuid
!= mycpuid
) {
4221 if (dyn_dir
== MATCH_FORWARD
) {
4225 ipfw_xlate_redispatch(m
, cpuid
,
4228 return (IP_FW_REDISPATCH
);
4231 KKASSERT(x
->xlat_pcpu
== mycpuid
);
4232 ipfw_state_update(&args
->f_id
, dyn_dir
,
4233 lc
.tcp
, &x
->xlat_pair
->xlat_st
);
4237 * Found a rule from a state; jump to the
4238 * 'action' part of the rule.
4241 KKASSERT(f
->cpuid
== mycpuid
);
4243 cmd
= ACTION_PTR(f
);
4244 l
= f
->cmd_len
- f
->act_ofs
;
4249 retval
= IP_FW_PASS
; /* accept */
4253 if (f
->cross_rules
== NULL
) {
4255 * This rule was not completely set up;
4256 * move on to the next rule.
4262 * Don't defrag for l2 packets, output packets
4265 if (oif
!= NULL
|| args
->eh
!= NULL
||
4266 (ip
->ip_off
& (IP_MF
| IP_OFFMASK
)) == 0)
4273 retval
= IP_FW_PASS
;
4276 ctx
->ipfw_defraged
++;
4277 KASSERT((m
->m_flags
& M_HASH
) == 0,
4278 ("hash not cleared"));
4280 /* Update statistics */
4282 f
->bcnt
+= lc
.ip_len
;
4283 f
->timestamp
= time_second
;
4285 ip
= mtod(m
, struct ip
*);
4286 hlen
= ip
->ip_hl
<< 2;
4289 ip
->ip_len
= htons(ip
->ip_len
);
4290 ip
->ip_off
= htons(ip
->ip_off
);
4297 KASSERT(m
->m_flags
& M_HASH
, ("no hash"));
4298 cpuid
= netisr_hashcpu(m
->m_pkthdr
.hash
);
4299 if (cpuid
!= mycpuid
) {
4302 * ip_len/ip_off are in network byte
4305 ctx
->ipfw_defrag_remote
++;
4306 ipfw_defrag_redispatch(m
, cpuid
, f
);
4308 return (IP_FW_REDISPATCH
);
4311 /* 'm' might be changed by ip_hashfn(). */
4312 ip
= mtod(m
, struct ip
*);
4313 ip
->ip_len
= ntohs(ip
->ip_len
);
4314 ip
->ip_off
= ntohs(ip
->ip_off
);
4316 m
= ipfw_setup_local(m
, hlen
, args
, &lc
, &ip
);
4325 args
->rule
= f
; /* report matching rule */
4326 args
->cookie
= cmd
->arg1
;
4327 retval
= IP_FW_DUMMYNET
;
4332 if (args
->eh
) /* not on layer 2 */
4335 mtag
= m_tag_get(PACKET_TAG_IPFW_DIVERT
,
4336 sizeof(*divinfo
), M_INTWAIT
| M_NULLOK
);
4338 retval
= IP_FW_DENY
;
4341 divinfo
= m_tag_data(mtag
);
4343 divinfo
->skipto
= f
->rulenum
;
4344 divinfo
->port
= cmd
->arg1
;
4345 divinfo
->tee
= (cmd
->opcode
== O_TEE
);
4346 m_tag_prepend(m
, mtag
);
4348 args
->cookie
= cmd
->arg1
;
4349 retval
= (cmd
->opcode
== O_DIVERT
) ?
4350 IP_FW_DIVERT
: IP_FW_TEE
;
4355 f
->pcnt
++; /* update stats */
4356 f
->bcnt
+= lc
.ip_len
;
4357 f
->timestamp
= time_second
;
4358 if (cmd
->opcode
== O_COUNT
)
4361 if (f
->next_rule
== NULL
)
4362 lookup_next_rule(f
);
4368 * Drop the packet and send a reject notice
4369 * if the packet is not ICMP (or is an ICMP
4370 * query), and it is not multicast/broadcast.
4373 (lc
.proto
!= IPPROTO_ICMP
||
4374 is_icmp_query(ip
)) &&
4375 !(m
->m_flags
& (M_BCAST
|M_MCAST
)) &&
4376 !IN_MULTICAST(ntohl(lc
.dst_ip
.s_addr
))) {
4377 send_reject(args
, cmd
->arg1
,
4378 lc
.offset
, lc
.ip_len
);
4379 retval
= IP_FW_DENY
;
4384 retval
= IP_FW_DENY
;
4388 if (args
->eh
) /* not valid on layer2 pkts */
4390 if (!dyn_f
|| dyn_dir
== MATCH_FORWARD
) {
4391 struct sockaddr_in
*sin
;
4393 mtag
= m_tag_get(PACKET_TAG_IPFORWARD
,
4394 sizeof(*sin
), M_INTWAIT
| M_NULLOK
);
4396 retval
= IP_FW_DENY
;
4399 sin
= m_tag_data(mtag
);
4401 /* Structure copy */
4402 *sin
= ((ipfw_insn_sa
*)cmd
)->sa
;
4404 m_tag_prepend(m
, mtag
);
4405 m
->m_pkthdr
.fw_flags
|=
4406 IPFORWARD_MBUF_TAGGED
;
4407 m
->m_pkthdr
.fw_flags
&=
4408 ~BRIDGE_MBUF_TAGGED
;
4410 retval
= IP_FW_PASS
;
4414 panic("-- unknown opcode %d", cmd
->opcode
);
4415 } /* end of switch() on opcodes */
4417 if (cmd
->len
& F_NOT
)
4421 if (cmd
->len
& F_OR
)
4424 if (!(cmd
->len
& F_OR
)) /* not an OR block, */
4425 break; /* try next rule */
4428 } /* end of inner for, scan opcodes */
4430 next_rule
:; /* try next rule */
4432 } /* end of outer for, scan rules */
4433 kprintf("+++ ipfw: ouch!, skip past end of rules, denying packet\n");
4437 /* Update statistics */
4439 f
->bcnt
+= lc
.ip_len
;
4440 f
->timestamp
= time_second
;
4445 kprintf("pullup failed\n");
4449 static struct mbuf
*
4450 ipfw_dummynet_io(struct mbuf
*m
, int pipe_nr
, int dir
, struct ip_fw_args
*fwa
)
4455 const struct ipfw_flow_id
*id
;
4456 struct dn_flow_id
*fid
;
4460 mtag
= m_tag_get(PACKET_TAG_DUMMYNET
, sizeof(*pkt
),
4461 M_INTWAIT
| M_NULLOK
);
4466 m_tag_prepend(m
, mtag
);
4468 pkt
= m_tag_data(mtag
);
4469 bzero(pkt
, sizeof(*pkt
));
4471 cmd
= fwa
->rule
->cmd
+ fwa
->rule
->act_ofs
;
4472 if (cmd
->opcode
== O_LOG
)
4474 KASSERT(cmd
->opcode
== O_PIPE
|| cmd
->opcode
== O_QUEUE
,
4475 ("Rule is not PIPE or QUEUE, opcode %d", cmd
->opcode
));
4478 pkt
->dn_flags
= (dir
& DN_FLAGS_DIR_MASK
);
4479 pkt
->ifp
= fwa
->oif
;
4480 pkt
->pipe_nr
= pipe_nr
;
4482 pkt
->cpuid
= mycpuid
;
4483 pkt
->msgport
= netisr_curport();
4487 fid
->fid_dst_ip
= id
->dst_ip
;
4488 fid
->fid_src_ip
= id
->src_ip
;
4489 fid
->fid_dst_port
= id
->dst_port
;
4490 fid
->fid_src_port
= id
->src_port
;
4491 fid
->fid_proto
= id
->proto
;
4492 fid
->fid_flags
= id
->flags
;
4494 ipfw_ref_rule(fwa
->rule
);
4495 pkt
->dn_priv
= fwa
->rule
;
4496 pkt
->dn_unref_priv
= ipfw_unref_rule
;
4498 if (cmd
->opcode
== O_PIPE
)
4499 pkt
->dn_flags
|= DN_FLAGS_IS_PIPE
;
4501 m
->m_pkthdr
.fw_flags
|= DUMMYNET_MBUF_TAGGED
;
4506 * When a rule is added/deleted, clear the next_rule pointers in all rules.
4507 * These will be reconstructed on the fly as packets are matched.
4510 ipfw_flush_rule_ptrs(struct ipfw_context
*ctx
)
4514 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
)
4515 rule
->next_rule
= NULL
;
4519 ipfw_inc_static_count(struct ip_fw
*rule
)
4521 /* Static rule's counts are updated only on CPU0 */
4522 KKASSERT(mycpuid
== 0);
4525 static_ioc_len
+= IOC_RULESIZE(rule
);
4529 ipfw_dec_static_count(struct ip_fw
*rule
)
4531 int l
= IOC_RULESIZE(rule
);
4533 /* Static rule's counts are updated only on CPU0 */
4534 KKASSERT(mycpuid
== 0);
4536 KASSERT(static_count
> 0, ("invalid static count %u", static_count
));
4539 KASSERT(static_ioc_len
>= l
,
4540 ("invalid static len %u", static_ioc_len
));
4541 static_ioc_len
-= l
;
4545 ipfw_link_sibling(struct netmsg_ipfw
*fwmsg
, struct ip_fw
*rule
)
4547 if (fwmsg
->sibling
!= NULL
) {
4548 KKASSERT(mycpuid
> 0 && fwmsg
->sibling
->cpuid
== mycpuid
- 1);
4549 fwmsg
->sibling
->sibling
= rule
;
4551 fwmsg
->sibling
= rule
;
4554 static struct ip_fw
*
4555 ipfw_create_rule(const struct ipfw_ioc_rule
*ioc_rule
, uint32_t rule_flags
)
4559 rule
= kmalloc(RULESIZE(ioc_rule
), M_IPFW
, M_WAITOK
| M_ZERO
);
4561 rule
->act_ofs
= ioc_rule
->act_ofs
;
4562 rule
->cmd_len
= ioc_rule
->cmd_len
;
4563 rule
->rulenum
= ioc_rule
->rulenum
;
4564 rule
->set
= ioc_rule
->set
;
4565 rule
->usr_flags
= ioc_rule
->usr_flags
;
4567 bcopy(ioc_rule
->cmd
, rule
->cmd
, rule
->cmd_len
* 4 /* XXX */);
4570 rule
->cpuid
= mycpuid
;
4571 rule
->rule_flags
= rule_flags
;
4577 ipfw_add_rule_dispatch(netmsg_t nmsg
)
4579 struct netmsg_ipfw
*fwmsg
= (struct netmsg_ipfw
*)nmsg
;
4580 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4583 ASSERT_NETISR_NCPUS(mycpuid
);
4585 rule
= ipfw_create_rule(fwmsg
->ioc_rule
, fwmsg
->rule_flags
);
4588 * Insert rule into the pre-determined position
4590 if (fwmsg
->prev_rule
!= NULL
) {
4591 struct ip_fw
*prev
, *next
;
4593 prev
= fwmsg
->prev_rule
;
4594 KKASSERT(prev
->cpuid
== mycpuid
);
4596 next
= fwmsg
->next_rule
;
4597 KKASSERT(next
->cpuid
== mycpuid
);
4603 * Move to the position on the next CPU
4604 * before the msg is forwarded.
4606 fwmsg
->prev_rule
= prev
->sibling
;
4607 fwmsg
->next_rule
= next
->sibling
;
4609 KKASSERT(fwmsg
->next_rule
== NULL
);
4610 rule
->next
= ctx
->ipfw_layer3_chain
;
4611 ctx
->ipfw_layer3_chain
= rule
;
4614 /* Link rule CPU sibling */
4615 ipfw_link_sibling(fwmsg
, rule
);
4617 ipfw_flush_rule_ptrs(ctx
);
4620 /* Statistics only need to be updated once */
4621 ipfw_inc_static_count(rule
);
4623 /* Return the rule on CPU0 */
4624 nmsg
->lmsg
.u
.ms_resultp
= rule
;
4627 if (rule
->rule_flags
& IPFW_RULE_F_GENTRACK
)
4628 rule
->track_ruleid
= (uintptr_t)nmsg
->lmsg
.u
.ms_resultp
;
4630 if (fwmsg
->cross_rules
!= NULL
) {
4631 /* Save rules for later use. */
4632 fwmsg
->cross_rules
[mycpuid
] = rule
;
4635 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
4639 ipfw_crossref_rule_dispatch(netmsg_t nmsg
)
4641 struct netmsg_ipfw
*fwmsg
= (struct netmsg_ipfw
*)nmsg
;
4642 struct ip_fw
*rule
= fwmsg
->sibling
;
4643 int sz
= sizeof(struct ip_fw
*) * netisr_ncpus
;
4645 ASSERT_NETISR_NCPUS(mycpuid
);
4646 KASSERT(rule
->rule_flags
& IPFW_RULE_F_CROSSREF
,
4647 ("not crossref rule"));
4649 rule
->cross_rules
= kmalloc(sz
, M_IPFW
, M_WAITOK
);
4650 memcpy(rule
->cross_rules
, fwmsg
->cross_rules
, sz
);
4652 fwmsg
->sibling
= rule
->sibling
;
4653 netisr_forwardmsg(&fwmsg
->base
, mycpuid
+ 1);
4657 * Add a new rule to the list. Copy the rule into a malloc'ed area,
4658 * then possibly create a rule number and add the rule to the list.
4659 * Update the rule_number in the input struct so the caller knows
4663 ipfw_add_rule(struct ipfw_ioc_rule
*ioc_rule
, uint32_t rule_flags
)
4665 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4666 struct netmsg_ipfw fwmsg
;
4667 struct ip_fw
*f
, *prev
, *rule
;
4672 * If rulenum is 0, find highest numbered rule before the
4673 * default rule, and add rule number incremental step.
4675 if (ioc_rule
->rulenum
== 0) {
4676 int step
= autoinc_step
;
4678 KKASSERT(step
>= IPFW_AUTOINC_STEP_MIN
&&
4679 step
<= IPFW_AUTOINC_STEP_MAX
);
4682 * Locate the highest numbered rule before default
4684 for (f
= ctx
->ipfw_layer3_chain
; f
; f
= f
->next
) {
4685 if (f
->rulenum
== IPFW_DEFAULT_RULE
)
4687 ioc_rule
->rulenum
= f
->rulenum
;
4689 if (ioc_rule
->rulenum
< IPFW_DEFAULT_RULE
- step
)
4690 ioc_rule
->rulenum
+= step
;
4692 KASSERT(ioc_rule
->rulenum
!= IPFW_DEFAULT_RULE
&&
4693 ioc_rule
->rulenum
!= 0,
4694 ("invalid rule num %d", ioc_rule
->rulenum
));
4697 * Now find the right place for the new rule in the sorted list.
4699 for (prev
= NULL
, f
= ctx
->ipfw_layer3_chain
; f
;
4700 prev
= f
, f
= f
->next
) {
4701 if (f
->rulenum
> ioc_rule
->rulenum
) {
4702 /* Found the location */
4706 KASSERT(f
!= NULL
, ("no default rule?!"));
4709 * Duplicate the rule onto each CPU.
4710 * The rule duplicated on CPU0 will be returned.
4712 bzero(&fwmsg
, sizeof(fwmsg
));
4713 netmsg_init(&fwmsg
.base
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
4714 ipfw_add_rule_dispatch
);
4715 fwmsg
.ioc_rule
= ioc_rule
;
4716 fwmsg
.prev_rule
= prev
;
4717 fwmsg
.next_rule
= prev
== NULL
? NULL
: f
;
4718 fwmsg
.rule_flags
= rule_flags
;
4719 if (rule_flags
& IPFW_RULE_F_CROSSREF
) {
4720 fwmsg
.cross_rules
= kmalloc(
4721 sizeof(struct ip_fw
*) * netisr_ncpus
, M_TEMP
,
4725 netisr_domsg_global(&fwmsg
.base
);
4726 KKASSERT(fwmsg
.prev_rule
== NULL
&& fwmsg
.next_rule
== NULL
);
4728 rule
= fwmsg
.base
.lmsg
.u
.ms_resultp
;
4729 KKASSERT(rule
!= NULL
&& rule
->cpuid
== mycpuid
);
4731 if (fwmsg
.cross_rules
!= NULL
) {
4732 netmsg_init(&fwmsg
.base
, NULL
, &curthread
->td_msgport
,
4733 MSGF_PRIORITY
, ipfw_crossref_rule_dispatch
);
4734 fwmsg
.sibling
= rule
;
4735 netisr_domsg_global(&fwmsg
.base
);
4736 KKASSERT(fwmsg
.sibling
== NULL
);
4738 kfree(fwmsg
.cross_rules
, M_TEMP
);
4741 atomic_add_int(&ipfw_gd
.ipfw_refcnt
, 1);
4745 DPRINTF("++ installed rule %d, static count now %d\n",
4746 rule
->rulenum
, static_count
);
4750 * Free storage associated with a static rule (including derived
4752 * The caller is in charge of clearing rule pointers to avoid
4753 * dangling pointers.
4754 * @return a pointer to the next entry.
4755 * Arguments are not checked, so they better be correct.
4757 static struct ip_fw
*
4758 ipfw_delete_rule(struct ipfw_context
*ctx
,
4759 struct ip_fw
*prev
, struct ip_fw
*rule
)
4765 ctx
->ipfw_layer3_chain
= n
;
4769 /* Mark the rule as invalid */
4770 rule
->rule_flags
|= IPFW_RULE_F_INVALID
;
4771 rule
->next_rule
= NULL
;
4772 rule
->sibling
= NULL
;
4774 /* Don't reset cpuid here; keep various assertion working */
4778 /* Statistics only need to be updated once */
4780 ipfw_dec_static_count(rule
);
4782 if ((rule
->rule_flags
& IPFW_RULE_F_CROSSREF
) == 0) {
4783 /* Try to free this rule */
4784 ipfw_free_rule(rule
);
4786 /* TODO: check staging area. */
4788 rule
->next
= ipfw_gd
.ipfw_crossref_free
;
4789 ipfw_gd
.ipfw_crossref_free
= rule
;
4793 /* Return the next rule */
4798 ipfw_flush_dispatch(netmsg_t nmsg
)
4800 int kill_default
= nmsg
->lmsg
.u
.ms_result
;
4801 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4804 ASSERT_NETISR_NCPUS(mycpuid
);
4809 ipfw_state_flush(ctx
, NULL
);
4810 KASSERT(ctx
->ipfw_state_cnt
== 0,
4811 ("%d pcpu states remain", ctx
->ipfw_state_cnt
));
4812 ctx
->ipfw_state_loosecnt
= 0;
4813 ctx
->ipfw_state_lastexp
= 0;
4818 ipfw_track_flush(ctx
, NULL
);
4819 ctx
->ipfw_track_lastexp
= 0;
4820 if (ctx
->ipfw_trkcnt_spare
!= NULL
) {
4821 kfree(ctx
->ipfw_trkcnt_spare
, M_IPFW
);
4822 ctx
->ipfw_trkcnt_spare
= NULL
;
4825 ipfw_flush_rule_ptrs(ctx
); /* more efficient to do outside the loop */
4827 while ((rule
= ctx
->ipfw_layer3_chain
) != NULL
&&
4828 (kill_default
|| rule
->rulenum
!= IPFW_DEFAULT_RULE
))
4829 ipfw_delete_rule(ctx
, NULL
, rule
);
4831 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
4835 * Deletes all rules from a chain (including the default rule
4836 * if the second argument is set).
4839 ipfw_flush(int kill_default
)
4841 struct netmsg_base nmsg
;
4843 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4850 * If 'kill_default' then caller has done the necessary
4851 * msgport syncing; unnecessary to do it again.
4853 if (!kill_default
) {
4855 * Let ipfw_chk() know the rules are going to
4856 * be flushed, so it could jump directly to
4860 /* XXX use priority sync */
4861 netmsg_service_sync();
4865 * Press the 'flush' button
4867 bzero(&nmsg
, sizeof(nmsg
));
4868 netmsg_init(&nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
4869 ipfw_flush_dispatch
);
4870 nmsg
.lmsg
.u
.ms_result
= kill_default
;
4871 netisr_domsg_global(&nmsg
);
4872 ipfw_gd
.ipfw_state_loosecnt
= 0;
4873 ipfw_gd
.ipfw_state_globexp
= 0;
4874 ipfw_gd
.ipfw_track_globexp
= 0;
4877 state_cnt
= ipfw_state_cntcoll();
4878 KASSERT(state_cnt
== 0, ("%d states remain", state_cnt
));
4880 KASSERT(ipfw_gd
.ipfw_trkcnt_cnt
== 0,
4881 ("%d trkcnts remain", ipfw_gd
.ipfw_trkcnt_cnt
));
4884 KASSERT(static_count
== 0,
4885 ("%u static rules remain", static_count
));
4886 KASSERT(static_ioc_len
== 0,
4887 ("%u bytes of static rules remain", static_ioc_len
));
4889 KASSERT(static_count
== 1,
4890 ("%u static rules remain", static_count
));
4891 KASSERT(static_ioc_len
== IOC_RULESIZE(ctx
->ipfw_default_rule
),
4892 ("%u bytes of static rules remain, should be %lu",
4894 (u_long
)IOC_RULESIZE(ctx
->ipfw_default_rule
)));
4903 ipfw_alt_delete_rule_dispatch(netmsg_t nmsg
)
4905 struct netmsg_del
*dmsg
= (struct netmsg_del
*)nmsg
;
4906 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4907 struct ip_fw
*rule
, *prev
;
4909 ASSERT_NETISR_NCPUS(mycpuid
);
4911 rule
= dmsg
->start_rule
;
4912 KKASSERT(rule
->cpuid
== mycpuid
);
4913 dmsg
->start_rule
= rule
->sibling
;
4915 prev
= dmsg
->prev_rule
;
4917 KKASSERT(prev
->cpuid
== mycpuid
);
4920 * Move to the position on the next CPU
4921 * before the msg is forwarded.
4923 dmsg
->prev_rule
= prev
->sibling
;
4927 * flush pointers outside the loop, then delete all matching
4928 * rules. 'prev' remains the same throughout the cycle.
4930 ipfw_flush_rule_ptrs(ctx
);
4931 while (rule
&& rule
->rulenum
== dmsg
->rulenum
) {
4932 if (rule
->rule_flags
& IPFW_RULE_F_GENSTATE
) {
4933 /* Flush states generated by this rule. */
4934 ipfw_state_flush(ctx
, rule
);
4936 if (rule
->rule_flags
& IPFW_RULE_F_GENTRACK
) {
4937 /* Flush tracks generated by this rule. */
4938 ipfw_track_flush(ctx
, rule
);
4940 rule
= ipfw_delete_rule(ctx
, prev
, rule
);
4943 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
4947 ipfw_alt_delete_rule(uint16_t rulenum
)
4949 struct ip_fw
*prev
, *rule
;
4950 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4951 struct netmsg_del dmsg
;
4956 * Locate first rule to delete
4958 for (prev
= NULL
, rule
= ctx
->ipfw_layer3_chain
;
4959 rule
&& rule
->rulenum
< rulenum
;
4960 prev
= rule
, rule
= rule
->next
)
4962 if (rule
->rulenum
!= rulenum
)
4966 * Get rid of the rule duplications on all CPUs
4968 bzero(&dmsg
, sizeof(dmsg
));
4969 netmsg_init(&dmsg
.base
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
4970 ipfw_alt_delete_rule_dispatch
);
4971 dmsg
.prev_rule
= prev
;
4972 dmsg
.start_rule
= rule
;
4973 dmsg
.rulenum
= rulenum
;
4975 netisr_domsg_global(&dmsg
.base
);
4976 KKASSERT(dmsg
.prev_rule
== NULL
&& dmsg
.start_rule
== NULL
);
4981 ipfw_alt_delete_ruleset_dispatch(netmsg_t nmsg
)
4983 struct netmsg_del
*dmsg
= (struct netmsg_del
*)nmsg
;
4984 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4985 struct ip_fw
*prev
, *rule
;
4990 ASSERT_NETISR_NCPUS(mycpuid
);
4992 ipfw_flush_rule_ptrs(ctx
);
4995 rule
= ctx
->ipfw_layer3_chain
;
4996 while (rule
!= NULL
) {
4997 if (rule
->set
== dmsg
->from_set
) {
4998 if (rule
->rule_flags
& IPFW_RULE_F_GENSTATE
) {
4999 /* Flush states generated by this rule. */
5000 ipfw_state_flush(ctx
, rule
);
5002 if (rule
->rule_flags
& IPFW_RULE_F_GENTRACK
) {
5003 /* Flush tracks generated by this rule. */
5004 ipfw_track_flush(ctx
, rule
);
5006 rule
= ipfw_delete_rule(ctx
, prev
, rule
);
5015 KASSERT(del
, ("no match set?!"));
5017 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
5021 ipfw_alt_delete_ruleset(uint8_t set
)
5023 struct netmsg_del dmsg
;
5026 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5031 * Check whether the 'set' exists. If it exists,
5032 * then check whether any rules within the set will
5033 * try to create states.
5036 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
) {
5037 if (rule
->set
== set
)
5041 return 0; /* XXX EINVAL? */
5046 bzero(&dmsg
, sizeof(dmsg
));
5047 netmsg_init(&dmsg
.base
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5048 ipfw_alt_delete_ruleset_dispatch
);
5049 dmsg
.from_set
= set
;
5050 netisr_domsg_global(&dmsg
.base
);
5056 ipfw_alt_move_rule_dispatch(netmsg_t nmsg
)
5058 struct netmsg_del
*dmsg
= (struct netmsg_del
*)nmsg
;
5061 ASSERT_NETISR_NCPUS(mycpuid
);
5063 rule
= dmsg
->start_rule
;
5064 KKASSERT(rule
->cpuid
== mycpuid
);
5067 * Move to the position on the next CPU
5068 * before the msg is forwarded.
5070 dmsg
->start_rule
= rule
->sibling
;
5072 while (rule
&& rule
->rulenum
<= dmsg
->rulenum
) {
5073 if (rule
->rulenum
== dmsg
->rulenum
)
5074 rule
->set
= dmsg
->to_set
;
5077 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
5081 ipfw_alt_move_rule(uint16_t rulenum
, uint8_t set
)
5083 struct netmsg_del dmsg
;
5084 struct netmsg_base
*nmsg
;
5086 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5091 * Locate first rule to move
5093 for (rule
= ctx
->ipfw_layer3_chain
; rule
&& rule
->rulenum
<= rulenum
;
5094 rule
= rule
->next
) {
5095 if (rule
->rulenum
== rulenum
&& rule
->set
!= set
)
5098 if (rule
== NULL
|| rule
->rulenum
> rulenum
)
5099 return 0; /* XXX error? */
5101 bzero(&dmsg
, sizeof(dmsg
));
5103 netmsg_init(nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5104 ipfw_alt_move_rule_dispatch
);
5105 dmsg
.start_rule
= rule
;
5106 dmsg
.rulenum
= rulenum
;
5109 netisr_domsg_global(nmsg
);
5110 KKASSERT(dmsg
.start_rule
== NULL
);
5115 ipfw_alt_move_ruleset_dispatch(netmsg_t nmsg
)
5117 struct netmsg_del
*dmsg
= (struct netmsg_del
*)nmsg
;
5118 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5121 ASSERT_NETISR_NCPUS(mycpuid
);
5123 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
) {
5124 if (rule
->set
== dmsg
->from_set
)
5125 rule
->set
= dmsg
->to_set
;
5127 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
5131 ipfw_alt_move_ruleset(uint8_t from_set
, uint8_t to_set
)
5133 struct netmsg_del dmsg
;
5134 struct netmsg_base
*nmsg
;
5138 bzero(&dmsg
, sizeof(dmsg
));
5140 netmsg_init(nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5141 ipfw_alt_move_ruleset_dispatch
);
5142 dmsg
.from_set
= from_set
;
5143 dmsg
.to_set
= to_set
;
5145 netisr_domsg_global(nmsg
);
5150 ipfw_alt_swap_ruleset_dispatch(netmsg_t nmsg
)
5152 struct netmsg_del
*dmsg
= (struct netmsg_del
*)nmsg
;
5153 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5156 ASSERT_NETISR_NCPUS(mycpuid
);
5158 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
) {
5159 if (rule
->set
== dmsg
->from_set
)
5160 rule
->set
= dmsg
->to_set
;
5161 else if (rule
->set
== dmsg
->to_set
)
5162 rule
->set
= dmsg
->from_set
;
5164 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
5168 ipfw_alt_swap_ruleset(uint8_t set1
, uint8_t set2
)
5170 struct netmsg_del dmsg
;
5171 struct netmsg_base
*nmsg
;
5175 bzero(&dmsg
, sizeof(dmsg
));
5177 netmsg_init(nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5178 ipfw_alt_swap_ruleset_dispatch
);
5179 dmsg
.from_set
= set1
;
5182 netisr_domsg_global(nmsg
);
5187 * Remove all rules with given number, and also do set manipulation.
5189 * The argument is a uint32_t. The low 16 bits are the rule or set number,
5190 * the next 8 bits are the new set, the top 8 bits are the command:
5192 * 0 delete rules with given number
5193 * 1 delete rules with given set number
5194 * 2 move rules with given number to new set
5195 * 3 move rules with given set number to new set
5196 * 4 swap sets with given numbers
5199 ipfw_ctl_alter(uint32_t arg
)
5202 uint8_t cmd
, new_set
;
5207 rulenum
= arg
& 0xffff;
5208 cmd
= (arg
>> 24) & 0xff;
5209 new_set
= (arg
>> 16) & 0xff;
5213 if (new_set
>= IPFW_DEFAULT_SET
)
5215 if (cmd
== 0 || cmd
== 2) {
5216 if (rulenum
== IPFW_DEFAULT_RULE
)
5219 if (rulenum
>= IPFW_DEFAULT_SET
)
5224 case 0: /* delete rules with given number */
5225 error
= ipfw_alt_delete_rule(rulenum
);
5228 case 1: /* delete all rules with given set number */
5229 error
= ipfw_alt_delete_ruleset(rulenum
);
5232 case 2: /* move rules with given number to new set */
5233 error
= ipfw_alt_move_rule(rulenum
, new_set
);
5236 case 3: /* move rules with given set number to new set */
5237 error
= ipfw_alt_move_ruleset(rulenum
, new_set
);
5240 case 4: /* swap two sets */
5241 error
= ipfw_alt_swap_ruleset(rulenum
, new_set
);
5248 * Clear counters for a specific rule.
5251 clear_counters(struct ip_fw
*rule
, int log_only
)
5253 ipfw_insn_log
*l
= (ipfw_insn_log
*)ACTION_PTR(rule
);
5255 if (log_only
== 0) {
5256 rule
->bcnt
= rule
->pcnt
= 0;
5257 rule
->timestamp
= 0;
5259 if (l
->o
.opcode
== O_LOG
)
5260 l
->log_left
= l
->max_log
;
5264 ipfw_zero_entry_dispatch(netmsg_t nmsg
)
5266 struct netmsg_zent
*zmsg
= (struct netmsg_zent
*)nmsg
;
5267 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5270 ASSERT_NETISR_NCPUS(mycpuid
);
5272 if (zmsg
->rulenum
== 0) {
5273 KKASSERT(zmsg
->start_rule
== NULL
);
5275 ctx
->ipfw_norule_counter
= 0;
5276 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
)
5277 clear_counters(rule
, zmsg
->log_only
);
5279 struct ip_fw
*start
= zmsg
->start_rule
;
5281 KKASSERT(start
->cpuid
== mycpuid
);
5282 KKASSERT(start
->rulenum
== zmsg
->rulenum
);
5285 * We can have multiple rules with the same number, so we
5286 * need to clear them all.
5288 for (rule
= start
; rule
&& rule
->rulenum
== zmsg
->rulenum
;
5290 clear_counters(rule
, zmsg
->log_only
);
5293 * Move to the position on the next CPU
5294 * before the msg is forwarded.
5296 zmsg
->start_rule
= start
->sibling
;
5298 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
5302 * Reset some or all counters on firewall rules.
5303 * @arg frwl is null to clear all entries, or contains a specific
5305 * @arg log_only is 1 if we only want to reset logs, zero otherwise.
/*
 * Control-path entry for resetting rule counters and/or log counts.
 *
 * rulenum  - rule number to reset; stored in zmsg.rulenum for the
 *            specific-rule case.
 * log_only - copied into zmsg.log_only and used to pick the log text.
 *
 * A zeroed netmsg_zent is initialised with ipfw_zero_entry_dispatch as
 * its handler.  For a specific rule number, the first rule carrying it
 * is looked up on this cpu's ipfw_layer3_chain and becomes
 * zmsg.start_rule.  The message is run with netisr_domsg_global(), after
 * which start_rule is asserted to be NULL, and the result is reported
 * through log(LOG_SECURITY | LOG_NOTICE, ...).
 * NOTE(review): the test that separates the "all rules" path from the
 * single-rule path, the assignment of 'nmsg', and every return statement
 * are missing from this listing -- confirm against the full source.
 */
5308 ipfw_ctl_zero_entry(int rulenum
, int log_only
)
5310 struct netmsg_zent zmsg
;
5311 struct netmsg_base
*nmsg
;
5313 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5317 bzero(&zmsg
, sizeof(zmsg
));
5319 netmsg_init(nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5320 ipfw_zero_entry_dispatch
);
5321 zmsg
.log_only
= log_only
;
5324 msg
= log_only
? "ipfw: All logging counts reset.\n"
5325 : "ipfw: Accounting cleared.\n";
5330 * Locate the first rule with 'rulenum'
5332 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
) {
5333 if (rule
->rulenum
== rulenum
)
5336 if (rule
== NULL
) /* we did not find any matching rules */
5338 zmsg
.start_rule
= rule
;
5339 zmsg
.rulenum
= rulenum
;
5341 msg
= log_only
? "ipfw: Entry %d logging count reset.\n"
5342 : "ipfw: Entry %d cleared.\n";
5344 netisr_domsg_global(nmsg
);
5345 KKASSERT(zmsg
.start_rule
== NULL
);
5348 log(LOG_SECURITY
| LOG_NOTICE
, msg
, rulenum
);
5353 * Check validity of the structure before insert.
5354 * Fortunately rules are simple, so this mostly needs to check rule sizes.
/*
 * Validate a userland rule (struct ipfw_ioc_rule) before it is inserted.
 *
 * rule       - rule image supplied by the caller.
 * size       - number of bytes supplied; must be at least sizeof(*rule)
 *              and is compared against IOC_RULESIZE(rule).
 * rule_flags - output; IPFW_RULE_F_GENSTATE, _GENTRACK, _CROSSREF and
 *              _DYNIFADDR bits are OR-ed in according to the opcodes
 *              found.
 *
 * A rule numbered IPFW_DEFAULT_RULE is rejected.  The instruction list
 * is then walked using F_LEN(cmd) as the stride; each opcode has its
 * length checked against the matching ipfw_insn_* size, table ids are
 * checked against ipfw_table_max, and forwarding to a multicast address
 * is diagnosed.  Problems are reported with kprintf().
 *
 * Side effects on the rule image visible here: arg1 of
 * O_IP_SRC_IFIP / O_IP_DST_IFIP is masked with IPFW_IFIP_SETTINGS, and
 * log_left of a log instruction is initialised from max_log.
 * NOTE(review): most case labels, the error-return statements and the
 * action bookkeeping are missing from this listing, so the exact error
 * codes and per-opcode grouping cannot be confirmed from here.
 */
5357 ipfw_check_ioc_rule(struct ipfw_ioc_rule
*rule
, int size
, uint32_t *rule_flags
)
5360 int have_action
= 0;
5365 /* Check for valid size */
5366 if (size
< sizeof(*rule
)) {
5367 kprintf("ipfw: rule too short\n");
5370 l
= IOC_RULESIZE(rule
);
5372 kprintf("ipfw: size mismatch (have %d want %d)\n", size
, l
);
5376 /* Check rule number */
5377 if (rule
->rulenum
== IPFW_DEFAULT_RULE
) {
5378 kprintf("ipfw: invalid rule number\n");
5383 * Now go for the individual checks. Very simple ones, basically only
5384 * instruction sizes.
5386 for (l
= rule
->cmd_len
, cmd
= rule
->cmd
; l
> 0;
5387 l
-= cmdlen
, cmd
+= cmdlen
) {
5388 cmdlen
= F_LEN(cmd
);
5390 kprintf("ipfw: opcode %d size truncated\n",
5395 DPRINTF("ipfw: opcode %d\n", cmd
->opcode
);
5397 if (cmd
->opcode
== O_KEEP_STATE
|| cmd
->opcode
== O_LIMIT
||
5398 IPFW_ISXLAT(cmd
->opcode
)) {
5399 /* This rule will generate states. */
5400 *rule_flags
|= IPFW_RULE_F_GENSTATE
;
5401 if (cmd
->opcode
== O_LIMIT
)
5402 *rule_flags
|= IPFW_RULE_F_GENTRACK
;
5404 if (cmd
->opcode
== O_DEFRAG
|| IPFW_ISXLAT(cmd
->opcode
))
5405 *rule_flags
|= IPFW_RULE_F_CROSSREF
;
5406 if (cmd
->opcode
== O_IP_SRC_IFIP
||
5407 cmd
->opcode
== O_IP_DST_IFIP
) {
5408 *rule_flags
|= IPFW_RULE_F_DYNIFADDR
;
5409 cmd
->arg1
&= IPFW_IFIP_SETTINGS
;
5412 switch (cmd
->opcode
) {
5427 case O_IPPRECEDENCE
:
5434 if (cmdlen
!= F_INSN_SIZE(ipfw_insn
))
5438 case O_IP_SRC_TABLE
:
5439 case O_IP_DST_TABLE
:
5440 if (cmdlen
!= F_INSN_SIZE(ipfw_insn
))
5442 if (cmd
->arg1
>= ipfw_table_max
) {
5443 kprintf("ipfw: invalid table id %u, max %d\n",
5444 cmd
->arg1
, ipfw_table_max
);
5451 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_ifip
))
5457 if (cmdlen
< F_INSN_SIZE(ipfw_insn_u32
))
5468 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_u32
))
5473 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_limit
))
5477 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_rdr
))
5482 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_log
))
5485 ((ipfw_insn_log
*)cmd
)->log_left
=
5486 ((ipfw_insn_log
*)cmd
)->max_log
;
5492 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_ip
))
5494 if (((ipfw_insn_ip
*)cmd
)->mask
.s_addr
== 0) {
5495 kprintf("ipfw: opcode %d, useless rule\n",
5503 if (cmd
->arg1
== 0 || cmd
->arg1
> 256) {
5504 kprintf("ipfw: invalid set size %d\n",
5508 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_u32
) +
5514 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_mac
))
5520 case O_IP_DSTPORT
: /* XXX artificial limit, 30 port pairs */
5521 if (cmdlen
< 2 || cmdlen
> 31)
5528 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_if
))
5534 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_pipe
))
5539 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_sa
)) {
5544 fwd_addr
= ((ipfw_insn_sa
*)cmd
)->
5546 if (IN_MULTICAST(ntohl(fwd_addr
))) {
5547 kprintf("ipfw: try forwarding to "
5548 "multicast address\n");
5554 case O_FORWARD_MAC
: /* XXX not implemented yet */
5564 if (cmdlen
!= F_INSN_SIZE(ipfw_insn
))
5568 kprintf("ipfw: opcode %d, multiple actions"
5575 kprintf("ipfw: opcode %d, action must be"
5582 kprintf("ipfw: opcode %d, unknown opcode\n",
5587 if (have_action
== 0) {
5588 kprintf("ipfw: missing action\n");
5594 kprintf("ipfw: opcode %d size %d wrong\n",
5595 cmd
->opcode
, cmdlen
);
/*
 * Socket-option handler that adds one rule.
 *
 * The option size must lie between sizeof(*ioc_rule) and
 * sizeof(uint32_t) * IPFW_RULE_SIZE_MAX.  When it is not already the
 * maximum, sopt->sopt_val is grown to the maximum with krealloc()
 * (M_TEMP, M_WAITOK).  The rule is validated with ipfw_check_ioc_rule(),
 * which also yields rule_flags, and then handed to ipfw_add_rule().  For
 * SOPT_GET requests sopt_valsize is set to IOC_RULESIZE(ioc_rule).
 * NOTE(review): the error-return paths are missing from this listing.
 */
5600 ipfw_ctl_add_rule(struct sockopt
*sopt
)
5602 struct ipfw_ioc_rule
*ioc_rule
;
5604 uint32_t rule_flags
;
5609 size
= sopt
->sopt_valsize
;
5610 if (size
> (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX
) ||
5611 size
< sizeof(*ioc_rule
)) {
5614 if (size
!= (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX
)) {
5615 sopt
->sopt_val
= krealloc(sopt
->sopt_val
, sizeof(uint32_t) *
5616 IPFW_RULE_SIZE_MAX
, M_TEMP
, M_WAITOK
);
5618 ioc_rule
= sopt
->sopt_val
;
5620 error
= ipfw_check_ioc_rule(ioc_rule
, size
, &rule_flags
);
5624 ipfw_add_rule(ioc_rule
, rule_flags
);
5626 if (sopt
->sopt_dir
== SOPT_GET
)
5627 sopt
->sopt_valsize
= IOC_RULESIZE(ioc_rule
);
/*
 * Export one static rule into its userland representation.
 *
 * ctx      - context supplying ipfw_set_disable.
 * rule     - rule to export; asserted to belong to cpu0.
 * ioc_rule - destination buffer.
 *
 * Rule metadata, the set-disable mask and the global static_count /
 * static_ioc_len are copied.  pcnt and bcnt are accumulated over the
 * rule and all of its siblings, and the newest sibling timestamp is
 * kept.  The instruction words are copied with bcopy(), cmd_len * 4
 * bytes.
 *
 * Returns the address just past the exported rule, i.e.
 * (uint8_t *)ioc_rule + IOC_RULESIZE(ioc_rule).
 * NOTE(review): the initialisation of pcnt/bcnt and of the sibling
 * counter 'i' checked by the KASSERT is missing from this listing.
 */
5632 ipfw_copy_rule(const struct ipfw_context
*ctx
, const struct ip_fw
*rule
,
5633 struct ipfw_ioc_rule
*ioc_rule
)
5635 const struct ip_fw
*sibling
;
5641 KASSERT(rule
->cpuid
== 0, ("rule does not belong to cpu0"));
5643 ioc_rule
->act_ofs
= rule
->act_ofs
;
5644 ioc_rule
->cmd_len
= rule
->cmd_len
;
5645 ioc_rule
->rulenum
= rule
->rulenum
;
5646 ioc_rule
->set
= rule
->set
;
5647 ioc_rule
->usr_flags
= rule
->usr_flags
;
5649 ioc_rule
->set_disable
= ctx
->ipfw_set_disable
;
5650 ioc_rule
->static_count
= static_count
;
5651 ioc_rule
->static_len
= static_ioc_len
;
5654 * Visit (read-only) all of the rule's duplications to get
5655 * the necessary statistics
5662 ioc_rule
->timestamp
= 0;
5663 for (sibling
= rule
; sibling
!= NULL
; sibling
= sibling
->sibling
) {
5664 ioc_rule
->pcnt
+= sibling
->pcnt
;
5665 ioc_rule
->bcnt
+= sibling
->bcnt
;
5666 if (sibling
->timestamp
> ioc_rule
->timestamp
)
5667 ioc_rule
->timestamp
= sibling
->timestamp
;
5672 KASSERT(i
== netisr_ncpus
,
5673 ("static rule is not duplicated on netisr_ncpus %d", netisr_ncpus
));
5675 bcopy(rule
->cmd
, ioc_rule
->cmd
, ioc_rule
->cmd_len
* 4 /* XXX */);
5677 return ((uint8_t *)ioc_rule
+ IOC_RULESIZE(ioc_rule
));
/*
 * Export a track counter as a userland state record.
 *
 * trk       - track counter; a tc_expire of 0 marks one that was not
 *             scanned.
 * ioc_state - destination record.
 *
 * expire is the time remaining until tc_expire (0 when already past,
 * per TIME_LEQ against time_uptime), pcnt/bcnt are 0, dyn_type is
 * O_LIMIT_PARENT, and count, rulenum and the IPv4 flow id come from the
 * tc_* fields.
 * NOTE(review): the return statements are missing from this listing;
 * the caller tests the result as a boolean.
 */
5681 ipfw_track_copy(const struct ipfw_trkcnt
*trk
, struct ipfw_ioc_state
*ioc_state
)
5683 struct ipfw_ioc_flowid
*ioc_id
;
5685 if (trk
->tc_expire
== 0) {
5686 /* Not a scanned one. */
5690 ioc_state
->expire
= TIME_LEQ(trk
->tc_expire
, time_uptime
) ?
5691 0 : trk
->tc_expire
- time_uptime
;
5692 ioc_state
->pcnt
= 0;
5693 ioc_state
->bcnt
= 0;
5695 ioc_state
->dyn_type
= O_LIMIT_PARENT
;
5696 ioc_state
->count
= trk
->tc_count
;
5698 ioc_state
->rulenum
= trk
->tc_rulenum
;
5700 ioc_id
= &ioc_state
->id
;
5701 ioc_id
->type
= ETHERTYPE_IP
;
5702 ioc_id
->u
.ip
.proto
= trk
->tc_proto
;
5703 ioc_id
->u
.ip
.src_ip
= trk
->tc_saddr
;
5704 ioc_id
->u
.ip
.dst_ip
= trk
->tc_daddr
;
5705 ioc_id
->u
.ip
.src_port
= trk
->tc_sport
;
5706 ioc_id
->u
.ip
.dst_port
= trk
->tc_dport
;
/*
 * Export one state as a userland state record.
 *
 * s         - state to export; IPFW_STATE_SCANSKIP(s) is tested first.
 * ioc_state - destination record.
 *
 * expire is the time remaining until st_expire (0 when already past),
 * pcnt/bcnt/dyn_type come from the state, count is 0, rulenum comes
 * from s->st_rule, and the IPv4 flow id is filled from st_proto and
 * ipfw_key_4tuple().  For translation states (IPFW_ISXLAT) xlat_port
 * falls back to the destination port when x->xlat_port is 0, otherwise
 * it is ntohs(x->xlat_port); xlat_addr is ntohl(x->xlat_addr); and the
 * paired state's xlat_pcnt/xlat_bcnt are added to pcnt/bcnt.
 * NOTE(review): the return statements are missing from this listing;
 * the caller tests the result as a boolean.
 */
5712 ipfw_state_copy(const struct ipfw_state
*s
, struct ipfw_ioc_state
*ioc_state
)
5714 struct ipfw_ioc_flowid
*ioc_id
;
5716 if (IPFW_STATE_SCANSKIP(s
))
5719 ioc_state
->expire
= TIME_LEQ(s
->st_expire
, time_uptime
) ?
5720 0 : s
->st_expire
- time_uptime
;
5721 ioc_state
->pcnt
= s
->st_pcnt
;
5722 ioc_state
->bcnt
= s
->st_bcnt
;
5724 ioc_state
->dyn_type
= s
->st_type
;
5725 ioc_state
->count
= 0;
5727 ioc_state
->rulenum
= s
->st_rule
->rulenum
;
5729 ioc_id
= &ioc_state
->id
;
5730 ioc_id
->type
= ETHERTYPE_IP
;
5731 ioc_id
->u
.ip
.proto
= s
->st_proto
;
5732 ipfw_key_4tuple(&s
->st_key
,
5733 &ioc_id
->u
.ip
.src_ip
, &ioc_id
->u
.ip
.src_port
,
5734 &ioc_id
->u
.ip
.dst_ip
, &ioc_id
->u
.ip
.dst_port
);
5736 if (IPFW_ISXLAT(s
->st_type
)) {
5737 const struct ipfw_xlat
*x
= (const struct ipfw_xlat
*)s
;
5739 if (x
->xlat_port
== 0)
5740 ioc_state
->xlat_port
= ioc_id
->u
.ip
.dst_port
;
5742 ioc_state
->xlat_port
= ntohs(x
->xlat_port
);
5743 ioc_state
->xlat_addr
= ntohl(x
->xlat_addr
);
5745 ioc_state
->pcnt
+= x
->xlat_pair
->xlat_pcnt
;
5746 ioc_state
->bcnt
+= x
->xlat_pair
->xlat_bcnt
;
/*
 * Message handler that exports this cpu's states, and on the last
 * netisr cpu also the global track counters, into the buffer described
 * by a struct netmsg_cpstate.
 *
 * Each entry of ctx->ipfw_state_list is offered to ipfw_state_copy();
 * copying stops once nm->state_cnt reaches nm->state_cntmax.  The
 * per-cpu track list is then walked (anchors, which have a NULL
 * t_count, are skipped) to fold t_expire into the shared trkcnt's
 * tc_expire.  When mycpuid == netisr_ncpus - 1, the global
 * ipfw_trkcnt_tree is walked with ipfw_track_copy().
 *
 * If the buffer is full the message is replied with 0; the
 * netisr_forwardmsg() call passes it on to cpu mycpuid + 1.
 * NOTE(review): the lines that advance nm->ioc_state / nm->state_cnt
 * and the loop exits are missing from this listing.
 */
5753 ipfw_state_copy_dispatch(netmsg_t nmsg
)
5755 struct netmsg_cpstate
*nm
= (struct netmsg_cpstate
*)nmsg
;
5756 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5757 const struct ipfw_state
*s
;
5758 const struct ipfw_track
*t
;
5760 ASSERT_NETISR_NCPUS(mycpuid
);
5761 KASSERT(nm
->state_cnt
< nm
->state_cntmax
,
5762 ("invalid state count %d, max %d",
5763 nm
->state_cnt
, nm
->state_cntmax
));
5765 TAILQ_FOREACH(s
, &ctx
->ipfw_state_list
, st_link
) {
5766 if (ipfw_state_copy(s
, nm
->ioc_state
)) {
5769 if (nm
->state_cnt
== nm
->state_cntmax
)
5775 * Prepare tracks in the global track tree for userland.
5777 TAILQ_FOREACH(t
, &ctx
->ipfw_track_list
, t_link
) {
5778 struct ipfw_trkcnt
*trk
;
5780 if (t
->t_count
== NULL
) /* anchor */
5785 * Only one netisr can run this function at
5786 * any time, and only this function accesses
5787 * trkcnt's tc_expire, so this is safe w/o
5788 * ipfw_gd.ipfw_trkcnt_token.
5790 if (trk
->tc_expire
> t
->t_expire
)
5792 trk
->tc_expire
= t
->t_expire
;
5796 * Copy tracks in the global track tree to userland in
5799 if (mycpuid
== netisr_ncpus
- 1) {
5800 struct ipfw_trkcnt
*trk
;
5802 KASSERT(nm
->state_cnt
< nm
->state_cntmax
,
5803 ("invalid state count %d, max %d",
5804 nm
->state_cnt
, nm
->state_cntmax
));
5807 RB_FOREACH(trk
, ipfw_trkcnt_tree
, &ipfw_gd
.ipfw_trkcnt_tree
) {
5808 if (ipfw_track_copy(trk
, nm
->ioc_state
)) {
5811 if (nm
->state_cnt
== nm
->state_cntmax
) {
5820 if (nm
->state_cnt
== nm
->state_cntmax
) {
5821 /* No more space; done. */
5822 netisr_replymsg(&nm
->base
, 0);
5824 netisr_forwardmsg(&nm
->base
, mycpuid
+ 1);
/*
 * Socket-option handler that returns the static rules followed by the
 * states.
 *
 * The required size is static_ioc_len plus room for
 * (ipfw_state_cntcoll() + ipfw_gd.ipfw_trkcnt_cnt) * 5 / 4 state
 * records.  If the caller's buffer is smaller, it is zeroed.  Otherwise
 * every rule of this cpu's ipfw_layer3_chain is exported with
 * ipfw_copy_rule(), the states are gathered by running
 * ipfw_state_copy_dispatch through netisr_domsg_global(), and
 * sopt_valsize is set to the size recomputed from the number of states
 * actually copied (asserted not to exceed the first estimate).
 * NOTE(review): return statements and the condition guarding the state
 * copy are missing from this listing.
 */
5829 ipfw_ctl_get_rules(struct sockopt
*sopt
)
5831 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5840 * pass up a copy of the current rules. Static rules
5841 * come first (the last of which has number IPFW_DEFAULT_RULE),
5842 * followed by a possibly empty list of states.
5845 size
= static_ioc_len
; /* size of static rules */
5848 * Size of the states.
5849 * XXX take tracks as state for userland compat.
5851 state_cnt
= ipfw_state_cntcoll() + ipfw_gd
.ipfw_trkcnt_cnt
;
5852 state_cnt
= (state_cnt
* 5) / 4; /* leave 25% headroom */
5853 size
+= state_cnt
* sizeof(struct ipfw_ioc_state
);
5855 if (sopt
->sopt_valsize
< size
) {
5856 /* short length, no need to return incomplete rules */
5857 /* XXX: if superuser, no need to zero buffer */
5858 bzero(sopt
->sopt_val
, sopt
->sopt_valsize
);
5861 bp
= sopt
->sopt_val
;
5863 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
)
5864 bp
= ipfw_copy_rule(ctx
, rule
, bp
);
5867 struct netmsg_cpstate nm
;
5869 size_t old_size
= size
;
5872 netmsg_init(&nm
.base
, NULL
, &curthread
->td_msgport
,
5873 MSGF_PRIORITY
, ipfw_state_copy_dispatch
);
5875 nm
.state_cntmax
= state_cnt
;
5877 netisr_domsg_global(&nm
.base
);
5880 * The # of states may be shrinked after the snapshot
5881 * of the state count was taken. To give user a correct
5882 * state count, nm->state_cnt is used to recalculate
5885 size
= static_ioc_len
+
5886 (nm
.state_cnt
* sizeof(struct ipfw_ioc_state
));
5887 KKASSERT(size
<= old_size
);
5890 sopt
->sopt_valsize
= size
;
/*
 * Message handler that installs a new set-disable mask on this cpu.
 *
 * The mask travels in nmsg->lmsg.u.ms_result32 and is stored in the
 * per-cpu context's ipfw_set_disable; the message is then forwarded to
 * cpu mycpuid + 1.
 */
5895 ipfw_set_disable_dispatch(netmsg_t nmsg
)
5897 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5899 ASSERT_NETISR_NCPUS(mycpuid
);
5901 ctx
->ipfw_set_disable
= nmsg
->lmsg
.u
.ms_result32
;
5902 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
/*
 * Atomically disable and enable rule sets.
 *
 * disable - bitmask of sets to disable.
 * enable  - bitmask of sets to enable; the IPFW_DEFAULT_SET bit is
 *           always forced on, so that set can never be disabled.
 *
 * The new mask is (current ipfw_set_disable | disable) & ~enable.  It is
 * placed in ms_result32 of a netmsg handled by
 * ipfw_set_disable_dispatch and run through netisr_domsg_global().
 */
5906 ipfw_ctl_set_disable(uint32_t disable
, uint32_t enable
)
5908 struct netmsg_base nmsg
;
5909 uint32_t set_disable
;
5913 /* IPFW_DEFAULT_SET is always enabled */
5914 enable
|= (1 << IPFW_DEFAULT_SET
);
5915 set_disable
= (ipfw_ctx
[mycpuid
]->ipfw_set_disable
| disable
) & ~enable
;
5917 bzero(&nmsg
, sizeof(nmsg
));
5918 netmsg_init(&nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5919 ipfw_set_disable_dispatch
);
5920 nmsg
.lmsg
.u
.ms_result32
= set_disable
;
5922 netisr_domsg_global(&nmsg
);
/*
 * Message handler that creates the radix head for one table on this
 * cpu.
 *
 * The table id travels in nm->lmsg.u.ms_result.  rn_inithead() is
 * called on ctx->ipfw_tables[tblid] with rn_cpumaskhead(mycpuid) and an
 * offset of 32; failure panics.  The message is then forwarded to cpu
 * mycpuid + 1.
 */
5926 ipfw_table_create_dispatch(netmsg_t nm
)
5928 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5929 int tblid
= nm
->lmsg
.u
.ms_result
;
5931 ASSERT_NETISR_NCPUS(mycpuid
);
5933 if (!rn_inithead((void **)&ctx
->ipfw_tables
[tblid
],
5934 rn_cpumaskhead(mycpuid
), 32))
5935 panic("ipfw: create table%d failed", tblid
);
5937 netisr_forwardmsg(&nm
->base
, mycpuid
+ 1);
/*
 * Socket-option handler that creates a table.
 *
 * The option must be exactly sizeof(struct ipfw_ioc_table), the table
 * id must be within [0, ipfw_table_max), and the slot in
 * ctx->ipfw_tables is tested against NULL.  The id is then passed in
 * ms_result of a netmsg handled by ipfw_table_create_dispatch and run
 * through netisr_domsg_global().
 * NOTE(review): the error codes returned by the three checks are
 * missing from this listing.
 */
5941 ipfw_table_create(struct sockopt
*sopt
)
5943 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5944 struct ipfw_ioc_table
*tbl
;
5945 struct netmsg_base nm
;
5949 if (sopt
->sopt_valsize
!= sizeof(*tbl
))
5952 tbl
= sopt
->sopt_val
;
5953 if (tbl
->tableid
< 0 || tbl
->tableid
>= ipfw_table_max
)
5956 if (ctx
->ipfw_tables
[tbl
->tableid
] != NULL
)
5959 netmsg_init(&nm
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5960 ipfw_table_create_dispatch
);
5961 nm
.lmsg
.u
.ms_result
= tbl
->tableid
;
5962 netisr_domsg_global(&nm
);
/*
 * Delete one node from a table's radix tree.
 *
 * rnh - radix head owning the node.
 * rn  - node to delete; its rn_key and rn_mask are passed to
 *       rnh->rnh_deladdr().
 *
 * NOTE(review): the check leading to the "deleted other table entry"
 * panic and the release of the node are missing from this listing;
 * presumably the panic fires when rnh_deladdr() returns a node other
 * than rn -- confirm.
 */
5968 ipfw_table_killrn(struct radix_node_head
*rnh
, struct radix_node
*rn
)
5970 struct radix_node
*ret
;
5972 ret
= rnh
->rnh_deladdr(rn
->rn_key
, rn
->rn_mask
, rnh
);
5974 panic("deleted other table entry");
/*
 * Tree-walk callback that deletes the visited node.
 *
 * rn   - node being visited.
 * xrnh - opaque walk argument, used as the radix head handed to
 *        ipfw_table_killrn().
 */
5979 ipfw_table_killent(struct radix_node
*rn
, void *xrnh
)
5982 ipfw_table_killrn(xrnh
, rn
);
/*
 * Remove every entry of one table on the current cpu.
 *
 * ctx     - per-cpu context owning the table array.
 * tableid - index into ctx->ipfw_tables.
 *
 * The table's radix tree is walked with ipfw_table_killent, passing the
 * head itself as the walk argument.  The slot is also reset to NULL.
 * NOTE(review): the third parameter and the condition guarding the
 * NULL assignment are missing from this listing; callers pass a
 * 'destroy' flag, so the slot is presumably only cleared when
 * destroying -- confirm.
 */
5987 ipfw_table_flush_oncpu(struct ipfw_context
*ctx
, int tableid
,
5990 struct radix_node_head
*rnh
;
5992 ASSERT_NETISR_NCPUS(mycpuid
);
5994 rnh
= ctx
->ipfw_tables
[tableid
];
5995 rnh
->rnh_walktree(rnh
, ipfw_table_killent
, rnh
);
5998 ctx
->ipfw_tables
[tableid
] = NULL
;
/*
 * Message handler that flushes one table on this cpu.
 *
 * nmsg is treated as a struct netmsg_tblflush; its tableid and destroy
 * fields are handed to ipfw_table_flush_oncpu(), after which the message
 * is forwarded to cpu mycpuid + 1.
 */
6003 ipfw_table_flush_dispatch(netmsg_t nmsg
)
6005 struct netmsg_tblflush
*nm
= (struct netmsg_tblflush
*)nmsg
;
6006 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6008 ASSERT_NETISR_NCPUS(mycpuid
);
6010 ipfw_table_flush_oncpu(ctx
, nm
->tableid
, nm
->destroy
);
6011 netisr_forwardmsg(&nm
->base
, mycpuid
+ 1);
/*
 * Flush every existing table on the current cpu.
 *
 * ctx     - per-cpu context owning the table array.
 * destroy - passed through unchanged to ipfw_table_flush_oncpu().
 *
 * Slots 0 .. ipfw_table_max - 1 are visited; NULL slots are skipped.
 */
6015 ipfw_table_flushall_oncpu(struct ipfw_context
*ctx
, int destroy
)
6019 ASSERT_NETISR_NCPUS(mycpuid
);
6021 for (i
= 0; i
< ipfw_table_max
; ++i
) {
6022 if (ctx
->ipfw_tables
[i
] != NULL
)
6023 ipfw_table_flush_oncpu(ctx
, i
, destroy
);
/*
 * Message handler that flushes all tables on this cpu.
 *
 * Calls ipfw_table_flushall_oncpu() with destroy == 0 and forwards the
 * message to cpu mycpuid + 1.
 */
6028 ipfw_table_flushall_dispatch(netmsg_t nmsg
)
6030 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6032 ASSERT_NETISR_NCPUS(mycpuid
);
6034 ipfw_table_flushall_oncpu(ctx
, 0);
6035 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
/*
 * Socket-option handler for table flush and destroy requests.
 *
 * The option must be exactly sizeof(struct ipfw_ioc_table).  For
 * IP_FW_TBL_FLUSH with a negative table id, all tables are flushed via
 * ipfw_table_flushall_dispatch.  Otherwise the id must be within
 * [0, ipfw_table_max) and the slot is tested against NULL before a
 * netmsg_tblflush handled by ipfw_table_flush_dispatch is sent with
 * netisr_domsg_global().
 * NOTE(review): the error codes and the statement executed for
 * IP_FW_TBL_DESTROY (presumably setting nm.destroy) are missing from
 * this listing.
 */
6039 ipfw_table_flush(struct sockopt
*sopt
)
6041 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6042 struct ipfw_ioc_table
*tbl
;
6043 struct netmsg_tblflush nm
;
6047 if (sopt
->sopt_valsize
!= sizeof(*tbl
))
6050 tbl
= sopt
->sopt_val
;
6051 if (sopt
->sopt_name
== IP_FW_TBL_FLUSH
&& tbl
->tableid
< 0) {
6052 netmsg_init(&nm
.base
, NULL
, &curthread
->td_msgport
,
6053 MSGF_PRIORITY
, ipfw_table_flushall_dispatch
);
6054 netisr_domsg_global(&nm
.base
);
6058 if (tbl
->tableid
< 0 || tbl
->tableid
>= ipfw_table_max
)
6061 if (ctx
->ipfw_tables
[tbl
->tableid
] == NULL
)
6064 netmsg_init(&nm
.base
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
6065 ipfw_table_flush_dispatch
);
6066 nm
.tableid
= tbl
->tableid
;
6068 if (sopt
->sopt_name
== IP_FW_TBL_DESTROY
)
6070 netisr_domsg_global(&nm
.base
);
/*
 * Tree-walk callback taking an unused node and an opaque counter
 * argument.
 * NOTE(review): the body is missing from this listing.  It is installed
 * by ipfw_table_get() with the address of an entry counter, so it
 * presumably increments that counter once per node -- confirm.
 */
6076 ipfw_table_cntent(struct radix_node
*rn __unused
, void *xcnt
)
/*
 * Tree-walk callback that exports one table entry.
 *
 * rn  - visited node, treated as a struct ipfw_tblent.
 * xcp - struct ipfw_table_cp describing the output array (te, te_idx,
 *       te_cnt); te_idx is asserted to be below te_cnt.
 *
 * The netmask is copied using the first byte of rn_mask as the length,
 * or netmask.sin_len is set to 0 when the node has no mask.  The key is
 * copied, and use/last_used start from this entry and are then combined
 * over the te_sibling chain: use is summed and last_used keeps the
 * newest te_lastuse.  The number of entries visited is asserted to
 * equal netisr_ncpus.
 * NOTE(review): the counter updates (cnt, te_idx) and the return value
 * are missing from this listing.
 */
6085 ipfw_table_cpent(struct radix_node
*rn
, void *xcp
)
6087 struct ipfw_table_cp
*cp
= xcp
;
6088 struct ipfw_tblent
*te
= (struct ipfw_tblent
*)rn
;
6089 struct ipfw_ioc_tblent
*ioc_te
;
6094 KASSERT(cp
->te_idx
< cp
->te_cnt
, ("invalid table cp idx %d, cnt %d",
6095 cp
->te_idx
, cp
->te_cnt
));
6096 ioc_te
= &cp
->te
[cp
->te_idx
];
6098 if (te
->te_nodes
->rn_mask
!= NULL
) {
6099 memcpy(&ioc_te
->netmask
, te
->te_nodes
->rn_mask
,
6100 *te
->te_nodes
->rn_mask
);
6102 ioc_te
->netmask
.sin_len
= 0;
6104 memcpy(&ioc_te
->key
, &te
->te_key
, sizeof(ioc_te
->key
));
6106 ioc_te
->use
= te
->te_use
;
6107 ioc_te
->last_used
= te
->te_lastuse
;
6112 while ((te
= te
->te_sibling
) != NULL
) {
6116 ioc_te
->use
+= te
->te_use
;
6117 if (te
->te_lastuse
> ioc_te
->last_used
)
6118 ioc_te
->last_used
= te
->te_lastuse
;
6120 KASSERT(cnt
== netisr_ncpus
,
6121 ("invalid # of tblent %d, should be %d", cnt
, netisr_ncpus
));
/*
 * Socket-option handler that lists table ids or dumps one table.
 *
 * The option must be at least sizeof(struct ipfw_ioc_table).
 *  - Negative table id: the table slots are scanned, the required size
 *    is __offsetof(struct ipfw_ioc_tbllist, tables[cnt]), and the ids of
 *    all non-NULL slots are written to list->tables.
 *  - Otherwise (id below ipfw_table_max): the tree is walked once with
 *    ipfw_table_cntent to count entries, the required size is
 *    __offsetof(struct ipfw_ioc_tblcont, ent[cnt]), and a second walk
 *    with ipfw_table_cpent fills the entries.
 * In both modes a too-small buffer is zeroed, and on success
 * sopt_valsize is set to the computed size.
 * NOTE(review): counter initialisation, the setup of 'cp' and 'cont',
 * the NULL-table check and all return statements are missing from this
 * listing.
 */
6129 ipfw_table_get(struct sockopt
*sopt
)
6131 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6132 struct radix_node_head
*rnh
;
6133 struct ipfw_ioc_table
*tbl
;
6134 struct ipfw_ioc_tblcont
*cont
;
6135 struct ipfw_table_cp cp
;
6140 if (sopt
->sopt_valsize
< sizeof(*tbl
))
6143 tbl
= sopt
->sopt_val
;
6144 if (tbl
->tableid
< 0) {
6145 struct ipfw_ioc_tbllist
*list
;
6149 * List available table ids.
6151 for (i
= 0; i
< ipfw_table_max
; ++i
) {
6152 if (ctx
->ipfw_tables
[i
] != NULL
)
6156 sz
= __offsetof(struct ipfw_ioc_tbllist
, tables
[cnt
]);
6157 if (sopt
->sopt_valsize
< sz
) {
6158 bzero(sopt
->sopt_val
, sopt
->sopt_valsize
);
6161 list
= sopt
->sopt_val
;
6162 list
->tablecnt
= cnt
;
6165 for (i
= 0; i
< ipfw_table_max
; ++i
) {
6166 if (ctx
->ipfw_tables
[i
] != NULL
) {
6167 KASSERT(cnt
< list
->tablecnt
,
6168 ("invalid idx %d, cnt %d",
6169 cnt
, list
->tablecnt
));
6170 list
->tables
[cnt
++] = i
;
6173 sopt
->sopt_valsize
= sz
;
6175 } else if (tbl
->tableid
>= ipfw_table_max
) {
6179 rnh
= ctx
->ipfw_tables
[tbl
->tableid
];
6182 rnh
->rnh_walktree(rnh
, ipfw_table_cntent
, &cnt
);
6184 sz
= __offsetof(struct ipfw_ioc_tblcont
, ent
[cnt
]);
6185 if (sopt
->sopt_valsize
< sz
) {
6186 bzero(sopt
->sopt_val
, sopt
->sopt_valsize
);
6189 cont
= sopt
->sopt_val
;
6195 rnh
->rnh_walktree(rnh
, ipfw_table_cpent
, &cp
);
6197 sopt
->sopt_valsize
= sz
;
/*
 * Message handler that adds one entry to a table on this cpu.
 *
 * nmsg is treated as a struct netmsg_tblent.  A zeroed ipfw_tblent is
 * allocated (M_IPFW, M_WAITOK | M_ZERO), its radix key is pointed at
 * its own te_key, the key is copied from nm->key, and the entry is
 * inserted with rnh->rnh_addaddr() using nm->netmask.
 *
 * A NULL result from rnh_addaddr() leads either to a reply with EEXIST
 * or to a panic.  On success the entry is linked as te_sibling of the
 * previous cpu's entry (nm->sibling) when there is one, and the message
 * is forwarded to cpu mycpuid + 1.
 * NOTE(review): the condition choosing between EEXIST and the panic,
 * the cleanup of 'te', and the update of nm->sibling are missing from
 * this listing.
 */
6202 ipfw_table_add_dispatch(netmsg_t nmsg
)
6204 struct netmsg_tblent
*nm
= (struct netmsg_tblent
*)nmsg
;
6205 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6206 struct radix_node_head
*rnh
;
6207 struct ipfw_tblent
*te
;
6209 ASSERT_NETISR_NCPUS(mycpuid
);
6211 rnh
= ctx
->ipfw_tables
[nm
->tableid
];
6213 te
= kmalloc(sizeof(*te
), M_IPFW
, M_WAITOK
| M_ZERO
);
6214 te
->te_nodes
->rn_key
= (char *)&te
->te_key
;
6215 memcpy(&te
->te_key
, nm
->key
, sizeof(te
->te_key
));
6217 if (rnh
->rnh_addaddr((char *)&te
->te_key
, (char *)nm
->netmask
, rnh
,
6218 te
->te_nodes
) == NULL
) {
6221 netisr_replymsg(&nm
->base
, EEXIST
);
6224 panic("rnh_addaddr failed");
6227 /* Link siblings. */
6228 if (nm
->sibling
!= NULL
)
6229 nm
->sibling
->te_sibling
= te
;
6232 netisr_forwardmsg(&nm
->base
, mycpuid
+ 1);
/*
 * Message handler that deletes one entry from a table on this cpu.
 *
 * nmsg is treated as a struct netmsg_tblent.  The entry identified by
 * nm->key / nm->netmask is removed with rnh->rnh_deladdr().  The visible
 * failure paths are a reply with ESRCH and a panic; on success the
 * message is forwarded to cpu mycpuid + 1.
 * NOTE(review): the conditions selecting ESRCH vs. the panic and the
 * release of the removed node are missing from this listing.
 */
6236 ipfw_table_del_dispatch(netmsg_t nmsg
)
6238 struct netmsg_tblent
*nm
= (struct netmsg_tblent
*)nmsg
;
6239 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6240 struct radix_node_head
*rnh
;
6241 struct radix_node
*rn
;
6243 ASSERT_NETISR_NCPUS(mycpuid
);
6245 rnh
= ctx
->ipfw_tables
[nm
->tableid
];
6246 rn
= rnh
->rnh_deladdr((char *)nm
->key
, (char *)nm
->netmask
, rnh
);
6249 netisr_replymsg(&nm
->base
, ESRCH
);
6252 panic("rnh_deladdr failed");
6256 netisr_forwardmsg(&nm
->base
, mycpuid
+ 1);
/*
 * Socket-option handler that adds or deletes a single table entry.
 *
 * Checks visible here: the option must be exactly
 * sizeof(struct ipfw_ioc_tblcont); the table id must be within
 * [0, ipfw_table_max); entcnt must be 1; the table slot is tested
 * against NULL; the key must be AF_INET with port 0 and
 * sin_len == sizeof(struct sockaddr_in); a netmask with non-zero
 * sin_len must have port 0 and sin_len no larger than
 * sizeof(struct sockaddr_in).
 *
 * When a netmask is given, the key is masked into key0 with
 * sa_maskedcopy() and key0 is used instead.  The handler is
 * ipfw_table_add_dispatch for IP_FW_TBL_ADD and ipfw_table_del_dispatch
 * otherwise.
 *
 * Returns the result of netisr_domsg_global().
 * NOTE(review): the error codes of the checks and the assignments of
 * 'te' and nm.key are missing from this listing.
 */
6260 ipfw_table_alt(struct sockopt
*sopt
)
6262 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6263 struct ipfw_ioc_tblcont
*tbl
;
6264 struct ipfw_ioc_tblent
*te
;
6265 struct sockaddr_in key0
;
6266 struct sockaddr
*netmask
= NULL
, *key
;
6267 struct netmsg_tblent nm
;
6271 if (sopt
->sopt_valsize
!= sizeof(*tbl
))
6273 tbl
= sopt
->sopt_val
;
6275 if (tbl
->tableid
< 0 || tbl
->tableid
>= ipfw_table_max
)
6277 if (tbl
->entcnt
!= 1)
6280 if (ctx
->ipfw_tables
[tbl
->tableid
] == NULL
)
6284 if (te
->key
.sin_family
!= AF_INET
||
6285 te
->key
.sin_port
!= 0 ||
6286 te
->key
.sin_len
!= sizeof(struct sockaddr_in
))
6288 key
= (struct sockaddr
*)&te
->key
;
6290 if (te
->netmask
.sin_len
!= 0) {
6291 if (te
->netmask
.sin_port
!= 0 ||
6292 te
->netmask
.sin_len
> sizeof(struct sockaddr_in
))
6294 netmask
= (struct sockaddr
*)&te
->netmask
;
6295 sa_maskedcopy(key
, (struct sockaddr
*)&key0
, netmask
);
6296 key
= (struct sockaddr
*)&key0
;
6299 if (sopt
->sopt_name
== IP_FW_TBL_ADD
) {
6300 netmsg_init(&nm
.base
, NULL
, &curthread
->td_msgport
,
6301 MSGF_PRIORITY
, ipfw_table_add_dispatch
);
6303 netmsg_init(&nm
.base
, NULL
, &curthread
->td_msgport
,
6304 MSGF_PRIORITY
, ipfw_table_del_dispatch
);
6307 nm
.netmask
= netmask
;
6308 nm
.tableid
= tbl
->tableid
;
6310 return (netisr_domsg_global(&nm
.base
));
/*
 * Tree-walk callback operating on the visited node as a
 * struct ipfw_tblent; the walk argument is unused.
 * NOTE(review): the statements that modify the entry are missing from
 * this listing.  Given the name and its use by the table-zero handlers
 * it presumably clears the entry's usage statistics -- confirm.
 */
6314 ipfw_table_zeroent(struct radix_node
*rn
, void *arg __unused
)
6316 struct ipfw_tblent
*te
= (struct ipfw_tblent
*)rn
;
/*
 * Message handler that applies ipfw_table_zeroent to every entry of one
 * table on this cpu.
 *
 * The table id travels in nmsg->lmsg.u.ms_result.  After the walk the
 * message is forwarded to cpu mycpuid + 1.
 */
6324 ipfw_table_zero_dispatch(netmsg_t nmsg
)
6326 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6327 struct radix_node_head
*rnh
;
6329 ASSERT_NETISR_NCPUS(mycpuid
);
6331 rnh
= ctx
->ipfw_tables
[nmsg
->lmsg
.u
.ms_result
];
6332 rnh
->rnh_walktree(rnh
, ipfw_table_zeroent
, NULL
);
6334 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
/*
 * Message handler that applies ipfw_table_zeroent to the entries of
 * every table on this cpu, then forwards the message to cpu
 * mycpuid + 1.
 * NOTE(review): the line between fetching 'rnh' and walking it is
 * missing from this listing; presumably NULL slots are skipped there --
 * confirm.
 */
6338 ipfw_table_zeroall_dispatch(netmsg_t nmsg
)
6340 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6343 ASSERT_NETISR_NCPUS(mycpuid
);
6345 for (i
= 0; i
< ipfw_table_max
; ++i
) {
6346 struct radix_node_head
*rnh
= ctx
->ipfw_tables
[i
];
6349 rnh
->rnh_walktree(rnh
, ipfw_table_zeroent
, NULL
);
6351 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
/*
 * Socket-option handler that zeroes table entry statistics.
 *
 * The option must be exactly sizeof(struct ipfw_ioc_table).  A negative
 * table id selects all tables (ipfw_table_zeroall_dispatch).  An id at
 * or above ipfw_table_max, and a NULL table slot, each take their own
 * branch.  Otherwise the id is passed in ms_result of a netmsg handled
 * by ipfw_table_zero_dispatch.  Messages are run through
 * netisr_domsg_global().
 * NOTE(review): the bodies of the two error branches and the return
 * statements are missing from this listing.
 */
6355 ipfw_table_zero(struct sockopt
*sopt
)
6357 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6358 struct netmsg_base nm
;
6359 struct ipfw_ioc_table
*tbl
;
6363 if (sopt
->sopt_valsize
!= sizeof(*tbl
))
6365 tbl
= sopt
->sopt_val
;
6367 if (tbl
->tableid
< 0) {
6368 netmsg_init(&nm
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
6369 ipfw_table_zeroall_dispatch
);
6370 netisr_domsg_global(&nm
);
6372 } else if (tbl
->tableid
>= ipfw_table_max
) {
6374 } else if (ctx
->ipfw_tables
[tbl
->tableid
] == NULL
) {
6378 netmsg_init(&nm
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
6379 ipfw_table_zero_dispatch
);
6380 nm
.lmsg
.u
.ms_result
= tbl
->tableid
;
6381 netisr_domsg_global(&nm
);
/*
 * Tree-walk callback that deletes entries marked as expired.
 *
 * rn  - visited node, treated as a struct ipfw_tblent.
 * xnm - struct netmsg_tblexp; its rnh field is the radix head passed to
 *       ipfw_table_killrn().
 *
 * Only entries whose te_expired is set are removed.
 * NOTE(review): the remainder of the if body is missing from this
 * listing; the dispatch handlers assert on nm->expcnt, so it is
 * presumably incremented here -- confirm.
 */
6387 ipfw_table_killexp(struct radix_node
*rn
, void *xnm
)
6389 struct netmsg_tblexp
*nm
= xnm
;
6390 struct ipfw_tblent
*te
= (struct ipfw_tblent
*)rn
;
6392 if (te
->te_expired
) {
6393 ipfw_table_killrn(nm
->rnh
, rn
);
/*
 * Message handler that removes the expired entries of one table on this
 * cpu.
 *
 * nmsg is treated as a struct netmsg_tblexp.  The table nm->tableid is
 * walked with ipfw_table_killexp, then nm->expcnt is asserted to equal
 * nm->cnt * (mycpuid + 1), and the message is forwarded to cpu
 * mycpuid + 1.
 * NOTE(review): line 6409 is missing from this listing; presumably it
 * stores 'rnh' into nm->rnh for the callback -- confirm.
 */
6400 ipfw_table_expire_dispatch(netmsg_t nmsg
)
6402 struct netmsg_tblexp
*nm
= (struct netmsg_tblexp
*)nmsg
;
6403 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6404 struct radix_node_head
*rnh
;
6406 ASSERT_NETISR_NCPUS(mycpuid
);
6408 rnh
= ctx
->ipfw_tables
[nm
->tableid
];
6410 rnh
->rnh_walktree(rnh
, ipfw_table_killexp
, nm
);
6412 KASSERT(nm
->expcnt
== nm
->cnt
* (mycpuid
+ 1),
6413 ("not all expired addresses (%d) were deleted (%d)",
6414 nm
->cnt
* (mycpuid
+ 1), nm
->expcnt
));
6416 netisr_forwardmsg(&nm
->base
, mycpuid
+ 1);
/*
 * Message handler that removes the expired entries of every table on
 * this cpu.
 *
 * Each table slot is fetched and walked with ipfw_table_killexp.  After
 * the loop nm->expcnt is asserted to equal nm->cnt * (mycpuid + 1), and
 * the message is forwarded to cpu mycpuid + 1.
 * NOTE(review): the lines between fetching 'rnh' and walking it are
 * missing from this listing; presumably NULL slots are skipped and
 * nm->rnh is set there -- confirm.
 */
6420 ipfw_table_expireall_dispatch(netmsg_t nmsg
)
6422 struct netmsg_tblexp
*nm
= (struct netmsg_tblexp
*)nmsg
;
6423 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6426 ASSERT_NETISR_NCPUS(mycpuid
);
6428 for (i
= 0; i
< ipfw_table_max
; ++i
) {
6429 struct radix_node_head
*rnh
= ctx
->ipfw_tables
[i
];
6434 rnh
->rnh_walktree(rnh
, ipfw_table_killexp
, nm
);
6437 KASSERT(nm
->expcnt
== nm
->cnt
* (mycpuid
+ 1),
6438 ("not all expired addresses (%d) were deleted (%d)",
6439 nm
->cnt
* (mycpuid
+ 1), nm
->expcnt
));
6441 netisr_forwardmsg(&nm
->base
, mycpuid
+ 1);
/*
 * Tree-walk callback that decides whether a table entry has expired.
 *
 * rn  - visited node, treated as a struct ipfw_tblent.
 * xnm - struct netmsg_tblexp supplying the 'expire' interval.
 *
 * The newest te_lastuse over the entry and its te_sibling chain is
 * computed and tested with TIME_LEQ(lastuse + nm->expire, time_second).
 * The sibling chain is then walked a second time starting again from
 * rn.
 * NOTE(review): the bodies of the TIME_LEQ branch and of the second
 * walk are missing from this listing; presumably a not-yet-expired
 * entry is left alone and otherwise te_expired is set on every sibling
 * and nm->cnt is bumped -- confirm.
 */
6445 ipfw_table_markexp(struct radix_node
*rn
, void *xnm
)
6447 struct netmsg_tblexp
*nm
= xnm
;
6448 struct ipfw_tblent
*te
;
6451 te
= (struct ipfw_tblent
*)rn
;
6452 lastuse
= te
->te_lastuse
;
6454 while ((te
= te
->te_sibling
) != NULL
) {
6455 if (te
->te_lastuse
> lastuse
)
6456 lastuse
= te
->te_lastuse
;
6458 if (!TIME_LEQ(lastuse
+ nm
->expire
, time_second
)) {
6463 te
= (struct ipfw_tblent
*)rn
;
6465 while ((te
= te
->te_sibling
) != NULL
)
/*
 * Socket-option handler that expires idle table entries.
 *
 * The option must be exactly sizeof(struct ipfw_ioc_tblexp); tbl->expire
 * is copied into the message.  Expiry is done in two passes: entries are
 * first marked on this cpu with ipfw_table_markexp, the number found
 * (nm.cnt) is reported back in tbl->expcnt, and then the marked entries
 * are deleted on every cpu through netisr_domsg_global().  A negative
 * table id processes all tables (ipfw_table_expireall_dispatch);
 * otherwise a single table is processed (ipfw_table_expire_dispatch).
 * After deletion nm.expcnt is asserted to equal nm.cnt * netisr_ncpus.
 * NOTE(review): message-field initialisation, the NULL-table and
 * "nothing to expire" early exits, and all return statements are
 * missing from this listing.
 */
6473 ipfw_table_expire(struct sockopt
*sopt
)
6475 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6476 struct netmsg_tblexp nm
;
6477 struct ipfw_ioc_tblexp
*tbl
;
6478 struct radix_node_head
*rnh
;
6482 if (sopt
->sopt_valsize
!= sizeof(*tbl
))
6484 tbl
= sopt
->sopt_val
;
6489 nm
.expire
= tbl
->expire
;
6491 if (tbl
->tableid
< 0) {
6494 for (i
= 0; i
< ipfw_table_max
; ++i
) {
6495 rnh
= ctx
->ipfw_tables
[i
];
6498 rnh
->rnh_walktree(rnh
, ipfw_table_markexp
, &nm
);
6501 /* No addresses can be expired. */
6504 tbl
->expcnt
= nm
.cnt
;
6506 netmsg_init(&nm
.base
, NULL
, &curthread
->td_msgport
,
6507 MSGF_PRIORITY
, ipfw_table_expireall_dispatch
);
6509 netisr_domsg_global(&nm
.base
);
6510 KASSERT(nm
.expcnt
== nm
.cnt
* netisr_ncpus
,
6511 ("not all expired addresses (%d) were deleted (%d)",
6512 nm
.cnt
* netisr_ncpus
, nm
.expcnt
));
6515 } else if (tbl
->tableid
>= ipfw_table_max
) {
6519 rnh
= ctx
->ipfw_tables
[tbl
->tableid
];
6522 rnh
->rnh_walktree(rnh
, ipfw_table_markexp
, &nm
);
6524 /* No addresses can be expired. */
6527 tbl
->expcnt
= nm
.cnt
;
6529 netmsg_init(&nm
.base
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
6530 ipfw_table_expire_dispatch
);
6531 nm
.tableid
= tbl
->tableid
;
6532 netisr_domsg_global(&nm
.base
);
6533 KASSERT(nm
.expcnt
== nm
.cnt
* netisr_ncpus
,
6534 ("not all expired addresses (%d) were deleted (%d)",
6535 nm
.cnt
* netisr_ncpus
, nm
.expcnt
));
/*
 * Message handler that frees one cross-referenced rule.
 *
 * The rule pointer travels in nmsg->lmsg.u.ms_resultp.  It must carry
 * both IPFW_RULE_F_CROSSREF and IPFW_RULE_F_INVALID; it is released with
 * ipfw_free_rule() and the message is replied with 0.
 */
6540 ipfw_crossref_free_dispatch(netmsg_t nmsg
)
6542 struct ip_fw
*rule
= nmsg
->lmsg
.u
.ms_resultp
;
6544 KKASSERT((rule
->rule_flags
&
6545 (IPFW_RULE_F_CROSSREF
| IPFW_RULE_F_INVALID
)) ==
6546 (IPFW_RULE_F_CROSSREF
| IPFW_RULE_F_INVALID
));
6547 ipfw_free_rule(rule
);
6549 netisr_replymsg(&nmsg
->base
, 0);
/*
 * Reap rules parked on ipfw_gd.ipfw_crossref_free that no longer have
 * in-flight references.
 *
 * For each rule on the list, cross_refs is summed over its cross_rules
 * on all netisr_ncpus cpus.  When the sum is 0 the rule is taken as 'f';
 * for cpus 1 .. netisr_ncpus - 1 the sibling f->cross_rules[i] is freed
 * on its own cpu by sending ipfw_crossref_free_dispatch with
 * netisr_domsg(), and f itself is asserted to carry both
 * IPFW_RULE_F_CROSSREF and IPFW_RULE_F_INVALID.  If rules remain on the
 * list afterwards, the ipfw_crossref_ch callout is re-armed for hz
 * ticks with ipfw_crossref_timeo.
 * NOTE(review): the list unlinking (use of 'prev'), the advance to the
 * next rule and the freeing of 'f' itself are missing from this
 * listing.
 */
6553 ipfw_crossref_reap(void)
6555 struct ip_fw
*rule
, *prev
= NULL
;
6559 rule
= ipfw_gd
.ipfw_crossref_free
;
6560 while (rule
!= NULL
) {
6561 uint64_t inflight
= 0;
6564 for (i
= 0; i
< netisr_ncpus
; ++i
)
6565 inflight
+= rule
->cross_rules
[i
]->cross_refs
;
6566 if (inflight
== 0) {
6567 struct ip_fw
*f
= rule
;
6576 ipfw_gd
.ipfw_crossref_free
= rule
;
6581 for (i
= 1; i
< netisr_ncpus
; ++i
) {
6582 struct netmsg_base nm
;
6584 netmsg_init(&nm
, NULL
, &curthread
->td_msgport
,
6585 MSGF_PRIORITY
, ipfw_crossref_free_dispatch
);
6586 nm
.lmsg
.u
.ms_resultp
= f
->cross_rules
[i
];
6587 netisr_domsg(&nm
, i
);
6589 KKASSERT((f
->rule_flags
&
6590 (IPFW_RULE_F_CROSSREF
| IPFW_RULE_F_INVALID
)) ==
6591 (IPFW_RULE_F_CROSSREF
| IPFW_RULE_F_INVALID
));
6599 if (ipfw_gd
.ipfw_crossref_free
!= NULL
) {
6600 callout_reset(&ipfw_gd
.ipfw_crossref_ch
, hz
,
6601 ipfw_crossref_timeo
, NULL
);
6606 * {set|get}sockopt parser.
/*
 * Socket-option entry point: dispatches on sopt->sopt_name to the
 * individual control handlers.
 *
 * Handlers visible here: ipfw_ctl_get_rules(), ipfw_flush(0),
 * ipfw_ctl_add_rule(), ipfw_ctl_alter() / ipfw_ctl_set_disable()
 * (chosen by whether the option holds one or two uint32_t values),
 * ipfw_ctl_zero_entry() (log-only when the option is IP_FW_RESETLOG),
 * and the table handlers ipfw_table_create / _alt / _flush / _get /
 * _zero / _expire.  Unknown options are reported with kprintf().
 * ipfw_crossref_reap() is called at the end.
 * NOTE(review): most case labels, the break statements, the error
 * values and the final return are missing from this listing.
 */
6609 ipfw_ctl(struct sockopt
*sopt
)
6619 switch (sopt
->sopt_name
) {
6621 error
= ipfw_ctl_get_rules(sopt
);
6625 ipfw_flush(0 /* keep default rule */);
6629 error
= ipfw_ctl_add_rule(sopt
);
6634 * IP_FW_DEL is used for deleting single rules or sets,
6635 * and (ab)used to atomically manipulate sets.
6636 * Argument size is used to distinguish between the two:
6638 * delete single rule or set of rules,
6639 * or reassign rules (or sets) to a different set.
6640 * 2 * sizeof(uint32_t)
6641 * atomic disable/enable sets.
6642 * first uint32_t contains sets to be disabled,
6643 * second uint32_t contains sets to be enabled.
6645 masks
= sopt
->sopt_val
;
6646 size
= sopt
->sopt_valsize
;
6647 if (size
== sizeof(*masks
)) {
6649 * Delete or reassign static rule
6651 error
= ipfw_ctl_alter(masks
[0]);
6652 } else if (size
== (2 * sizeof(*masks
))) {
6654 * Set enable/disable
6656 ipfw_ctl_set_disable(masks
[0], masks
[1]);
6663 case IP_FW_RESETLOG
: /* argument is an int, the rule number */
6666 if (sopt
->sopt_val
!= 0) {
6667 error
= soopt_to_kbuf(sopt
, &rulenum
,
6668 sizeof(int), sizeof(int));
6672 error
= ipfw_ctl_zero_entry(rulenum
,
6673 sopt
->sopt_name
== IP_FW_RESETLOG
);
6676 case IP_FW_TBL_CREATE
:
6677 error
= ipfw_table_create(sopt
);
6682 error
= ipfw_table_alt(sopt
);
6685 case IP_FW_TBL_FLUSH
:
6686 case IP_FW_TBL_DESTROY
:
6687 error
= ipfw_table_flush(sopt
);
6691 error
= ipfw_table_get(sopt
);
6694 case IP_FW_TBL_ZERO
:
6695 error
= ipfw_table_zero(sopt
);
6698 case IP_FW_TBL_EXPIRE
:
6699 error
= ipfw_table_expire(sopt
);
6703 kprintf("ipfw_ctl invalid option %d\n", sopt
->sopt_name
);
6707 ipfw_crossref_reap();
/*
 * Finish a keepalive pass on this context.
 *
 * Asserts that IPFW_FLAG_KEEPALIVE is set, clears it, and re-arms the
 * per-context ipfw_keepalive_ch callout to run ipfw_keepalive after
 * dyn_keepalive_period * hz ticks.
 */
6712 ipfw_keepalive_done(struct ipfw_context
*ctx
)
6715 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_KEEPALIVE
,
6716 ("keepalive is not in progress"));
6717 ctx
->ipfw_flags
&= ~IPFW_FLAG_KEEPALIVE
;
6718 callout_reset(&ctx
->ipfw_keepalive_ch
, dyn_keepalive_period
* hz
,
6719 ipfw_keepalive
, NULL
);
/*
 * Schedule the continuation of a keepalive pass.
 *
 * Asserts that a keepalive pass is in progress and that the context's
 * ipfw_keepalive_more message is idle (MSGF_DONE), then queues that
 * message with netisr_sendmsg_oncpu().
 */
6723 ipfw_keepalive_more(struct ipfw_context
*ctx
)
6725 struct netmsg_base
*nm
= &ctx
->ipfw_keepalive_more
;
6727 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_KEEPALIVE
,
6728 ("keepalive is not in progress"));
6729 KASSERT(nm
->lmsg
.ms_flags
& MSGF_DONE
,
6730 ("keepalive more did not finish"));
6731 netisr_sendmsg_oncpu(nm
);
/*
 * Scan the state list from 'anchor', expiring dead states and sending
 * TCP keepalives.
 *
 * ctx    - per-cpu context; a keepalive pass must be in progress.
 * anchor - marker entry inside ctx->ipfw_state_list; it is moved behind
 *          each state as the scan advances.
 *
 * The pass yields through ipfw_keepalive_more() once any of three
 * budgets is used up: ipfw_state_scan_max states scanned,
 * ipfw_state_expire_max states removed, or ipfw_keepalive_max
 * keepalives sent.  O_ANCHOR entries get their own test, dead states
 * are removed with ipfw_state_remove(), and keepalive handling applies
 * only to TCP states whose state bits equal BOTH_SYN and that pass the
 * dyn_keepalive_interval timing test.  Translation states send in one
 * direction only, chosen by xlat_dir; other states send in both.  When
 * the list is exhausted the anchor is unlinked and
 * ipfw_keepalive_done() is called.
 * NOTE(review): the continue/return statements, the second operand of
 * the timing test and the declaration of 'send_dir' are missing from
 * this listing.
 */
6735 ipfw_keepalive_loop(struct ipfw_context
*ctx
, struct ipfw_state
*anchor
)
6737 struct ipfw_state
*s
;
6738 int scanned
= 0, expired
= 0, kept
= 0;
6740 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_KEEPALIVE
,
6741 ("keepalive is not in progress"));
6743 while ((s
= TAILQ_NEXT(anchor
, st_link
)) != NULL
) {
6744 uint32_t ack_rev
, ack_fwd
;
6745 struct ipfw_flow_id id
;
6748 if (scanned
++ >= ipfw_state_scan_max
) {
6749 ipfw_keepalive_more(ctx
);
6753 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
6754 TAILQ_INSERT_AFTER(&ctx
->ipfw_state_list
, s
, anchor
, st_link
);
6758 * Don't use IPFW_STATE_SCANSKIP; need to perform keepalive
6761 if (s
->st_type
== O_ANCHOR
)
6764 if (IPFW_STATE_ISDEAD(s
)) {
6765 ipfw_state_remove(ctx
, s
);
6766 if (++expired
>= ipfw_state_expire_max
) {
6767 ipfw_keepalive_more(ctx
);
6774 * Keep alive processing
6777 if (s
->st_proto
!= IPPROTO_TCP
)
6779 if ((s
->st_state
& IPFW_STATE_TCPSTATES
) != BOTH_SYN
)
6781 if (TIME_LEQ(time_uptime
+ dyn_keepalive_interval
,
6783 continue; /* too early */
6785 ipfw_key_4tuple(&s
->st_key
, &id
.src_ip
, &id
.src_port
,
6786 &id
.dst_ip
, &id
.dst_port
);
6787 ack_rev
= s
->st_ack_rev
;
6788 ack_fwd
= s
->st_ack_fwd
;
6790 #define SEND_FWD 0x1
6791 #define SEND_REV 0x2
6793 if (IPFW_ISXLAT(s
->st_type
)) {
6794 const struct ipfw_xlat
*x
= (const struct ipfw_xlat
*)s
;
6796 if (x
->xlat_dir
== MATCH_FORWARD
)
6797 send_dir
= SEND_FWD
;
6799 send_dir
= SEND_REV
;
6801 send_dir
= SEND_FWD
| SEND_REV
;
6804 if (send_dir
& SEND_REV
)
6805 send_pkt(&id
, ack_rev
- 1, ack_fwd
, TH_SYN
);
6806 if (send_dir
& SEND_FWD
)
6807 send_pkt(&id
, ack_fwd
- 1, ack_rev
, 0);
6812 if (++kept
>= ipfw_keepalive_max
) {
6813 ipfw_keepalive_more(ctx
);
6817 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
6818 ipfw_keepalive_done(ctx
);
/*
 * Message handler that resumes a keepalive pass that yielded earlier.
 *
 * Asserts that a pass is in progress and replies to the message with 0.
 * If keepalives are disabled (dyn_keepalive is 0) or this cpu has no
 * states, the anchor is unlinked from the state list and the pass is
 * finished with ipfw_keepalive_done(); ipfw_keepalive_loop() continues
 * the scan from the anchor.
 * NOTE(review): the return after the early-finish branch is missing
 * from this listing.
 */
6822 ipfw_keepalive_more_dispatch(netmsg_t nm
)
6824 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6825 struct ipfw_state
*anchor
;
6827 ASSERT_NETISR_NCPUS(mycpuid
);
6828 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_KEEPALIVE
,
6829 ("keepalive is not in progress"));
6832 netisr_replymsg(&nm
->base
, 0);
6834 anchor
= &ctx
->ipfw_keepalive_anch
;
6835 if (!dyn_keepalive
|| ctx
->ipfw_state_cnt
== 0) {
6836 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
6837 ipfw_keepalive_done(ctx
);
6840 ipfw_keepalive_loop(ctx
, anchor
);
6844 * This procedure is only used to handle keepalives. It is invoked
6845 * every dyn_keepalive_period
/*
 * Message handler that starts a keepalive pass on this cpu.
 *
 * Asserts that no pass is running, sets IPFW_FLAG_KEEPALIVE and replies
 * to the message with 0.  If keepalives are disabled (dyn_keepalive is
 * 0) or this cpu has no states, the pass is finished at once with
 * ipfw_keepalive_done().  The keepalive anchor is inserted at the head
 * of the state list and ipfw_keepalive_loop() is started from it.
 * NOTE(review): the return after the early-finish branch is missing
 * from this listing.
 */
6848 ipfw_keepalive_dispatch(netmsg_t nm
)
6850 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6851 struct ipfw_state
*anchor
;
6853 ASSERT_NETISR_NCPUS(mycpuid
);
6854 KASSERT((ctx
->ipfw_flags
& IPFW_FLAG_KEEPALIVE
) == 0,
6855 ("keepalive is in progress"));
6856 ctx
->ipfw_flags
|= IPFW_FLAG_KEEPALIVE
;
6860 netisr_replymsg(&nm
->base
, 0);
6863 if (!dyn_keepalive
|| ctx
->ipfw_state_cnt
== 0) {
6864 ipfw_keepalive_done(ctx
);
6868 anchor
= &ctx
->ipfw_keepalive_anch
;
6869 TAILQ_INSERT_HEAD(&ctx
->ipfw_state_list
, anchor
, st_link
);
6870 ipfw_keepalive_loop(ctx
, anchor
);
6874 * This procedure is only used to handle keepalives. It is invoked
6875 * every dyn_keepalive_period
/*
 * Callout function that kicks off a keepalive pass on the current cpu.
 *
 * The argument is unused.  The per-cpu ipfw_keepalive_nm message is
 * queued with netisr_sendmsg_oncpu(), but only when it is idle
 * (MSGF_DONE set).
 */
6878 ipfw_keepalive(void *dummy __unused
)
6880 struct netmsg_base
*msg
;
6882 KKASSERT(mycpuid
< netisr_ncpus
);
6883 msg
= &ipfw_ctx
[mycpuid
]->ipfw_keepalive_nm
;
6886 if (msg
->lmsg
.ms_flags
& MSGF_DONE
)
6887 netisr_sendmsg_oncpu(msg
);
/*
 * Message handler that resumes filtering of a redispatched packet on
 * the target cpu.
 *
 * nmsg is treated as a struct netmsg_genpkt carrying the mbuf (nm->m)
 * and the rule to continue from (nm->arg1).  The rule must belong to
 * this cpu and the mbuf must carry IPFW_MBUF_CONTINUE.  The rule is
 * parked in ctx->ipfw_cont_rule, which must be empty beforehand, and is
 * reset to NULL afterwards because it may not have been consumed if
 * ipfw was unloaded or disabled.
 * NOTE(review): the call that re-injects the mbuf and the cross_refs
 * decrement described by the trailing comment are missing from this
 * listing.
 */
6892 ipfw_ip_input_dispatch(netmsg_t nmsg
)
6894 struct netmsg_genpkt
*nm
= (struct netmsg_genpkt
*)nmsg
;
6895 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6896 struct mbuf
*m
= nm
->m
;
6897 struct ip_fw
*rule
= nm
->arg1
;
6899 ASSERT_NETISR_NCPUS(mycpuid
);
6900 KASSERT(rule
->cpuid
== mycpuid
,
6901 ("rule does not belong to cpu%d", mycpuid
));
6902 KASSERT(m
->m_pkthdr
.fw_flags
& IPFW_MBUF_CONTINUE
,
6903 ("mbuf does not have ipfw continue rule"));
6905 KASSERT(ctx
->ipfw_cont_rule
== NULL
,
6906 ("pending ipfw continue rule"));
6907 ctx
->ipfw_cont_rule
= rule
;
6910 /* May not be cleared, if ipfw was unload/disabled. */
6911 ctx
->ipfw_cont_rule
= NULL
;
6914 * This rule is no longer used; decrement its cross_refs,
6915 * so this rule can be deleted.
/*
 * ipfw_defrag_redispatch: hand mbuf 'm' to another cpu so filtering can
 * continue there from the sibling of 'rule'.
 *
 * m     - packet to redispatch; it is marked with IPFW_MBUF_CONTINUE.
 * cpuid - target cpu; must differ from the current cpu (asserted).
 * rule  - rule on the current cpu; rule->cross_rules[cpuid] is what is
 *         passed along in nm->arg1.
 *
 * The netmsg used is the one embedded in the mbuf header
 * (m->m_hdr.mh_genmsg).  It is initialized with ipfw_ip_input_dispatch
 * as its handler and sent to 'cpuid'.
 *
 * NOTE(review): the cross_refs bump that the comment below describes,
 * and the store of 'm' into the message, are not part of this listing.
 */
6921 ipfw_defrag_redispatch(struct mbuf
*m
, int cpuid
, struct ip_fw
*rule
)
6923 struct netmsg_genpkt
*nm
;
6925 KASSERT(cpuid
!= mycpuid
, ("continue on the same cpu%d", cpuid
));
6929 * Bump cross_refs to prevent this rule and its siblings
6930 * from being deleted, while this mbuf is inflight. The
6931 * cross_refs of the sibling rule on the target cpu will
6932 * be decremented, once this mbuf is going to be filtered
6933 * on the target cpu.
6936 m
->m_pkthdr
.fw_flags
|= IPFW_MBUF_CONTINUE
;
6938 nm
= &m
->m_hdr
.mh_genmsg
;
6939 netmsg_init(&nm
->base
, NULL
, &netisr_apanic_rport
, 0,
6940 ipfw_ip_input_dispatch
);
6942 nm
->arg1
= rule
->cross_rules
[cpuid
];
6943 netisr_sendmsg(&nm
->base
, cpuid
);
/*
 * ipfw_init_args: set up 'args' for a filter run from the per-packet
 * state recorded in m->m_pkthdr.fw_flags.
 *
 * - DUMMYNET_MBUF_TAGGED: args->rule is taken from the dn_priv field of
 *   the struct dn_pkt held in the PACKET_TAG_DUMMYNET tag; the tag is
 *   then deleted and the flag cleared.
 * - IPFW_MBUF_CONTINUE: args->rule is taken from the current cpu's
 *   ctx->ipfw_cont_rule, which is reset to NULL.  If ctx->ipfw_cont_xlat
 *   is set it is moved into args->xlat, and the IPFW_MBUF_XLATINS /
 *   IPFW_MBUF_XLATFWD mbuf flags are converted into IP_FWARG_F_XLATINS /
 *   IP_FWARG_F_XLATFWD.  Finally IP_FWARG_F_CONT is set and
 *   IPFW_MBUF_CONTINUE is cleared.
 *
 * NOTE(review): the handling of 'oif', the initialization of the other
 * args fields, and the branch that contains the "== 0" assertion on the
 * XLAT flags are not part of this listing -- confirm against the full
 * source.
 */
6947 ipfw_init_args(struct ip_fw_args
*args
, struct mbuf
*m
, struct ifnet
*oif
)
6954 if (m
->m_pkthdr
.fw_flags
& DUMMYNET_MBUF_TAGGED
) {
6957 /* Extract info from dummynet tag */
6958 mtag
= m_tag_find(m
, PACKET_TAG_DUMMYNET
, NULL
);
6959 KKASSERT(mtag
!= NULL
);
6960 args
->rule
= ((struct dn_pkt
*)m_tag_data(mtag
))->dn_priv
;
6961 KKASSERT(args
->rule
!= NULL
);
6963 m_tag_delete(m
, mtag
);
6964 m
->m_pkthdr
.fw_flags
&= ~DUMMYNET_MBUF_TAGGED
;
6965 } else if (m
->m_pkthdr
.fw_flags
& IPFW_MBUF_CONTINUE
) {
6966 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6968 KKASSERT(ctx
->ipfw_cont_rule
!= NULL
);
6969 args
->rule
= ctx
->ipfw_cont_rule
;
6970 ctx
->ipfw_cont_rule
= NULL
;
6972 if (ctx
->ipfw_cont_xlat
!= NULL
) {
6973 args
->xlat
= ctx
->ipfw_cont_xlat
;
6974 ctx
->ipfw_cont_xlat
= NULL
;
6975 if (m
->m_pkthdr
.fw_flags
& IPFW_MBUF_XLATINS
) {
6976 args
->flags
|= IP_FWARG_F_XLATINS
;
6977 m
->m_pkthdr
.fw_flags
&= ~IPFW_MBUF_XLATINS
;
6979 if (m
->m_pkthdr
.fw_flags
& IPFW_MBUF_XLATFWD
) {
6980 args
->flags
|= IP_FWARG_F_XLATFWD
;
6981 m
->m_pkthdr
.fw_flags
&= ~IPFW_MBUF_XLATFWD
;
6984 KKASSERT((m
->m_pkthdr
.fw_flags
&
6985 (IPFW_MBUF_XLATINS
| IPFW_MBUF_XLATFWD
)) == 0);
6987 args
->flags
|= IP_FWARG_F_CONT
;
6988 m
->m_pkthdr
.fw_flags
&= ~IPFW_MBUF_CONTINUE
;
/*
 * ipfw_check_in: filter hook for inbound packets (registered elsewhere
 * in this file with PFIL_IN).
 *
 * Builds an ip_fw_args with ipfw_init_args() (no output interface is
 * passed), runs ipfw_chk(), and acts on the verdict.  The actions
 * visible here are:
 *   IP_FW_DUMMYNET - queue the packet via ipfw_dummynet_io() with
 *                    DN_TO_IP_IN;
 *   divert/tee     - clear BRIDGE_MBUF_TAGGED and pass the packet to
 *                    ip_divert_p(m, tee, 1) when that pointer is set;
 *   anything unknown panics.
 *
 * NOTE(review): most of the switch statement, the error assignments, the
 * write-back through 'm0' and the return are not part of this listing --
 * confirm against the full source before relying on this summary.
 */
6997 ipfw_check_in(void *arg
, struct mbuf
**m0
, struct ifnet
*ifp
, int dir
)
6999 struct ip_fw_args args
;
7000 struct mbuf
*m
= *m0
;
7001 int tee
= 0, error
= 0, ret
;
7003 ipfw_init_args(&args
, m
, NULL
);
7005 ret
= ipfw_chk(&args
);
7008 if (ret
!= IP_FW_REDISPATCH
)
7023 case IP_FW_DUMMYNET
:
7024 /* Send packet to the appropriate pipe */
7025 m
= ipfw_dummynet_io(m
, args
.cookie
, DN_TO_IP_IN
, &args
);
7034 * Must clear bridge tag when changing
7036 m
->m_pkthdr
.fw_flags
&= ~BRIDGE_MBUF_TAGGED
;
7037 if (ip_divert_p
!= NULL
) {
7038 m
= ip_divert_p(m
, tee
, 1);
7042 /* not sure this is the right error msg */
7048 panic("unknown ipfw return value: %d", ret
);
/*
 * ipfw_check_out: filter hook for outbound packets (registered elsewhere
 * in this file with PFIL_OUT).
 *
 * Builds an ip_fw_args with ipfw_init_args(), passing 'ifp' as the
 * output interface, runs ipfw_chk(), and acts on the verdict.  The
 * actions visible here are:
 *   IP_FW_DUMMYNET - queue the packet via ipfw_dummynet_io() with
 *                    DN_TO_IP_OUT;
 *   divert/tee     - pass the packet to ip_divert_p(m, tee, 0) when that
 *                    pointer is set;
 *   anything unknown panics.
 *
 * NOTE(review): most of the switch statement, the error assignments, the
 * write-back through 'm0' and the return are not part of this listing --
 * confirm against the full source before relying on this summary.
 */
7056 ipfw_check_out(void *arg
, struct mbuf
**m0
, struct ifnet
*ifp
, int dir
)
7058 struct ip_fw_args args
;
7059 struct mbuf
*m
= *m0
;
7060 int tee
= 0, error
= 0, ret
;
7062 ipfw_init_args(&args
, m
, ifp
);
7064 ret
= ipfw_chk(&args
);
7067 if (ret
!= IP_FW_REDISPATCH
)
7082 case IP_FW_DUMMYNET
:
7083 m
= ipfw_dummynet_io(m
, args
.cookie
, DN_TO_IP_OUT
, &args
);
7091 if (ip_divert_p
!= NULL
) {
7092 m
= ip_divert_p(m
, tee
, 0);
7096 /* not sure this is the right error msg */
7102 panic("unknown ipfw return value: %d", ret
);
/*
 * Install the packet-filter hooks: look up the AF_INET pfil head and
 * register ipfw_check_in for PFIL_IN and ipfw_check_out for PFIL_OUT.
 *
 * NOTE(review): the line naming this function is not part of this
 * listing (presumably ipfw_hook), nor is whatever check is made on 'pfh'
 * before it is used -- confirm against the full source.
 */
7112 struct pfil_head
*pfh
;
7116 pfh
= pfil_head_get(PFIL_TYPE_AF
, AF_INET
);
7120 pfil_add_hook(ipfw_check_in
, NULL
, PFIL_IN
, pfh
);
7121 pfil_add_hook(ipfw_check_out
, NULL
, PFIL_OUT
, pfh
);
/*
 * Remove the packet-filter hooks: look up the AF_INET pfil head and
 * unregister ipfw_check_in (PFIL_IN) and ipfw_check_out (PFIL_OUT).
 *
 * NOTE(review): the line naming this function is not part of this
 * listing (presumably ipfw_dehook), nor is whatever check is made on
 * 'pfh' before it is used -- confirm against the full source.
 */
7127 struct pfil_head
*pfh
;
7131 pfh
= pfil_head_get(PFIL_TYPE_AF
, AF_INET
);
7135 pfil_remove_hook(ipfw_check_in
, NULL
, PFIL_IN
, pfh
);
7136 pfil_remove_hook(ipfw_check_out
, NULL
, PFIL_OUT
, pfh
);
/*
 * ipfw_sysctl_dyncnt: sysctl handler reporting the number of dynamic
 * entries, computed as the value returned by ipfw_state_cntcoll() plus
 * ipfw_gd.ipfw_trkcnt_cnt.  The sum is handed to sysctl_handle_int()
 * from a local variable, and that call's result is returned.
 */
7140 ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS
)
7144 dyn_cnt
= ipfw_state_cntcoll();
7145 dyn_cnt
+= ipfw_gd
.ipfw_trkcnt_cnt
;
7147 return (sysctl_handle_int(oidp
, &dyn_cnt
, 0, req
));
/*
 * ipfw_sysctl_statecnt: sysctl handler reporting the state count
 * returned by ipfw_state_cntcoll().  The value is handed to
 * sysctl_handle_int() from a local variable, and that call's result is
 * returned.
 */
7151 ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS
)
7155 state_cnt
= ipfw_state_cntcoll();
7156 return (sysctl_handle_int(oidp
, &state_cnt
, 0, req
));
/*
 * ipfw_sysctl_statemax: sysctl handler for the maximum number of states.
 *
 * Reads report ipfw_state_max.  The check on 'error' and req->newptr
 * separates the read-only/failed case from a write; on a write the new
 * value is applied with ipfw_state_max_set().
 *
 * NOTE(review): the statement under that check and any validation of
 * the new value before ipfw_state_max_set() are not part of this
 * listing.
 */
7160 ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS
)
7162 int state_max
, error
;
7164 state_max
= ipfw_state_max
;
7165 error
= sysctl_handle_int(oidp
, &state_max
, 0, req
);
7166 if (error
|| req
->newptr
== NULL
)
7172 ipfw_state_max_set(state_max
);
/*
 * ipfw_sysctl_dynmax: sysctl handler for the combined limit on dynamic
 * entries.
 *
 * Reads report ipfw_state_max + ipfw_track_max.  On a write the new
 * value is split evenly: half is applied with ipfw_state_max_set() and
 * half is stored in ipfw_track_max (integer division, so an odd value
 * loses one).
 *
 * NOTE(review): the statement under the error/newptr check and any
 * validation of the new value are not part of this listing.
 */
7177 ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS
)
7181 dyn_max
= ipfw_state_max
+ ipfw_track_max
;
7183 error
= sysctl_handle_int(oidp
, &dyn_max
, 0, req
);
7184 if (error
|| req
->newptr
== NULL
)
7190 ipfw_state_max_set(dyn_max
/ 2);
7191 ipfw_track_max
= dyn_max
/ 2;
/*
 * ipfw_sysctl_enable_dispatch: netmsg handler that applies a new
 * firewall enable value.  The requested value arrives in
 * nmsg->lmsg.u.ms_result and is compared against the current fw_enable;
 * the message is replied to with 0.
 *
 * NOTE(review): what happens when the values differ is not part of this
 * listing -- confirm against the full source.
 */
7196 ipfw_sysctl_enable_dispatch(netmsg_t nmsg
)
7198 int enable
= nmsg
->lmsg
.u
.ms_result
;
7202 if (fw_enable
== enable
)
7211 netisr_replymsg(&nmsg
->base
, 0);
/*
 * ipfw_sysctl_enable: sysctl handler for the firewall enable switch.
 *
 * After sysctl_handle_int() has processed the request, a write is
 * carried out by packing the new value into a stack netmsg
 * (lmsg.u.ms_result) and running ipfw_sysctl_enable_dispatch through
 * netisr_domsg(&nmsg, 0).  The result of netisr_domsg() is returned.
 *
 * NOTE(review): the initialization of 'enable' and the statement under
 * the error/newptr check are not part of this listing.
 */
7215 ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS
)
7217 struct netmsg_base nmsg
;
7221 error
= sysctl_handle_int(oidp
, &enable
, 0, req
);
7222 if (error
|| req
->newptr
== NULL
)
7225 netmsg_init(&nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
7226 ipfw_sysctl_enable_dispatch
);
7227 nmsg
.lmsg
.u
.ms_result
= enable
;
7229 return netisr_domsg(&nmsg
, 0);
/*
 * ipfw_sysctl_autoinc_step: sysctl handler for the rule-number
 * auto-increment step.  Delegates to sysctl_int_range() with the bounds
 * IPFW_AUTOINC_STEP_MIN and IPFW_AUTOINC_STEP_MAX and returns its
 * result.
 */
7233 ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS
)
7235 return sysctl_int_range(oidp
, arg1
, arg2
, req
,
7236 IPFW_AUTOINC_STEP_MIN
, IPFW_AUTOINC_STEP_MAX
);
/*
 * ipfw_sysctl_scancnt: sysctl handler for a scan-count tunable.
 * Delegates to sysctl_int_range() with the bounds 1 and INT_MAX and
 * returns its result.
 */
7240 ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS
)
7243 return sysctl_int_range(oidp
, arg1
, arg2
, req
, 1, INT_MAX
);
/*
 * ipfw_sysctl_stat: sysctl handler for a per-cpu u_long statistic.
 *
 * arg2 is used as a byte offset into each cpu's struct ipfw_context.
 * The u_long at that offset is added up over cpus 0..netisr_ncpus-1 and
 * reported through sysctl_handle_long().  On a write, the counter is
 * reset to 0 on every cpu.
 *
 * NOTE(review): the counters are read and zeroed directly from the
 * calling context, with no dispatch to the owning cpu visible here; the
 * initialization of 'stat' is not part of this listing.
 */
7247 ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS
)
7252 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
)
7253 stat
+= *((u_long
*)((uint8_t *)ipfw_ctx
[cpu
] + arg2
));
7255 error
= sysctl_handle_long(oidp
, &stat
, 0, req
);
7256 if (error
|| req
->newptr
== NULL
)
7259 /* Zero out this stat. */
7260 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
)
7261 *((u_long
*)((uint8_t *)ipfw_ctx
[cpu
] + arg2
)) = 0;
/*
 * ipfw_ctx_init_dispatch: netmsg handler that builds the ipfw context of
 * the current cpu, then forwards the message to cpu mycpuid + 1.
 *
 * Steps visible here:
 *  - allocate a zeroed struct ipfw_context sized up to
 *    ipfw_tables[ipfw_table_max] (M_WAITOK);
 *  - initialize the state and track trees and lists;
 *  - initialize the callouts and netmsgs for state expiry, track expiry,
 *    keepalive and xlat reaping, and mark the state-expire and keepalive
 *    anchors as O_ANCHOR;
 *  - publish the context in ipfw_ctx[mycpuid];
 *  - allocate the default rule (number IPFW_DEFAULT_RULE, set
 *    IPFW_DEFAULT_SET, a single one-word instruction), with opcode
 *    O_ACCEPT or O_DENY, and install it as both ipfw_default_rule and
 *    the head of ipfw_layer3_chain;
 *  - link the rule to its per-cpu siblings through ipfw_link_sibling().
 *
 * NOTE(review): the #else/#endif and else lines around the opcode
 * selection, and the condition guarding ipfw_inc_static_count(), are not
 * part of this listing -- confirm against the full source.
 */
7266 ipfw_ctx_init_dispatch(netmsg_t nmsg
)
7268 struct netmsg_ipfw
*fwmsg
= (struct netmsg_ipfw
*)nmsg
;
7269 struct ipfw_context
*ctx
;
7270 struct ip_fw
*def_rule
;
7272 ASSERT_NETISR_NCPUS(mycpuid
);
7274 ctx
= kmalloc(__offsetof(struct ipfw_context
,
7275 ipfw_tables
[ipfw_table_max
]), M_IPFW
, M_WAITOK
| M_ZERO
);
7277 RB_INIT(&ctx
->ipfw_state_tree
);
7278 TAILQ_INIT(&ctx
->ipfw_state_list
);
7280 RB_INIT(&ctx
->ipfw_track_tree
);
7281 TAILQ_INIT(&ctx
->ipfw_track_list
);
7283 callout_init_mp(&ctx
->ipfw_stateto_ch
);
7284 netmsg_init(&ctx
->ipfw_stateexp_nm
, NULL
, &netisr_adone_rport
,
7285 MSGF_DROPABLE
| MSGF_PRIORITY
, ipfw_state_expire_dispatch
);
7286 ctx
->ipfw_stateexp_anch
.st_type
= O_ANCHOR
;
7287 netmsg_init(&ctx
->ipfw_stateexp_more
, NULL
, &netisr_adone_rport
,
7288 MSGF_DROPABLE
, ipfw_state_expire_more_dispatch
);
7290 callout_init_mp(&ctx
->ipfw_trackto_ch
);
7291 netmsg_init(&ctx
->ipfw_trackexp_nm
, NULL
, &netisr_adone_rport
,
7292 MSGF_DROPABLE
| MSGF_PRIORITY
, ipfw_track_expire_dispatch
);
7293 netmsg_init(&ctx
->ipfw_trackexp_more
, NULL
, &netisr_adone_rport
,
7294 MSGF_DROPABLE
, ipfw_track_expire_more_dispatch
);
7296 callout_init_mp(&ctx
->ipfw_keepalive_ch
);
7297 netmsg_init(&ctx
->ipfw_keepalive_nm
, NULL
, &netisr_adone_rport
,
7298 MSGF_DROPABLE
| MSGF_PRIORITY
, ipfw_keepalive_dispatch
);
7299 ctx
->ipfw_keepalive_anch
.st_type
= O_ANCHOR
;
7300 netmsg_init(&ctx
->ipfw_keepalive_more
, NULL
, &netisr_adone_rport
,
7301 MSGF_DROPABLE
, ipfw_keepalive_more_dispatch
);
7303 callout_init_mp(&ctx
->ipfw_xlatreap_ch
);
7304 netmsg_init(&ctx
->ipfw_xlatreap_nm
, NULL
, &netisr_adone_rport
,
7305 MSGF_DROPABLE
| MSGF_PRIORITY
, ipfw_xlat_reap_dispatch
);
7306 TAILQ_INIT(&ctx
->ipfw_xlatreap
);
7308 ipfw_ctx
[mycpuid
] = ctx
;
7310 def_rule
= kmalloc(sizeof(*def_rule
), M_IPFW
, M_WAITOK
| M_ZERO
);
7312 def_rule
->act_ofs
= 0;
7313 def_rule
->rulenum
= IPFW_DEFAULT_RULE
;
7314 def_rule
->cmd_len
= 1;
7315 def_rule
->set
= IPFW_DEFAULT_SET
;
7317 def_rule
->cmd
[0].len
= 1;
7318 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
7319 def_rule
->cmd
[0].opcode
= O_ACCEPT
;
7321 if (filters_default_to_accept
)
7322 def_rule
->cmd
[0].opcode
= O_ACCEPT
;
7324 def_rule
->cmd
[0].opcode
= O_DENY
;
7327 def_rule
->refcnt
= 1;
7328 def_rule
->cpuid
= mycpuid
;
7330 /* Install the default rule */
7331 ctx
->ipfw_default_rule
= def_rule
;
7332 ctx
->ipfw_layer3_chain
= def_rule
;
7334 /* Link rule CPU sibling */
7335 ipfw_link_sibling(fwmsg
, def_rule
);
7337 /* Statistics only need to be updated once */
7339 ipfw_inc_static_count(def_rule
);
7341 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
/*
 * ipfw_crossref_reap_dispatch: netmsg handler that replies to the
 * message with 0 and then calls ipfw_crossref_reap().
 */
7345 ipfw_crossref_reap_dispatch(netmsg_t nmsg
)
7350 netisr_replymsg(&nmsg
->base
, 0);
7352 ipfw_crossref_reap();
/*
 * ipfw_crossref_timeo: timer callback for crossref-rule reaping; the
 * argument is unused.  It must run on cpu 0 (asserted).  It sends the
 * global ipfw_gd.ipfw_crossref_nm message on the local cpu, but only
 * when the message has MSGF_DONE set.
 *
 * NOTE(review): MSGF_DONE presumably means the previous send has already
 * been replied to -- confirm.
 */
7356 ipfw_crossref_timeo(void *dummy __unused
)
7358 struct netmsg_base
*msg
= &ipfw_gd
.ipfw_crossref_nm
;
7360 KKASSERT(mycpuid
== 0);
7363 if (msg
->lmsg
.ms_flags
& MSGF_DONE
)
7364 netisr_sendmsg_oncpu(msg
);
/*
 * ipfw_ifaddr_dispatch: netmsg handler run on each netisr cpu in turn.
 * It uses the interface passed in nmsg->lmsg.u.ms_resultp and then
 * forwards the message to cpu mycpuid + 1.
 *
 * It walks this cpu's ipfw_layer3_chain.  Rules are tested for
 * IPFW_RULE_F_DYNIFADDR; for the rules that are scanned, each
 * instruction is visited (stepping by F_LEN(cmd)), and for
 * O_IP_SRC_IFIP / O_IP_DST_IFIP instructions the interface name is
 * compared with ifp->if_xname.  IPFW_IFIP_VALID is cleared in cmd->arg1.
 *
 * NOTE(review): the statement under the IPFW_RULE_F_DYNIFADDR test, and
 * the length argument and the sense of the strncmp() test, are not part
 * of this listing.  Presumably rules without the flag are skipped and
 * the flag is cleared only when the names match -- confirm.
 */
7369 ipfw_ifaddr_dispatch(netmsg_t nmsg
)
7371 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
7372 struct ifnet
*ifp
= nmsg
->lmsg
.u
.ms_resultp
;
7375 ASSERT_NETISR_NCPUS(mycpuid
);
7377 for (f
= ctx
->ipfw_layer3_chain
; f
!= NULL
; f
= f
->next
) {
7381 if ((f
->rule_flags
& IPFW_RULE_F_DYNIFADDR
) == 0)
7384 for (l
= f
->cmd_len
, cmd
= f
->cmd
; l
> 0;
7385 l
-= cmdlen
, cmd
+= cmdlen
) {
7386 cmdlen
= F_LEN(cmd
);
7387 if (cmd
->opcode
== O_IP_SRC_IFIP
||
7388 cmd
->opcode
== O_IP_DST_IFIP
) {
7389 if (strncmp(ifp
->if_xname
,
7390 ((ipfw_insn_ifip
*)cmd
)->ifname
,
7392 cmd
->arg1
&= ~IPFW_IFIP_VALID
;
7396 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
/*
 * ipfw_ifaddr: handler for interface address events (registered for
 * ifaddr_event elsewhere in this file).  Only 'ifp' is used; the other
 * arguments are marked __unused.
 *
 * It stores ifp in a stack netmsg (lmsg.u.ms_resultp) whose handler is
 * ipfw_ifaddr_dispatch, and submits it with netisr_domsg_global().
 */
7400 ipfw_ifaddr(void *arg __unused
, struct ifnet
*ifp
,
7401 enum ifaddr_event event __unused
, struct ifaddr
*ifa __unused
)
7403 struct netmsg_base nm
;
7405 netmsg_init(&nm
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
7406 ipfw_ifaddr_dispatch
);
7407 nm
.lmsg
.u
.ms_resultp
= ifp
;
7408 netisr_domsg_global(&nm
);
/*
 * ipfw_init_dispatch: netmsg handler that performs the global ipfw
 * initialization and replies with 'error'.
 *
 * Steps visible here:
 *  - print "IP firewall already loaded" (the guarding condition is not
 *    part of this listing);
 *  - reset ipfw_table_max to UINT16_MAX when it is <= 0 or > UINT16_MAX;
 *  - initialize the global track-counter tree and its token;
 *  - initialize the crossref GC callout and netmsg;
 *  - apply ipfw_state_max and set ipfw_state_headroom to
 *    8 * netisr_ncpus;
 *  - run ipfw_ctx_init_dispatch across the netisr cpus via
 *    netisr_domsg_global();
 *  - publish ipfw_chk, ipfw_ctl and ipfw_dummynet_io through the
 *    ip_fw_*_ptr hooks;
 *  - print the default policy and the logging mode;
 *  - for each netisr cpu, schedule the state-expire, track-expire and
 *    keepalive callouts on that cpu, first firing after hz ticks;
 *  - register ipfw_ifaddr for ifaddr_event; a failed registration is
 *    only logged.
 *
 * NOTE(review): several short lines (declarations, #endif/else lines,
 * the assignment of 'error', labels) are not part of this listing --
 * confirm against the full source.
 */
7412 ipfw_init_dispatch(netmsg_t nmsg
)
7414 struct netmsg_ipfw fwmsg
;
7420 kprintf("IP firewall already loaded\n");
7425 if (ipfw_table_max
> UINT16_MAX
|| ipfw_table_max
<= 0)
7426 ipfw_table_max
= UINT16_MAX
;
7428 /* Initialize global track tree. */
7429 RB_INIT(&ipfw_gd
.ipfw_trkcnt_tree
);
7430 IPFW_TRKCNT_TOKINIT
;
7432 /* GC for freed crossref rules. */
7433 callout_init_mp(&ipfw_gd
.ipfw_crossref_ch
);
7434 netmsg_init(&ipfw_gd
.ipfw_crossref_nm
, NULL
, &netisr_adone_rport
,
7435 MSGF_PRIORITY
| MSGF_DROPABLE
, ipfw_crossref_reap_dispatch
);
7437 ipfw_state_max_set(ipfw_state_max
);
7438 ipfw_state_headroom
= 8 * netisr_ncpus
;
7440 bzero(&fwmsg
, sizeof(fwmsg
));
7441 netmsg_init(&fwmsg
.base
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
7442 ipfw_ctx_init_dispatch
);
7443 netisr_domsg_global(&fwmsg
.base
);
7445 ip_fw_chk_ptr
= ipfw_chk
;
7446 ip_fw_ctl_ptr
= ipfw_ctl
;
7447 ip_fw_dn_io_ptr
= ipfw_dummynet_io
;
7449 kprintf("ipfw2 initialized, default to %s, logging ",
7450 ipfw_ctx
[mycpuid
]->ipfw_default_rule
->cmd
[0].opcode
==
7451 O_ACCEPT
? "accept" : "deny");
7453 #ifdef IPFIREWALL_VERBOSE
7456 #ifdef IPFIREWALL_VERBOSE_LIMIT
7457 verbose_limit
= IPFIREWALL_VERBOSE_LIMIT
;
7459 if (fw_verbose
== 0) {
7460 kprintf("disabled\n");
7461 } else if (verbose_limit
== 0) {
7462 kprintf("unlimited\n");
7464 kprintf("limited to %d packets/entry by default\n",
7469 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
) {
7470 callout_reset_bycpu(&ipfw_ctx
[cpu
]->ipfw_stateto_ch
, hz
,
7471 ipfw_state_expire_ipifunc
, NULL
, cpu
);
7472 callout_reset_bycpu(&ipfw_ctx
[cpu
]->ipfw_trackto_ch
, hz
,
7473 ipfw_track_expire_ipifunc
, NULL
, cpu
);
7474 callout_reset_bycpu(&ipfw_ctx
[cpu
]->ipfw_keepalive_ch
, hz
,
7475 ipfw_keepalive
, NULL
, cpu
);
7481 ipfw_ifaddr_event
= EVENTHANDLER_REGISTER(ifaddr_event
, ipfw_ifaddr
,
7482 NULL
, EVENTHANDLER_PRI_ANY
);
7483 if (ipfw_ifaddr_event
== NULL
)
7484 kprintf("ipfw: ifaddr_event register failed\n");
7487 netisr_replymsg(&nmsg
->base
, error
);
/*
 * Module initialization entry: runs ipfw_init_dispatch through
 * netisr_domsg(&smsg, 0) using a stack netmsg and returns the result of
 * that call.
 *
 * NOTE(review): the line naming this function is not part of this
 * listing (presumably ipfw_init) -- confirm against the full source.
 */
7493 struct netmsg_base smsg
;
7495 netmsg_init(&smsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
7496 ipfw_init_dispatch
);
7497 return netisr_domsg(&smsg
, 0);
/*
 * ipfw_ctx_fini_dispatch: netmsg handler that quiesces the ipfw context
 * of the current cpu, then forwards the message to cpu mycpuid + 1.
 *
 * It cancels the four per-cpu callouts (state expire, track expire,
 * keepalive, xlat reap), drops the seven per-cpu netmsgs that go with
 * them, and flushes all tables on this cpu with
 * ipfw_table_flushall_oncpu(ctx, 1).
 *
 * The context itself is not freed here; the per-cpu contexts are freed
 * in ipfw_fini_dispatch.
 */
7503 ipfw_ctx_fini_dispatch(netmsg_t nmsg
)
7505 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
7507 ASSERT_NETISR_NCPUS(mycpuid
);
7509 callout_cancel(&ctx
->ipfw_stateto_ch
);
7510 callout_cancel(&ctx
->ipfw_trackto_ch
);
7511 callout_cancel(&ctx
->ipfw_keepalive_ch
);
7512 callout_cancel(&ctx
->ipfw_xlatreap_ch
);
7515 netisr_dropmsg(&ctx
->ipfw_stateexp_more
);
7516 netisr_dropmsg(&ctx
->ipfw_stateexp_nm
);
7517 netisr_dropmsg(&ctx
->ipfw_trackexp_more
);
7518 netisr_dropmsg(&ctx
->ipfw_trackexp_nm
);
7519 netisr_dropmsg(&ctx
->ipfw_keepalive_more
);
7520 netisr_dropmsg(&ctx
->ipfw_keepalive_nm
);
7521 netisr_dropmsg(&ctx
->ipfw_xlatreap_nm
);
7524 ipfw_table_flushall_oncpu(ctx
, 1);
7526 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
/*
 * ipfw_fini_dispatch: netmsg handler that tears ipfw down and replies
 * with 'error'.
 *
 * Steps visible here, in order:
 *  - reap crossref rules and test ipfw_gd.ipfw_refcnt against 0;
 *  - synchronize in-flight IPIs;
 *  - run ipfw_ctx_fini_dispatch across the netisr cpus;
 *  - cancel the crossref callout and drop its netmsg;
 *  - deregister the ifaddr_event handler if one was registered;
 *  - clear the ip_fw_*_ptr hooks;
 *  - flush all rules, including the default rule;
 *  - free every per-cpu context.
 *
 * NOTE(review): what happens when ipfw_refcnt is nonzero (presumably an
 * error is set and teardown is skipped) is not part of this listing.
 * The ipfw_ctx[] slots are not reset to NULL after kfree() in the
 * visible lines -- confirm nothing dereferences them afterwards.
 */
7530 ipfw_fini_dispatch(netmsg_t nmsg
)
7532 struct netmsg_base nm
;
7537 ipfw_crossref_reap();
7539 if (ipfw_gd
.ipfw_refcnt
!= 0) {
7547 /* Synchronize any inflight state/track expire IPIs. */
7548 lwkt_synchronize_ipiqs("ipfwfini");
7550 netmsg_init(&nm
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
7551 ipfw_ctx_fini_dispatch
);
7552 netisr_domsg_global(&nm
);
7554 callout_cancel(&ipfw_gd
.ipfw_crossref_ch
);
7556 netisr_dropmsg(&ipfw_gd
.ipfw_crossref_nm
);
7559 if (ipfw_ifaddr_event
!= NULL
)
7560 EVENTHANDLER_DEREGISTER(ifaddr_event
, ipfw_ifaddr_event
);
7562 ip_fw_chk_ptr
= NULL
;
7563 ip_fw_ctl_ptr
= NULL
;
7564 ip_fw_dn_io_ptr
= NULL
;
7565 ipfw_flush(1 /* kill default rule */);
7567 /* Free per-cpu context */
7568 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
)
7569 kfree(ipfw_ctx
[cpu
], M_IPFW
);
7571 kprintf("IP firewall unloaded\n");
7573 netisr_replymsg(&nmsg
->base
, error
);
/*
 * ipfw_fflush_dispatch: netmsg handler that flushes all rules except the
 * default rule (ipfw_flush(0)), reaps crossref rules, and replies to the
 * message with 0.
 */
7577 ipfw_fflush_dispatch(netmsg_t nmsg
)
7580 ipfw_flush(0 /* keep default rule */);
7581 ipfw_crossref_reap();
7582 netisr_replymsg(&nmsg
->base
, 0);
/*
 * Module teardown entry.  It runs ipfw_fflush_dispatch through
 * netisr_domsg() and tests ipfw_gd.ipfw_refcnt against 0.  The pending
 * path logs "ipfw: flush pending N" with an incrementing counter and
 * sleeps with a timeout of (3 * hz) / 2 ticks.  Finally it runs
 * ipfw_fini_dispatch and returns the result of that netisr_domsg() call.
 *
 * NOTE(review): the line naming this function (presumably ipfw_fini),
 * the loop construct around the flush/sleep sequence, and the statement
 * under the refcnt test are not part of this listing -- confirm against
 * the full source.
 */
7588 struct netmsg_base smsg
;
7592 netmsg_init(&smsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
7593 ipfw_fflush_dispatch
);
7594 netisr_domsg(&smsg
, 0);
7596 if (ipfw_gd
.ipfw_refcnt
== 0)
7598 kprintf("ipfw: flush pending %d\n", ++i
);
7599 tsleep(&smsg
, 0, "ipfwff", (3 * hz
) / 2);
7602 netmsg_init(&smsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
7603 ipfw_fini_dispatch
);
7604 return netisr_domsg(&smsg
, 0);
7607 #endif /* KLD_MODULE */
/*
 * ipfw_modevent: module event handler taking the module, the event type
 * and an unused argument.  The only statement visible here prints
 * "ipfw statically compiled, cannot unload".
 *
 * NOTE(review): the switch on 'type', the load/unload calls and the
 * return value are not part of this listing -- confirm against the full
 * source.
 */
7610 ipfw_modevent(module_t mod
, int type
, void *unused
)
7621 kprintf("ipfw statically compiled, cannot unload\n");
/*
 * Module glue: the 'ipfwmod' descriptor is declared to the kernel as
 * module "ipfw" at SI_SUB_PROTO_END / SI_ORDER_ANY, with module
 * version 1.
 *
 * NOTE(review): the initializer fields of ipfwmod are not part of this
 * listing -- confirm against the full source.
 */
7633 static moduledata_t ipfwmod
= {
7638 DECLARE_MODULE(ipfw
, ipfwmod
, SI_SUB_PROTO_END
, SI_ORDER_ANY
);
7639 MODULE_VERSION(ipfw
, 1);