2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $
29 * Implement IP packet firewall (new version)
35 #error IPFIREWALL requires INET.
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/malloc.h>
42 #include <sys/kernel.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/sysctl.h>
47 #include <sys/syslog.h>
48 #include <sys/ucred.h>
49 #include <sys/in_cksum.h>
50 #include <sys/limits.h>
55 #include <net/route.h>
57 #include <net/dummynet/ip_dummynet.h>
59 #include <sys/thread2.h>
60 #include <net/netmsg2.h>
62 #include <netinet/in.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/in_var.h>
65 #include <netinet/in_pcb.h>
66 #include <netinet/ip.h>
67 #include <netinet/ip_var.h>
68 #include <netinet/ip_icmp.h>
69 #include <netinet/tcp.h>
70 #include <netinet/tcp_seq.h>
71 #include <netinet/tcp_timer.h>
72 #include <netinet/tcp_var.h>
73 #include <netinet/tcpip.h>
74 #include <netinet/udp.h>
75 #include <netinet/udp_var.h>
76 #include <netinet/ip_divert.h>
77 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */
79 #include <net/ipfw/ip_fw2.h>
81 #ifdef IPFIREWALL_DEBUG
82 #define DPRINTF(fmt, ...) \
85 kprintf(fmt, __VA_ARGS__); \
88 #define DPRINTF(fmt, ...) ((void)0)
92 * Description about per-CPU rule duplication:
94 * Module loading/unloading and all ioctl operations are serialized
95 * by netisr0, so we don't have any ordering or locking problems.
97 * Following graph shows how operation on per-CPU rule list is
98 * performed [2 CPU case]:
102 * netisr0 <------------------------------------+
108 * forwardmsg---------->netisr1 |
113 * replymsg--------------+
117 * Rule structure [2 CPU case]
121 * layer3_chain layer3_chain
124 * +-------+ sibling +-------+ sibling
125 * | rule1 |--------->| rule1 |--------->NULL
126 * +-------+ +-------+
130 * +-------+ sibling +-------+ sibling
131 * | rule2 |--------->| rule2 |--------->NULL
132 * +-------+ +-------+
135 * 1) Ease statistics calculation during IP_FW_GET. We only need to
136 * iterate layer3_chain in netisr0; the current rule's duplication
137 * to the other CPUs could safely be read-only accessed through
139 * 2) Accelerate rule insertion and deletion, e.g. rule insertion:
140 * a) In netisr0 rule3 is determined to be inserted between rule1
141 * and rule2. To make this decision we need to iterate the
142 * layer3_chain in netisr0. The netmsg, which is used to insert
143 * the rule, will contain rule1 in netisr0 as prev_rule and rule2
144 * in netisr0 as next_rule.
145 * b) After the insertion in netisr0 is done, we will move on to
146 * netisr1. But instead of relocating the rule3's position in
147 * netisr1 by iterating the layer3_chain in netisr1, we set the
148 * netmsg's prev_rule to rule1->sibling and next_rule to
149 * rule2->sibling before the netmsg is forwarded to netisr1 from
154 * Description of states and tracks.
156 * Both states and tracks are stored in per-cpu RB trees instead of
157 * per-cpu hash tables to avoid the worst case hash degeneration.
159 * The lifetimes of states and tracks are regulated by dyn_*_lifetime,
160 * measured in seconds and depending on the flags.
162 * When a packet is received, its address fields are first masked with
163 * the mask defined for the rule, then matched against the entries in
164 * the per-cpu state RB tree. States are generated by 'keep-state'
165 * and 'limit' options.
167 * The max number of states is ipfw_state_max. When we reach the
168 * maximum number of states we do not create anymore. This is done to
169 * avoid consuming too much memory, but also too much time when
170 * searching on each packet.
172 * Each state holds a pointer to the parent ipfw rule of the current
173 * CPU so we know what action to perform. States are removed when the
174 * parent rule is deleted. XXX we should make them survive.
176 * There are some limitations with states -- we do not obey the
177 * 'randomized match', and we do not do multiple passes through the
178 * firewall. XXX check the latter!!!
180 * States grow independently on each CPU, e.g. 2 CPU case:
183 * ................... ...................
184 * : state RB tree : : state RB tree :
186 * : state1 state2 : : state3 :
188 * :.....|....|......: :........|........:
193 * +-------+ +-------+
194 * | rule1 | | rule1 |
195 * +-------+ +-------+
197 * Tracks are used to enforce limits on the number of sessions. Tracks
198 * are generated by 'limit' option.
200 * The max number of tracks is ipfw_track_max. When we reach the
201 * maximum number of tracks we do not create anymore. This is done to
202 * avoid consuming too much memory.
204 * Tracks are organized into two layers, track counter RB tree is
205 * shared between CPUs, track RB tree is per-cpu. States generated by
206 * 'limit' option are linked to the track in addition to the per-cpu
207 * state RB tree; mainly to ease expiration. e.g. 2 CPU case:
209 * ..............................
210 * : track counter RB tree :
215 * : +--->counter<----+ :
217 * : | +-----------+ | :
218 * :......|................|....:
221 * ................. |t_count | .................
222 * : track RB tree : | | : track RB tree :
224 * : +-->track1-------+ +--------track2 :
227 * :.|.....|.......: :...............:
228 * | +----------------+
229 * | .................... |
230 * | : state RB tree : |st_track
232 * +---state1 state2---+
234 * :.....|.......|....:
243 #define IPFW_AUTOINC_STEP_MIN 1
244 #define IPFW_AUTOINC_STEP_MAX 1000
245 #define IPFW_AUTOINC_STEP_DEF 100
247 #define IPFW_TABLE_MAX_DEF 64
249 #define IPFW_DEFAULT_RULE 65535 /* rulenum for the default rule */
250 #define IPFW_DEFAULT_SET 31 /* set number for the default rule */
252 #define MATCH_REVERSE 0
253 #define MATCH_FORWARD 1
255 #define MATCH_UNKNOWN 3
257 #define TIME_LEQ(a, b) ((a) - (b) <= 0)
259 #define IPFW_STATE_TCPFLAGS (TH_SYN | TH_FIN | TH_RST)
260 #define IPFW_STATE_TCPSTATES (IPFW_STATE_TCPFLAGS | \
261 (IPFW_STATE_TCPFLAGS << 8))
263 #define BOTH_SYN (TH_SYN | (TH_SYN << 8))
264 #define BOTH_FIN (TH_FIN | (TH_FIN << 8))
265 #define BOTH_RST (TH_RST | (TH_RST << 8))
266 /* TH_ACK here means FIN was ACKed. */
267 #define BOTH_FINACK (TH_ACK | (TH_ACK << 8))
269 #define IPFW_STATE_TCPCLOSED(s) ((s)->st_proto == IPPROTO_TCP && \
270 (((s)->st_state & BOTH_RST) || \
271 ((s)->st_state & BOTH_FINACK) == BOTH_FINACK))
273 #define O_ANCHOR O_NOP
275 #define IPFW_ISXLAT(type) ((type) == O_REDIRECT)
276 #define IPFW_XLAT_INVALID(s) (IPFW_ISXLAT((s)->st_type) && \
277 ((struct ipfw_xlat *)(s))->xlat_invalid)
279 #define IPFW_MBUF_XLATINS FW_MBUF_PRIVATE1
280 #define IPFW_MBUF_XLATFWD FW_MBUF_PRIVATE2
282 #define IPFW_XLATE_INSERT 0x0001
283 #define IPFW_XLATE_FORWARD 0x0002
284 #define IPFW_XLATE_OUTPUT 0x0004
287 struct netmsg_base base
;
288 const struct ipfw_ioc_rule
*ioc_rule
;
289 struct ip_fw
*next_rule
;
290 struct ip_fw
*prev_rule
;
291 struct ip_fw
*sibling
;
293 struct ip_fw
**cross_rules
;
297 struct netmsg_base base
;
298 struct ip_fw
*start_rule
;
299 struct ip_fw
*prev_rule
;
306 struct netmsg_base base
;
307 struct ip_fw
*start_rule
;
312 struct netmsg_cpstate
{
313 struct netmsg_base base
;
314 struct ipfw_ioc_state
*ioc_state
;
319 struct netmsg_tblent
{
320 struct netmsg_base base
;
321 struct sockaddr
*key
;
322 struct sockaddr
*netmask
;
323 struct ipfw_tblent
*sibling
;
327 struct netmsg_tblflush
{
328 struct netmsg_base base
;
333 struct netmsg_tblexp
{
334 struct netmsg_base base
;
339 struct radix_node_head
*rnh
;
342 struct ipfw_table_cp
{
343 struct ipfw_ioc_tblent
*te
;
350 * offset The offset of a fragment. offset != 0 means that
351 * we have a fragment at this offset of an IPv4 packet.
352 * offset == 0 means that (if this is an IPv4 packet)
353 * this is the first or only fragment.
358 * Local copies of addresses. They are only valid if we have
361 * proto The protocol. Set to 0 for non-ip packets,
362 * or to the protocol read from the packet otherwise.
363 * proto != 0 means that we have an IPv4 packet.
365 * src_port, dst_port port numbers, in HOST format. Only
366 * valid for TCP and UDP packets.
368 * src_ip, dst_ip ip addresses, in NETWORK format.
369 * Only valid for IPv4 packets.
372 uint16_t src_port
; /* NOTE: host format */
373 uint16_t dst_port
; /* NOTE: host format */
374 struct in_addr src_ip
; /* NOTE: network format */
375 struct in_addr dst_ip
; /* NOTE: network format */
376 uint16_t ip_len
; /* NOTE: host format */
381 uint32_t addr1
; /* host byte order */
382 uint32_t addr2
; /* host byte order */
386 uint16_t port1
; /* host byte order */
387 uint16_t port2
; /* host byte order */
392 struct ipfw_addrs addrs
;
396 struct ipfw_ports ports
;
400 uint8_t swap
; /* IPFW_KEY_SWAP_ */
404 #define IPFW_KEY_SWAP_ADDRS 0x1
405 #define IPFW_KEY_SWAP_PORTS 0x2
406 #define IPFW_KEY_SWAP_ALL (IPFW_KEY_SWAP_ADDRS | IPFW_KEY_SWAP_PORTS)
409 RB_ENTRY(ipfw_trkcnt
) tc_rblink
;
410 struct ipfw_key tc_key
;
414 time_t tc_expire
; /* userland get-only */
415 uint16_t tc_rulenum
; /* userland get-only */
418 #define tc_addrs tc_key.addr_u.value
419 #define tc_ports tc_key.port_u.value
420 #define tc_proto tc_key.proto
421 #define tc_saddr tc_key.addr_u.addrs.addr1
422 #define tc_daddr tc_key.addr_u.addrs.addr2
423 #define tc_sport tc_key.port_u.ports.port1
424 #define tc_dport tc_key.port_u.ports.port2
426 RB_HEAD(ipfw_trkcnt_tree
, ipfw_trkcnt
);
431 RB_ENTRY(ipfw_track
) t_rblink
;
432 struct ipfw_key t_key
;
433 struct ip_fw
*t_rule
;
435 LIST_HEAD(, ipfw_state
) t_state_list
;
437 volatile int *t_count
;
438 struct ipfw_trkcnt
*t_trkcnt
;
439 TAILQ_ENTRY(ipfw_track
) t_link
;
442 #define t_addrs t_key.addr_u.value
443 #define t_ports t_key.port_u.value
444 #define t_proto t_key.proto
445 #define t_saddr t_key.addr_u.addrs.addr1
446 #define t_daddr t_key.addr_u.addrs.addr2
447 #define t_sport t_key.port_u.ports.port1
448 #define t_dport t_key.port_u.ports.port2
450 RB_HEAD(ipfw_track_tree
, ipfw_track
);
451 TAILQ_HEAD(ipfw_track_list
, ipfw_track
);
454 RB_ENTRY(ipfw_state
) st_rblink
;
455 struct ipfw_key st_key
;
457 time_t st_expire
; /* expire time */
458 struct ip_fw
*st_rule
;
460 uint64_t st_pcnt
; /* packets */
461 uint64_t st_bcnt
; /* bytes */
465 * State of this rule, typically a combination of TCP flags.
467 * st_ack_fwd/st_ack_rev:
468 * Most recent ACKs in forward and reverse direction. They
469 * are used to generate keepalives.
472 uint32_t st_ack_fwd
; /* host byte order */
473 uint32_t st_seq_fwd
; /* host byte order */
474 uint32_t st_ack_rev
; /* host byte order */
475 uint32_t st_seq_rev
; /* host byte order */
477 uint16_t st_flags
; /* IPFW_STATE_F_ */
478 uint16_t st_type
; /* KEEP_STATE/LIMIT/RDR */
479 struct ipfw_track
*st_track
;
481 LIST_ENTRY(ipfw_state
) st_trklink
;
482 TAILQ_ENTRY(ipfw_state
) st_link
;
485 #define st_addrs st_key.addr_u.value
486 #define st_ports st_key.port_u.value
487 #define st_proto st_key.proto
488 #define st_swap st_key.swap
490 #define IPFW_STATE_F_ACKFWD 0x0001
491 #define IPFW_STATE_F_SEQFWD 0x0002
492 #define IPFW_STATE_F_ACKREV 0x0004
493 #define IPFW_STATE_F_SEQREV 0x0008
494 #define IPFW_STATE_F_XLATSRC 0x0010
495 #define IPFW_STATE_F_XLATSLAVE 0x0020
496 #define IPFW_STATE_F_LINKED 0x0040
498 #define IPFW_STATE_SCANSKIP(s) ((s)->st_type == O_ANCHOR || \
499 ((s)->st_flags & IPFW_STATE_F_XLATSLAVE))
501 /* Expired or being deleted. */
502 #define IPFW_STATE_ISDEAD(s) (TIME_LEQ((s)->st_expire, time_uptime) || \
503 IPFW_XLAT_INVALID((s)))
505 TAILQ_HEAD(ipfw_state_list
, ipfw_state
);
506 RB_HEAD(ipfw_state_tree
, ipfw_state
);
509 struct ipfw_state xlat_st
; /* MUST be the first field */
510 uint32_t xlat_addr
; /* network byte order */
511 uint16_t xlat_port
; /* network byte order */
512 uint16_t xlat_dir
; /* MATCH_ */
513 struct ifnet
*xlat_ifp
; /* matching ifnet */
514 struct ipfw_xlat
*xlat_pair
; /* paired state */
515 int xlat_pcpu
; /* paired cpu */
516 volatile int xlat_invalid
; /* invalid, but not dtor yet */
517 volatile uint64_t xlat_crefs
; /* cross references */
518 struct netmsg_base xlat_freenm
; /* for remote free */
521 #define xlat_type xlat_st.st_type
522 #define xlat_flags xlat_st.st_flags
523 #define xlat_rule xlat_st.st_rule
524 #define xlat_bcnt xlat_st.st_bcnt
525 #define xlat_pcnt xlat_st.st_pcnt
528 struct radix_node te_nodes
[2];
529 struct sockaddr_in te_key
;
532 struct ipfw_tblent
*te_sibling
;
533 volatile int te_expired
;
536 struct ipfw_context
{
537 struct ip_fw
*ipfw_layer3_chain
; /* rules for layer3 */
538 struct ip_fw
*ipfw_default_rule
; /* default rule */
539 uint64_t ipfw_norule_counter
; /* ipfw_log(NULL) stat*/
542 * ipfw_set_disable contains one bit per set value (0..31).
543 * If the bit is set, all rules with the corresponding set
544 * are disabled. Set IPFW_DEFAULT_SET is reserved for the
545 * default rule and CANNOT be disabled.
547 uint32_t ipfw_set_disable
;
549 uint8_t ipfw_flags
; /* IPFW_FLAG_ */
551 struct ip_fw
*ipfw_cont_rule
;
552 struct ipfw_xlat
*ipfw_cont_xlat
;
554 struct ipfw_state_tree ipfw_state_tree
;
555 struct ipfw_state_list ipfw_state_list
;
556 int ipfw_state_loosecnt
;
560 struct ipfw_state state
;
561 struct ipfw_track track
;
562 struct ipfw_trkcnt trkcnt
;
565 struct ipfw_track_tree ipfw_track_tree
;
566 struct ipfw_track_list ipfw_track_list
;
567 struct ipfw_trkcnt
*ipfw_trkcnt_spare
;
569 struct callout ipfw_stateto_ch
;
570 time_t ipfw_state_lastexp
;
571 struct netmsg_base ipfw_stateexp_nm
;
572 struct netmsg_base ipfw_stateexp_more
;
573 struct ipfw_state ipfw_stateexp_anch
;
575 struct callout ipfw_trackto_ch
;
576 time_t ipfw_track_lastexp
;
577 struct netmsg_base ipfw_trackexp_nm
;
578 struct netmsg_base ipfw_trackexp_more
;
579 struct ipfw_track ipfw_trackexp_anch
;
581 struct callout ipfw_keepalive_ch
;
582 struct netmsg_base ipfw_keepalive_nm
;
583 struct netmsg_base ipfw_keepalive_more
;
584 struct ipfw_state ipfw_keepalive_anch
;
586 struct callout ipfw_xlatreap_ch
;
587 struct netmsg_base ipfw_xlatreap_nm
;
588 struct ipfw_state_list ipfw_xlatreap
;
593 u_long ipfw_sts_reap
;
594 u_long ipfw_sts_reapfailed
;
595 u_long ipfw_sts_overflow
;
596 u_long ipfw_sts_nomem
;
597 u_long ipfw_sts_tcprecycled
;
599 u_long ipfw_tks_nomem
;
600 u_long ipfw_tks_reap
;
601 u_long ipfw_tks_reapfailed
;
602 u_long ipfw_tks_overflow
;
603 u_long ipfw_tks_cntnomem
;
606 u_long ipfw_defraged
;
607 u_long ipfw_defrag_remote
;
610 u_long ipfw_xlate_split
;
611 u_long ipfw_xlate_conflicts
;
612 u_long ipfw_xlate_cresolved
;
615 struct radix_node_head
*ipfw_tables
[];
618 #define IPFW_FLAG_KEEPALIVE 0x01
619 #define IPFW_FLAG_STATEEXP 0x02
620 #define IPFW_FLAG_TRACKEXP 0x04
621 #define IPFW_FLAG_STATEREAP 0x08
622 #define IPFW_FLAG_TRACKREAP 0x10
624 #define ipfw_state_tmpkey ipfw_tmpkey.state
625 #define ipfw_track_tmpkey ipfw_tmpkey.track
626 #define ipfw_trkcnt_tmpkey ipfw_tmpkey.trkcnt
629 int ipfw_state_loosecnt
; /* cache aligned */
630 time_t ipfw_state_globexp __cachealign
;
632 struct lwkt_token ipfw_trkcnt_token __cachealign
;
633 struct ipfw_trkcnt_tree ipfw_trkcnt_tree
;
635 time_t ipfw_track_globexp
;
637 /* Accessed in netisr0. */
638 struct ip_fw
*ipfw_crossref_free __cachealign
;
639 struct callout ipfw_crossref_ch
;
640 struct netmsg_base ipfw_crossref_nm
;
644 * Module can not be unloaded, if there are references to
645 * certains rules of ipfw(4), e.g. dummynet(4)
647 int ipfw_refcnt __cachealign
;
651 static struct ipfw_context
*ipfw_ctx
[MAXCPU
];
653 MALLOC_DEFINE(M_IPFW
, "IpFw/IpAcct", "IpFw/IpAcct chain's");
656 * Following two global variables are accessed and updated only
659 static uint32_t static_count
; /* # of static rules */
660 static uint32_t static_ioc_len
; /* bytes of static rules */
663 * If 1, then ipfw static rules are being flushed,
664 * ipfw_chk() will skip to the default rule.
666 static int ipfw_flushing
;
668 static int fw_verbose
;
669 static int verbose_limit
;
672 static int autoinc_step
= IPFW_AUTOINC_STEP_DEF
;
674 static int ipfw_table_max
= IPFW_TABLE_MAX_DEF
;
676 static int ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS
);
677 static int ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS
);
679 TUNABLE_INT("net.inet.ip.fw.table_max", &ipfw_table_max
);
681 SYSCTL_NODE(_net_inet_ip
, OID_AUTO
, fw
, CTLFLAG_RW
, 0, "Firewall");
682 SYSCTL_NODE(_net_inet_ip_fw
, OID_AUTO
, stats
, CTLFLAG_RW
, 0,
683 "Firewall statistics");
685 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, enable
, CTLTYPE_INT
| CTLFLAG_RW
,
686 &fw_enable
, 0, ipfw_sysctl_enable
, "I", "Enable ipfw");
687 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, autoinc_step
, CTLTYPE_INT
| CTLFLAG_RW
,
688 &autoinc_step
, 0, ipfw_sysctl_autoinc_step
, "I",
689 "Rule number autincrement step");
690 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
,one_pass
,CTLFLAG_RW
,
692 "Only do a single pass through ipfw when using dummynet(4)");
693 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, debug
, CTLFLAG_RW
,
694 &fw_debug
, 0, "Enable printing of debug ip_fw statements");
695 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, verbose
, CTLFLAG_RW
,
696 &fw_verbose
, 0, "Log matches to ipfw rules");
697 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, verbose_limit
, CTLFLAG_RW
,
698 &verbose_limit
, 0, "Set upper limit of matches of ipfw rules logged");
699 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, table_max
, CTLFLAG_RD
,
700 &ipfw_table_max
, 0, "Max # of tables");
702 static int ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS
);
703 static int ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS
);
704 static int ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS
);
705 static int ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS
);
706 static int ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS
);
707 static int ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS
);
710 * Timeouts for various events in handing states.
714 * 2 == 1~2 second(s).
716 * We use 2 seconds for FIN lifetime, so that the states will not be
717 * ripped prematurely.
719 static uint32_t dyn_ack_lifetime
= 300;
720 static uint32_t dyn_syn_lifetime
= 20;
721 static uint32_t dyn_finwait_lifetime
= 20;
722 static uint32_t dyn_fin_lifetime
= 2;
723 static uint32_t dyn_rst_lifetime
= 2;
724 static uint32_t dyn_udp_lifetime
= 10;
725 static uint32_t dyn_short_lifetime
= 5; /* used by tracks too */
728 * Keepalives are sent if dyn_keepalive is set. They are sent every
729 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
730 * seconds of lifetime of a rule.
732 static uint32_t dyn_keepalive_interval
= 20;
733 static uint32_t dyn_keepalive_period
= 5;
734 static uint32_t dyn_keepalive
= 1; /* do send keepalives */
736 static struct ipfw_global ipfw_gd
;
737 static int ipfw_state_loosecnt_updthr
;
738 static int ipfw_state_max
= 4096; /* max # of states */
739 static int ipfw_track_max
= 4096; /* max # of tracks */
741 static int ipfw_state_headroom
; /* setup at module load time */
742 static int ipfw_state_reap_min
= 8;
743 static int ipfw_state_expire_max
= 32;
744 static int ipfw_state_scan_max
= 256;
745 static int ipfw_keepalive_max
= 8;
746 static int ipfw_track_reap_max
= 4;
747 static int ipfw_track_expire_max
= 16;
748 static int ipfw_track_scan_max
= 128;
750 static eventhandler_tag ipfw_ifaddr_event
;
753 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, dyn_count
,
754 CTLTYPE_INT
| CTLFLAG_RD
, NULL
, 0, ipfw_sysctl_dyncnt
, "I",
755 "Number of states and tracks");
756 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, dyn_max
,
757 CTLTYPE_INT
| CTLFLAG_RW
, NULL
, 0, ipfw_sysctl_dynmax
, "I",
758 "Max number of states and tracks");
760 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, state_cnt
,
761 CTLTYPE_INT
| CTLFLAG_RD
, NULL
, 0, ipfw_sysctl_statecnt
, "I",
763 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, state_max
,
764 CTLTYPE_INT
| CTLFLAG_RW
, NULL
, 0, ipfw_sysctl_statemax
, "I",
765 "Max number of states");
766 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, state_headroom
, CTLFLAG_RW
,
767 &ipfw_state_headroom
, 0, "headroom for state reap");
768 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, track_cnt
, CTLFLAG_RD
,
769 &ipfw_gd
.ipfw_trkcnt_cnt
, 0, "Number of tracks");
770 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, track_max
, CTLFLAG_RW
,
771 &ipfw_track_max
, 0, "Max number of tracks");
772 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, static_count
, CTLFLAG_RD
,
773 &static_count
, 0, "Number of static rules");
774 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_ack_lifetime
, CTLFLAG_RW
,
775 &dyn_ack_lifetime
, 0, "Lifetime of dyn. rules for acks");
776 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_syn_lifetime
, CTLFLAG_RW
,
777 &dyn_syn_lifetime
, 0, "Lifetime of dyn. rules for syn");
778 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_fin_lifetime
, CTLFLAG_RW
,
779 &dyn_fin_lifetime
, 0, "Lifetime of dyn. rules for fin");
780 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_finwait_lifetime
, CTLFLAG_RW
,
781 &dyn_finwait_lifetime
, 0, "Lifetime of dyn. rules for fin wait");
782 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_rst_lifetime
, CTLFLAG_RW
,
783 &dyn_rst_lifetime
, 0, "Lifetime of dyn. rules for rst");
784 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_udp_lifetime
, CTLFLAG_RW
,
785 &dyn_udp_lifetime
, 0, "Lifetime of dyn. rules for UDP");
786 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_short_lifetime
, CTLFLAG_RW
,
787 &dyn_short_lifetime
, 0, "Lifetime of dyn. rules for other situations");
788 SYSCTL_INT(_net_inet_ip_fw
, OID_AUTO
, dyn_keepalive
, CTLFLAG_RW
,
789 &dyn_keepalive
, 0, "Enable keepalives for dyn. rules");
790 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, state_scan_max
,
791 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_state_scan_max
, 0, ipfw_sysctl_scancnt
,
792 "I", "# of states to scan for each expire iteration");
793 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, state_expire_max
,
794 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_state_expire_max
, 0, ipfw_sysctl_scancnt
,
795 "I", "# of states to expire for each expire iteration");
796 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, keepalive_max
,
797 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_keepalive_max
, 0, ipfw_sysctl_scancnt
,
798 "I", "# of states to expire for each expire iteration");
799 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, state_reap_min
,
800 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_state_reap_min
, 0, ipfw_sysctl_scancnt
,
801 "I", "# of states to reap for state shortage");
802 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, track_scan_max
,
803 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_track_scan_max
, 0, ipfw_sysctl_scancnt
,
804 "I", "# of tracks to scan for each expire iteration");
805 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, track_expire_max
,
806 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_track_expire_max
, 0, ipfw_sysctl_scancnt
,
807 "I", "# of tracks to expire for each expire iteration");
808 SYSCTL_PROC(_net_inet_ip_fw
, OID_AUTO
, track_reap_max
,
809 CTLTYPE_INT
| CTLFLAG_RW
, &ipfw_track_reap_max
, 0, ipfw_sysctl_scancnt
,
810 "I", "# of tracks to reap for track shortage");
812 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, state_reap
,
813 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
814 __offsetof(struct ipfw_context
, ipfw_sts_reap
), ipfw_sysctl_stat
,
815 "LU", "# of state reaps due to states shortage");
816 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, state_reapfailed
,
817 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
818 __offsetof(struct ipfw_context
, ipfw_sts_reapfailed
), ipfw_sysctl_stat
,
819 "LU", "# of state reap failure");
820 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, state_overflow
,
821 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
822 __offsetof(struct ipfw_context
, ipfw_sts_overflow
), ipfw_sysctl_stat
,
823 "LU", "# of state overflow");
824 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, state_nomem
,
825 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
826 __offsetof(struct ipfw_context
, ipfw_sts_nomem
), ipfw_sysctl_stat
,
827 "LU", "# of state allocation failure");
828 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, state_tcprecycled
,
829 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
830 __offsetof(struct ipfw_context
, ipfw_sts_tcprecycled
), ipfw_sysctl_stat
,
831 "LU", "# of state deleted due to fast TCP port recycling");
833 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, track_nomem
,
834 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
835 __offsetof(struct ipfw_context
, ipfw_tks_nomem
), ipfw_sysctl_stat
,
836 "LU", "# of track allocation failure");
837 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, track_reap
,
838 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
839 __offsetof(struct ipfw_context
, ipfw_tks_reap
), ipfw_sysctl_stat
,
840 "LU", "# of track reap due to tracks shortage");
841 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, track_reapfailed
,
842 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
843 __offsetof(struct ipfw_context
, ipfw_tks_reapfailed
), ipfw_sysctl_stat
,
844 "LU", "# of track reap failure");
845 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, track_overflow
,
846 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
847 __offsetof(struct ipfw_context
, ipfw_tks_overflow
), ipfw_sysctl_stat
,
848 "LU", "# of track overflow");
849 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, track_cntnomem
,
850 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
851 __offsetof(struct ipfw_context
, ipfw_tks_cntnomem
), ipfw_sysctl_stat
,
852 "LU", "# of track counter allocation failure");
853 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, frags
,
854 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
855 __offsetof(struct ipfw_context
, ipfw_frags
), ipfw_sysctl_stat
,
856 "LU", "# of IP fragements defraged");
857 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, defraged
,
858 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
859 __offsetof(struct ipfw_context
, ipfw_defraged
), ipfw_sysctl_stat
,
860 "LU", "# of IP packets after defrag");
861 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, defrag_remote
,
862 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
863 __offsetof(struct ipfw_context
, ipfw_defrag_remote
), ipfw_sysctl_stat
,
864 "LU", "# of IP packets after defrag dispatched to remote cpus");
865 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, xlated
,
866 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
867 __offsetof(struct ipfw_context
, ipfw_xlated
), ipfw_sysctl_stat
,
868 "LU", "# address/port translations");
869 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, xlate_split
,
870 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
871 __offsetof(struct ipfw_context
, ipfw_xlate_split
), ipfw_sysctl_stat
,
872 "LU", "# address/port translations split between different cpus");
873 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, xlate_conflicts
,
874 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
875 __offsetof(struct ipfw_context
, ipfw_xlate_conflicts
), ipfw_sysctl_stat
,
876 "LU", "# address/port translations conflicts on remote cpu");
877 SYSCTL_PROC(_net_inet_ip_fw_stats
, OID_AUTO
, xlate_cresolved
,
878 CTLTYPE_ULONG
| CTLFLAG_RW
, NULL
,
879 __offsetof(struct ipfw_context
, ipfw_xlate_cresolved
), ipfw_sysctl_stat
,
880 "LU", "# address/port translations conflicts resolved on remote cpu");
882 static int ipfw_state_cmp(struct ipfw_state
*,
883 struct ipfw_state
*);
884 static int ipfw_trkcnt_cmp(struct ipfw_trkcnt
*,
885 struct ipfw_trkcnt
*);
886 static int ipfw_track_cmp(struct ipfw_track
*,
887 struct ipfw_track
*);
889 RB_PROTOTYPE(ipfw_state_tree
, ipfw_state
, st_rblink
, ipfw_state_cmp
);
890 RB_GENERATE(ipfw_state_tree
, ipfw_state
, st_rblink
, ipfw_state_cmp
);
892 RB_PROTOTYPE(ipfw_trkcnt_tree
, ipfw_trkcnt
, tc_rblink
, ipfw_trkcnt_cmp
);
893 RB_GENERATE(ipfw_trkcnt_tree
, ipfw_trkcnt
, tc_rblink
, ipfw_trkcnt_cmp
);
895 RB_PROTOTYPE(ipfw_track_tree
, ipfw_track
, t_rblink
, ipfw_track_cmp
);
896 RB_GENERATE(ipfw_track_tree
, ipfw_track
, t_rblink
, ipfw_track_cmp
);
898 static int ipfw_chk(struct ip_fw_args
*);
899 static void ipfw_track_expire_ipifunc(void *);
900 static void ipfw_state_expire_ipifunc(void *);
901 static void ipfw_keepalive(void *);
902 static int ipfw_state_expire_start(struct ipfw_context
*,
904 static void ipfw_crossref_timeo(void *);
905 static void ipfw_state_remove(struct ipfw_context
*,
906 struct ipfw_state
*);
907 static void ipfw_xlat_reap_timeo(void *);
908 static void ipfw_defrag_redispatch(struct mbuf
*, int,
911 #define IPFW_TRKCNT_TOKGET lwkt_gettoken(&ipfw_gd.ipfw_trkcnt_token)
912 #define IPFW_TRKCNT_TOKREL lwkt_reltoken(&ipfw_gd.ipfw_trkcnt_token)
913 #define IPFW_TRKCNT_TOKINIT \
914 lwkt_token_init(&ipfw_gd.ipfw_trkcnt_token, "ipfw_trkcnt");
/*
 * Copy 'src' to 'dst', masking the address bytes with 'netmask':
 * bytes [2, netmask->sa_len) of the result are src & netmask, any
 * remaining bytes up to src->sa_len are zeroed.  The first two bytes
 * (sa_len and sa_family) are copied from 'src' verbatim.
 *
 * NOTE(review): interior lines lost in extraction (netmask skip, the
 * masked-copy loop and the tail-zero guard) were reconstructed after
 * the classic BSD rt_maskedcopy() -- confirm against repo history.
 */
static void
sa_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
    const struct sockaddr *netmask)
{
	const u_char *cp1 = (const u_char *)src;
	u_char *cp2 = (u_char *)dst;
	const u_char *cp3 = (const u_char *)netmask;
	u_char *cplim = cp2 + *cp3;	/* end of masked region: netmask sa_len */
	u_char *cplim2 = cp2 + *cp1;	/* end of destination: src sa_len */

	*cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
	cp3 += 2;			/* skip netmask's sa_len/sa_family */
	if (cplim > cplim2)
		cplim = cplim2;
	while (cp2 < cplim)
		*cp2++ = *cp1++ & *cp3++;
	if (cp2 < cplim2)
		memset(cp2, 0, cplim2 - cp2);	/* bzero() is obsolete */
}
/*
 * Incrementally fix up an Internet checksum after one 16-bit word of
 * the checksummed data changed from 'old' to 'new' (RFC 1624 style).
 * For UDP (udp != 0), a zero checksum means "no checksum computed",
 * so 0 is passed through untouched and a computed 0 becomes 0xFFFF.
 *
 * NOTE(review): missing lines reconstructed after pf(4)'s
 * pf_cksum_fixup() -- confirm against repository history.
 */
static __inline uint16_t
pfil_cksum_fixup(uint16_t cksum, uint16_t old, uint16_t new, uint8_t udp)
{
	uint32_t l;

	if (udp && cksum == 0)
		return (0x0000);
	l = cksum + old - new;
	l = (l >> 16) + (l & 65535);
	l = l & 65535;
	if (udp && l == 0)
		return (0xFFFF);
	return (l);
}
952 ipfw_key_build(struct ipfw_key
*key
, in_addr_t saddr
, uint16_t sport
,
953 in_addr_t daddr
, uint16_t dport
, uint8_t proto
)
960 key
->addr_u
.addrs
.addr1
= daddr
;
961 key
->addr_u
.addrs
.addr2
= saddr
;
962 key
->swap
|= IPFW_KEY_SWAP_ADDRS
;
964 key
->addr_u
.addrs
.addr1
= saddr
;
965 key
->addr_u
.addrs
.addr2
= daddr
;
969 key
->port_u
.ports
.port1
= dport
;
970 key
->port_u
.ports
.port2
= sport
;
971 key
->swap
|= IPFW_KEY_SWAP_PORTS
;
973 key
->port_u
.ports
.port1
= sport
;
974 key
->port_u
.ports
.port2
= dport
;
977 if (sport
== dport
&& (key
->swap
& IPFW_KEY_SWAP_ADDRS
))
978 key
->swap
|= IPFW_KEY_SWAP_PORTS
;
979 if (saddr
== daddr
&& (key
->swap
& IPFW_KEY_SWAP_PORTS
))
980 key
->swap
|= IPFW_KEY_SWAP_ADDRS
;
984 ipfw_key_4tuple(const struct ipfw_key
*key
, in_addr_t
*saddr
, uint16_t *sport
,
985 in_addr_t
*daddr
, uint16_t *dport
)
988 if (key
->swap
& IPFW_KEY_SWAP_ADDRS
) {
989 *saddr
= key
->addr_u
.addrs
.addr2
;
990 *daddr
= key
->addr_u
.addrs
.addr1
;
992 *saddr
= key
->addr_u
.addrs
.addr1
;
993 *daddr
= key
->addr_u
.addrs
.addr2
;
996 if (key
->swap
& IPFW_KEY_SWAP_PORTS
) {
997 *sport
= key
->port_u
.ports
.port2
;
998 *dport
= key
->port_u
.ports
.port1
;
1000 *sport
= key
->port_u
.ports
.port1
;
1001 *dport
= key
->port_u
.ports
.port2
;
1006 ipfw_state_cmp(struct ipfw_state
*s1
, struct ipfw_state
*s2
)
1009 if (s1
->st_proto
> s2
->st_proto
)
1011 if (s1
->st_proto
< s2
->st_proto
)
1014 if (s1
->st_addrs
> s2
->st_addrs
)
1016 if (s1
->st_addrs
< s2
->st_addrs
)
1019 if (s1
->st_ports
> s2
->st_ports
)
1021 if (s1
->st_ports
< s2
->st_ports
)
1024 if (s1
->st_swap
== s2
->st_swap
||
1025 (s1
->st_swap
^ s2
->st_swap
) == IPFW_KEY_SWAP_ALL
)
1028 if (s1
->st_swap
> s2
->st_swap
)
1035 ipfw_trkcnt_cmp(struct ipfw_trkcnt
*t1
, struct ipfw_trkcnt
*t2
)
1038 if (t1
->tc_proto
> t2
->tc_proto
)
1040 if (t1
->tc_proto
< t2
->tc_proto
)
1043 if (t1
->tc_addrs
> t2
->tc_addrs
)
1045 if (t1
->tc_addrs
< t2
->tc_addrs
)
1048 if (t1
->tc_ports
> t2
->tc_ports
)
1050 if (t1
->tc_ports
< t2
->tc_ports
)
1053 if (t1
->tc_ruleid
> t2
->tc_ruleid
)
1055 if (t1
->tc_ruleid
< t2
->tc_ruleid
)
1062 ipfw_track_cmp(struct ipfw_track
*t1
, struct ipfw_track
*t2
)
1065 if (t1
->t_proto
> t2
->t_proto
)
1067 if (t1
->t_proto
< t2
->t_proto
)
1070 if (t1
->t_addrs
> t2
->t_addrs
)
1072 if (t1
->t_addrs
< t2
->t_addrs
)
1075 if (t1
->t_ports
> t2
->t_ports
)
1077 if (t1
->t_ports
< t2
->t_ports
)
1080 if ((uintptr_t)t1
->t_rule
> (uintptr_t)t2
->t_rule
)
1082 if ((uintptr_t)t1
->t_rule
< (uintptr_t)t2
->t_rule
)
1088 static __inline
struct ipfw_state
*
1089 ipfw_state_link(struct ipfw_context
*ctx
, struct ipfw_state
*s
)
1091 struct ipfw_state
*dup
;
1093 KASSERT((s
->st_flags
& IPFW_STATE_F_LINKED
) == 0,
1094 ("state %p was linked", s
));
1095 dup
= RB_INSERT(ipfw_state_tree
, &ctx
->ipfw_state_tree
, s
);
1097 TAILQ_INSERT_TAIL(&ctx
->ipfw_state_list
, s
, st_link
);
1098 s
->st_flags
|= IPFW_STATE_F_LINKED
;
1103 static __inline
void
1104 ipfw_state_unlink(struct ipfw_context
*ctx
, struct ipfw_state
*s
)
1107 KASSERT(s
->st_flags
& IPFW_STATE_F_LINKED
,
1108 ("state %p was not linked", s
));
1109 RB_REMOVE(ipfw_state_tree
, &ctx
->ipfw_state_tree
, s
);
1110 TAILQ_REMOVE(&ctx
->ipfw_state_list
, s
, st_link
);
1111 s
->st_flags
&= ~IPFW_STATE_F_LINKED
;
1115 ipfw_state_max_set(int state_max
)
1118 ipfw_state_max
= state_max
;
1119 /* Allow 5% states over-allocation. */
1120 ipfw_state_loosecnt_updthr
= (state_max
/ 20) / netisr_ncpus
;
1124 ipfw_state_cntcoll(void)
1126 int cpu
, state_cnt
= 0;
1128 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
)
1129 state_cnt
+= ipfw_ctx
[cpu
]->ipfw_state_cnt
;
1134 ipfw_state_cntsync(void)
1138 state_cnt
= ipfw_state_cntcoll();
1139 ipfw_gd
.ipfw_state_loosecnt
= state_cnt
;
1144 ipfw_free_rule(struct ip_fw
*rule
)
1146 KASSERT(rule
->cpuid
== mycpuid
, ("rule freed on cpu%d", mycpuid
));
1147 KASSERT(rule
->refcnt
> 0, ("invalid refcnt %u", rule
->refcnt
));
1149 if (rule
->refcnt
== 0) {
1150 if (rule
->cross_rules
!= NULL
)
1151 kfree(rule
->cross_rules
, M_IPFW
);
1152 kfree(rule
, M_IPFW
);
1159 ipfw_unref_rule(void *priv
)
1161 ipfw_free_rule(priv
);
1163 KASSERT(ipfw_gd
.ipfw_refcnt
> 0,
1164 ("invalid ipfw_refcnt %d", ipfw_gd
.ipfw_refcnt
));
1165 atomic_subtract_int(&ipfw_gd
.ipfw_refcnt
, 1);
1169 static __inline
void
1170 ipfw_ref_rule(struct ip_fw
*rule
)
1172 KASSERT(rule
->cpuid
== mycpuid
, ("rule used on cpu%d", mycpuid
));
1174 atomic_add_int(&ipfw_gd
.ipfw_refcnt
, 1);
1180 * This macro maps an ip pointer into a layer3 header pointer of type T
1182 #define L3HDR(T, ip) ((T *)((uint32_t *)(ip) + (ip)->ip_hl))
1185 icmptype_match(struct ip
*ip
, ipfw_insn_u32
*cmd
)
1187 int type
= L3HDR(struct icmp
,ip
)->icmp_type
;
1188 int idx_max
= F_LEN(&cmd
->o
) - F_INSN_SIZE(ipfw_insn
);
1189 int idx
= type
/ 32;
1193 return (cmd
->d
[idx
] & (1 << (type
% 32)));
1197 icmpcode_match(struct ip
*ip
, ipfw_insn_u32
*cmd
)
1199 int code
= L3HDR(struct icmp
,ip
)->icmp_code
;
1200 int idx_max
= F_LEN(&cmd
->o
) - F_INSN_SIZE(ipfw_insn
);
1201 int idx
= code
/ 32;
1205 return (cmd
->d
[idx
] & (1 << (code
% 32)));
1208 #define TT ((1 << ICMP_ECHO) | \
1209 (1 << ICMP_ROUTERSOLICIT) | \
1210 (1 << ICMP_TSTAMP) | \
1211 (1 << ICMP_IREQ) | \
1212 (1 << ICMP_MASKREQ))
1215 is_icmp_query(struct ip
*ip
)
1217 int type
= L3HDR(struct icmp
, ip
)->icmp_type
;
1219 return (type
< 32 && (TT
& (1 << type
)));
1225 * The following checks use two arrays of 8 or 16 bits to store the
1226 * bits that we want set or clear, respectively. They are in the
1227 * low and high half of cmd->arg1 or cmd->d[0].
1229 * We scan options and store the bits we find set. We succeed if
1231 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
1233 * The code is sometimes optimized not to store additional variables.
1236 flags_match(ipfw_insn
*cmd
, uint8_t bits
)
1241 if (((cmd
->arg1
& 0xff) & bits
) != 0)
1242 return 0; /* some bits we want set were clear */
1244 want_clear
= (cmd
->arg1
>> 8) & 0xff;
1245 if ((want_clear
& bits
) != want_clear
)
1246 return 0; /* some bits we want clear were set */
1251 ipopts_match(struct ip
*ip
, ipfw_insn
*cmd
)
1253 int optlen
, bits
= 0;
1254 u_char
*cp
= (u_char
*)(ip
+ 1);
1255 int x
= (ip
->ip_hl
<< 2) - sizeof(struct ip
);
1257 for (; x
> 0; x
-= optlen
, cp
+= optlen
) {
1258 int opt
= cp
[IPOPT_OPTVAL
];
1260 if (opt
== IPOPT_EOL
)
1263 if (opt
== IPOPT_NOP
) {
1266 optlen
= cp
[IPOPT_OLEN
];
1267 if (optlen
<= 0 || optlen
> x
)
1268 return 0; /* invalid or truncated */
1273 bits
|= IP_FW_IPOPT_LSRR
;
1277 bits
|= IP_FW_IPOPT_SSRR
;
1281 bits
|= IP_FW_IPOPT_RR
;
1285 bits
|= IP_FW_IPOPT_TS
;
1292 return (flags_match(cmd
, bits
));
1296 tcpopts_match(struct ip
*ip
, ipfw_insn
*cmd
)
1298 int optlen
, bits
= 0;
1299 struct tcphdr
*tcp
= L3HDR(struct tcphdr
,ip
);
1300 u_char
*cp
= (u_char
*)(tcp
+ 1);
1301 int x
= (tcp
->th_off
<< 2) - sizeof(struct tcphdr
);
1303 for (; x
> 0; x
-= optlen
, cp
+= optlen
) {
1306 if (opt
== TCPOPT_EOL
)
1309 if (opt
== TCPOPT_NOP
) {
1319 bits
|= IP_FW_TCPOPT_MSS
;
1323 bits
|= IP_FW_TCPOPT_WINDOW
;
1326 case TCPOPT_SACK_PERMITTED
:
1328 bits
|= IP_FW_TCPOPT_SACK
;
1331 case TCPOPT_TIMESTAMP
:
1332 bits
|= IP_FW_TCPOPT_TS
;
1338 bits
|= IP_FW_TCPOPT_CC
;
1345 return (flags_match(cmd
, bits
));
1349 iface_match(struct ifnet
*ifp
, ipfw_insn_if
*cmd
)
1351 if (ifp
== NULL
) /* no iface with this packet, match fails */
1354 /* Check by name or by IP address */
1355 if (cmd
->name
[0] != '\0') { /* match by name */
1358 if (kfnmatch(cmd
->name
, ifp
->if_xname
, 0) == 0)
1361 if (strncmp(ifp
->if_xname
, cmd
->name
, IFNAMSIZ
) == 0)
1365 struct ifaddr_container
*ifac
;
1367 TAILQ_FOREACH(ifac
, &ifp
->if_addrheads
[mycpuid
], ifa_link
) {
1368 struct ifaddr
*ia
= ifac
->ifa
;
1370 if (ia
->ifa_addr
== NULL
)
1372 if (ia
->ifa_addr
->sa_family
!= AF_INET
)
1374 if (cmd
->p
.ip
.s_addr
== ((struct sockaddr_in
*)
1375 (ia
->ifa_addr
))->sin_addr
.s_addr
)
1376 return(1); /* match */
1379 return(0); /* no match, fail ... */
/*
 * SNPARGS(buf, len): expands to the (pointer, remaining-size)
 * argument pair for ksnprintf() when appending at offset `len`
 * into fixed-size buffer `buf`.
 */
1382 #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0
1385 * We enter here when we have a rule with O_LOG.
1386 * XXX this function alone takes about 2Kbytes of code!
/*
 * ipfw_log(): format and emit one rate-limited syslog line describing
 * the packet that matched a logging rule.  `f` may be NULL for a bogus
 * packet (logged against the no-rule counter).
 * NOTE(review): this extract is fragmentary — several action/proto
 * cases and format strings of the original function are not visible
 * here; the lines below are preserved verbatim.
 */
1389 ipfw_log(struct ipfw_context
*ctx
, struct ip_fw
*f
, u_int hlen
,
1390 struct ether_header
*eh
, struct mbuf
*m
, struct ifnet
*oif
)
1393 int limit_reached
= 0;
1394 char action2
[40], proto
[48], fragment
[28], abuf
[INET_ADDRSTRLEN
];
/* Rate limiting: bogus packets count against the global verbose limit,
 * rule-logged packets against the per-rule log_left budget. */
1399 if (f
== NULL
) { /* bogus pkt */
1400 if (verbose_limit
!= 0 &&
1401 ctx
->ipfw_norule_counter
>= verbose_limit
)
1403 ctx
->ipfw_norule_counter
++;
1404 if (ctx
->ipfw_norule_counter
== verbose_limit
)
1405 limit_reached
= verbose_limit
;
1407 } else { /* O_LOG is the first action, find the real one */
1408 ipfw_insn
*cmd
= ACTION_PTR(f
);
1409 ipfw_insn_log
*l
= (ipfw_insn_log
*)cmd
;
1411 if (l
->max_log
!= 0 && l
->log_left
== 0)
1414 if (l
->log_left
== 0)
1415 limit_reached
= l
->max_log
;
1416 cmd
+= F_LEN(cmd
); /* point to first action */
1417 if (cmd
->opcode
== O_PROB
)
1421 switch (cmd
->opcode
) {
1427 if (cmd
->arg1
==ICMP_REJECT_RST
) {
1429 } else if (cmd
->arg1
==ICMP_UNREACH_HOST
) {
1432 ksnprintf(SNPARGS(action2
, 0), "Unreach %d",
1446 ksnprintf(SNPARGS(action2
, 0), "Divert %d", cmd
->arg1
);
1450 ksnprintf(SNPARGS(action2
, 0), "Tee %d", cmd
->arg1
);
1454 ksnprintf(SNPARGS(action2
, 0), "SkipTo %d", cmd
->arg1
);
1458 ksnprintf(SNPARGS(action2
, 0), "Pipe %d", cmd
->arg1
);
1462 ksnprintf(SNPARGS(action2
, 0), "Queue %d", cmd
->arg1
);
1467 ipfw_insn_sa
*sa
= (ipfw_insn_sa
*)cmd
;
1470 len
= ksnprintf(SNPARGS(action2
, 0),
1472 kinet_ntoa(sa
->sa
.sin_addr
, abuf
));
1473 if (sa
->sa
.sin_port
) {
1474 ksnprintf(SNPARGS(action2
, len
), ":%d",
/* Build the protocol/addresses part of the log line. */
1486 if (hlen
== 0) { /* non-ip */
1487 ksnprintf(SNPARGS(proto
, 0), "MAC");
1489 struct ip
*ip
= mtod(m
, struct ip
*);
1490 /* these three are all aliases to the same thing */
1491 struct icmp
*const icmp
= L3HDR(struct icmp
, ip
);
1492 struct tcphdr
*const tcp
= (struct tcphdr
*)icmp
;
1493 struct udphdr
*const udp
= (struct udphdr
*)icmp
;
1495 int ip_off
, offset
, ip_len
;
1498 ip_off
= ntohs(ip
->ip_off
);
1499 ip_len
= ntohs(ip
->ip_len
);
1500 offset
= ip_off
& IP_OFFMASK
;
1504 len
= ksnprintf(SNPARGS(proto
, 0), "TCP %s",
1505 kinet_ntoa(ip
->ip_src
, abuf
));
1507 ksnprintf(SNPARGS(proto
, len
), ":%d %s:%d",
1508 ntohs(tcp
->th_sport
),
1509 kinet_ntoa(ip
->ip_dst
, abuf
),
1510 ntohs(tcp
->th_dport
));
1512 ksnprintf(SNPARGS(proto
, len
), " %s",
1513 kinet_ntoa(ip
->ip_dst
, abuf
));
1518 len
= ksnprintf(SNPARGS(proto
, 0), "UDP %s",
1519 kinet_ntoa(ip
->ip_src
, abuf
));
1521 ksnprintf(SNPARGS(proto
, len
), ":%d %s:%d",
1522 ntohs(udp
->uh_sport
),
1523 kinet_ntoa(ip
->ip_dst
, abuf
),
1524 ntohs(udp
->uh_dport
));
1526 ksnprintf(SNPARGS(proto
, len
), " %s",
1527 kinet_ntoa(ip
->ip_dst
, abuf
));
1533 len
= ksnprintf(SNPARGS(proto
, 0),
1538 len
= ksnprintf(SNPARGS(proto
, 0), "ICMP ");
1540 len
+= ksnprintf(SNPARGS(proto
, len
), "%s",
1541 kinet_ntoa(ip
->ip_src
, abuf
));
1542 ksnprintf(SNPARGS(proto
, len
), " %s",
1543 kinet_ntoa(ip
->ip_dst
, abuf
));
1547 len
= ksnprintf(SNPARGS(proto
, 0), "P:%d %s", ip
->ip_p
,
1548 kinet_ntoa(ip
->ip_src
, abuf
));
1549 ksnprintf(SNPARGS(proto
, len
), " %s",
1550 kinet_ntoa(ip
->ip_dst
, abuf
));
/* Describe IP fragmentation, if any. */
1554 if (ip_off
& (IP_MF
| IP_OFFMASK
)) {
1555 ksnprintf(SNPARGS(fragment
, 0), " (frag %d:%d@%d%s)",
1556 ntohs(ip
->ip_id
), ip_len
- (ip
->ip_hl
<< 2),
1557 offset
<< 3, (ip_off
& IP_MF
) ? "+" : "");
/* Emit the log line, with or without interface information. */
1561 if (oif
|| m
->m_pkthdr
.rcvif
) {
1562 log(LOG_SECURITY
| LOG_INFO
,
1563 "ipfw: %d %s %s %s via %s%s\n",
1564 f
? f
->rulenum
: -1,
1565 action
, proto
, oif
? "out" : "in",
1566 oif
? oif
->if_xname
: m
->m_pkthdr
.rcvif
->if_xname
,
1569 log(LOG_SECURITY
| LOG_INFO
,
1570 "ipfw: %d %s %s [no if info]%s\n",
1571 f
? f
->rulenum
: -1,
1572 action
, proto
, fragment
);
/* Announce once when the logging limit for this entry is hit. */
1575 if (limit_reached
) {
1576 log(LOG_SECURITY
| LOG_NOTICE
,
1577 "ipfw: limit %d reached on entry %d\n",
1578 limit_reached
, f
? f
->rulenum
: -1);
1585 ipfw_xlat_reap(struct ipfw_xlat
*x
, struct ipfw_xlat
*slave_x
)
1587 struct ip_fw
*rule
= slave_x
->xlat_rule
;
1589 KKASSERT(rule
->cpuid
== mycpuid
);
1591 /* No more cross references; free this pair now. */
1593 kfree(slave_x
, M_IPFW
);
1595 /* See the comment in ipfw_ip_xlate_dispatch(). */
1600 ipfw_xlat_reap_dispatch(netmsg_t nm
)
1602 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
1603 struct ipfw_state
*s
, *ns
;
1605 ASSERT_NETISR_NCPUS(mycpuid
);
1609 netisr_replymsg(&ctx
->ipfw_xlatreap_nm
, 0);
1612 /* TODO: limit scanning depth */
1613 TAILQ_FOREACH_MUTABLE(s
, &ctx
->ipfw_xlatreap
, st_link
, ns
) {
1614 struct ipfw_xlat
*x
= (struct ipfw_xlat
*)s
;
1615 struct ipfw_xlat
*slave_x
= x
->xlat_pair
;
1618 crefs
= slave_x
->xlat_crefs
+ x
->xlat_crefs
;
1620 TAILQ_REMOVE(&ctx
->ipfw_xlatreap
, &x
->xlat_st
, st_link
);
1621 ipfw_xlat_reap(x
, slave_x
);
1624 if (!TAILQ_EMPTY(&ctx
->ipfw_xlatreap
)) {
1625 callout_reset(&ctx
->ipfw_xlatreap_ch
, 2, ipfw_xlat_reap_timeo
,
1626 &ctx
->ipfw_xlatreap_nm
);
1631 ipfw_xlat_reap_timeo(void *xnm
)
1633 struct netmsg_base
*nm
= xnm
;
1635 KKASSERT(mycpuid
< netisr_ncpus
);
1638 if (nm
->lmsg
.ms_flags
& MSGF_DONE
)
1639 netisr_sendmsg_oncpu(nm
);
1644 ipfw_xlat_free_dispatch(netmsg_t nmsg
)
1646 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
1647 struct ipfw_xlat
*x
= nmsg
->lmsg
.u
.ms_resultp
;
1648 struct ipfw_xlat
*slave_x
= x
->xlat_pair
;
1651 ASSERT_NETISR_NCPUS(mycpuid
);
1653 KKASSERT(slave_x
!= NULL
);
1654 KKASSERT(slave_x
->xlat_invalid
&& x
->xlat_invalid
);
1656 KASSERT((x
->xlat_flags
& IPFW_STATE_F_LINKED
) == 0,
1657 ("master xlat is still linked"));
1658 if (slave_x
->xlat_flags
& IPFW_STATE_F_LINKED
)
1659 ipfw_state_unlink(ctx
, &slave_x
->xlat_st
);
1661 /* See the comment in ipfw_ip_xlate_dispatch(). */
1662 slave_x
->xlat_crefs
--;
1664 crefs
= slave_x
->xlat_crefs
+ x
->xlat_crefs
;
1666 ipfw_xlat_reap(x
, slave_x
);
1670 if (TAILQ_EMPTY(&ctx
->ipfw_xlatreap
)) {
1671 callout_reset(&ctx
->ipfw_xlatreap_ch
, 2, ipfw_xlat_reap_timeo
,
1672 &ctx
->ipfw_xlatreap_nm
);
1676 * This pair is still referenced; defer its destruction.
1677 * YYY reuse st_link.
1679 TAILQ_INSERT_TAIL(&ctx
->ipfw_xlatreap
, &x
->xlat_st
, st_link
);
1682 static __inline
void
1683 ipfw_xlat_invalidate(struct ipfw_xlat
*x
)
1686 x
->xlat_invalid
= 1;
1687 x
->xlat_pair
->xlat_invalid
= 1;
1691 ipfw_state_del(struct ipfw_context
*ctx
, struct ipfw_state
*s
)
1693 struct ipfw_xlat
*x
, *slave_x
;
1694 struct netmsg_base
*nm
;
1696 KASSERT(s
->st_type
== O_KEEP_STATE
|| s
->st_type
== O_LIMIT
||
1697 IPFW_ISXLAT(s
->st_type
), ("invalid state type %u", s
->st_type
));
1698 KASSERT((s
->st_flags
& IPFW_STATE_F_XLATSLAVE
) == 0,
1699 ("delete slave xlat"));
1701 KASSERT(ctx
->ipfw_state_cnt
> 0,
1702 ("invalid state count %d", ctx
->ipfw_state_cnt
));
1703 ctx
->ipfw_state_cnt
--;
1704 if (ctx
->ipfw_state_loosecnt
> 0)
1705 ctx
->ipfw_state_loosecnt
--;
1708 * Unhook this state.
1710 if (s
->st_track
!= NULL
) {
1711 struct ipfw_track
*t
= s
->st_track
;
1713 KASSERT(!LIST_EMPTY(&t
->t_state_list
),
1714 ("track state list is empty"));
1715 LIST_REMOVE(s
, st_trklink
);
1717 KASSERT(*t
->t_count
> 0,
1718 ("invalid track count %d", *t
->t_count
));
1719 atomic_subtract_int(t
->t_count
, 1);
1721 ipfw_state_unlink(ctx
, s
);
1724 * Free this state. Xlat requires special processing,
1725 * since xlat are paired state and they could be on
1729 if (!IPFW_ISXLAT(s
->st_type
)) {
1730 /* Not xlat; free now. */
1735 x
= (struct ipfw_xlat
*)s
;
1737 if (x
->xlat_pair
== NULL
) {
1738 /* Not setup yet; free now. */
1743 slave_x
= x
->xlat_pair
;
1744 KKASSERT(slave_x
->xlat_flags
& IPFW_STATE_F_XLATSLAVE
);
1746 if (x
->xlat_pcpu
== mycpuid
) {
1748 * Paired states are on the same cpu; delete this
1751 KKASSERT(x
->xlat_crefs
== 0);
1752 KKASSERT(slave_x
->xlat_crefs
== 0);
1753 if (slave_x
->xlat_flags
& IPFW_STATE_F_LINKED
)
1754 ipfw_state_unlink(ctx
, &slave_x
->xlat_st
);
1756 kfree(slave_x
, M_IPFW
);
1761 * Free the paired states on the cpu owning the slave xlat.
1765 * Mark the state pair invalid; completely deleting them
1766 * may take some time.
1768 ipfw_xlat_invalidate(x
);
1770 nm
= &x
->xlat_freenm
;
1771 netmsg_init(nm
, NULL
, &netisr_apanic_rport
, MSGF_PRIORITY
,
1772 ipfw_xlat_free_dispatch
);
1773 nm
->lmsg
.u
.ms_resultp
= x
;
1775 /* See the comment in ipfw_xlate_redispatch(). */
1776 x
->xlat_rule
->cross_refs
++;
1779 netisr_sendmsg(nm
, x
->xlat_pcpu
);
1783 ipfw_state_remove(struct ipfw_context
*ctx
, struct ipfw_state
*s
)
1786 if (s
->st_flags
& IPFW_STATE_F_XLATSLAVE
) {
1787 KKASSERT(IPFW_ISXLAT(s
->st_type
));
1788 ipfw_xlat_invalidate((struct ipfw_xlat
*)s
);
1789 ipfw_state_unlink(ctx
, s
);
1792 ipfw_state_del(ctx
, s
);
1796 ipfw_state_reap(struct ipfw_context
*ctx
, int reap_max
)
1798 struct ipfw_state
*s
, *anchor
;
1801 if (reap_max
< ipfw_state_reap_min
)
1802 reap_max
= ipfw_state_reap_min
;
1804 if ((ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
) == 0) {
1806 * Kick start state expiring. Ignore scan limit,
1807 * we are short of states.
1809 ctx
->ipfw_flags
|= IPFW_FLAG_STATEREAP
;
1810 expired
= ipfw_state_expire_start(ctx
, INT_MAX
, reap_max
);
1811 ctx
->ipfw_flags
&= ~IPFW_FLAG_STATEREAP
;
1816 * States are being expired.
1819 if (ctx
->ipfw_state_cnt
== 0)
1823 anchor
= &ctx
->ipfw_stateexp_anch
;
1824 while ((s
= TAILQ_NEXT(anchor
, st_link
)) != NULL
) {
1826 * Ignore scan limit; we are short of states.
1829 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
1830 TAILQ_INSERT_AFTER(&ctx
->ipfw_state_list
, s
, anchor
, st_link
);
1832 if (IPFW_STATE_SCANSKIP(s
))
1835 if (IPFW_STATE_ISDEAD(s
) || IPFW_STATE_TCPCLOSED(s
)) {
1836 ipfw_state_del(ctx
, s
);
1837 if (++expired
>= reap_max
)
1839 if ((expired
& 0xff) == 0 &&
1840 ipfw_state_cntcoll() + ipfw_state_headroom
<=
1847 * Leave the anchor on the list, even if the end of the list has
1848 * been reached. ipfw_state_expire_more_dispatch() will handle
1855 ipfw_state_flush(struct ipfw_context
*ctx
, const struct ip_fw
*rule
)
1857 struct ipfw_state
*s
, *sn
;
1859 TAILQ_FOREACH_MUTABLE(s
, &ctx
->ipfw_state_list
, st_link
, sn
) {
1860 if (IPFW_STATE_SCANSKIP(s
))
1862 if (rule
!= NULL
&& s
->st_rule
!= rule
)
1864 ipfw_state_del(ctx
, s
);
1869 ipfw_state_expire_done(struct ipfw_context
*ctx
)
1872 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
,
1873 ("stateexp is not in progress"));
1874 ctx
->ipfw_flags
&= ~IPFW_FLAG_STATEEXP
;
1875 callout_reset(&ctx
->ipfw_stateto_ch
, hz
,
1876 ipfw_state_expire_ipifunc
, NULL
);
1880 ipfw_state_expire_more(struct ipfw_context
*ctx
)
1882 struct netmsg_base
*nm
= &ctx
->ipfw_stateexp_more
;
1884 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
,
1885 ("stateexp is not in progress"));
1886 KASSERT(nm
->lmsg
.ms_flags
& MSGF_DONE
,
1887 ("stateexp more did not finish"));
1888 netisr_sendmsg_oncpu(nm
);
1892 ipfw_state_expire_loop(struct ipfw_context
*ctx
, struct ipfw_state
*anchor
,
1893 int scan_max
, int expire_max
)
1895 struct ipfw_state
*s
;
1896 int scanned
= 0, expired
= 0;
1898 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
,
1899 ("stateexp is not in progress"));
1901 while ((s
= TAILQ_NEXT(anchor
, st_link
)) != NULL
) {
1902 if (scanned
++ >= scan_max
) {
1903 ipfw_state_expire_more(ctx
);
1907 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
1908 TAILQ_INSERT_AFTER(&ctx
->ipfw_state_list
, s
, anchor
, st_link
);
1910 if (IPFW_STATE_SCANSKIP(s
))
1913 if (IPFW_STATE_ISDEAD(s
) ||
1914 ((ctx
->ipfw_flags
& IPFW_FLAG_STATEREAP
) &&
1915 IPFW_STATE_TCPCLOSED(s
))) {
1916 ipfw_state_del(ctx
, s
);
1917 if (++expired
>= expire_max
) {
1918 ipfw_state_expire_more(ctx
);
1921 if ((ctx
->ipfw_flags
& IPFW_FLAG_STATEREAP
) &&
1922 (expired
& 0xff) == 0 &&
1923 ipfw_state_cntcoll() + ipfw_state_headroom
<=
1925 ipfw_state_expire_more(ctx
);
1930 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
1931 ipfw_state_expire_done(ctx
);
1936 ipfw_state_expire_more_dispatch(netmsg_t nm
)
1938 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
1939 struct ipfw_state
*anchor
;
1941 ASSERT_NETISR_NCPUS(mycpuid
);
1942 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
,
1943 ("statexp is not in progress"));
1946 netisr_replymsg(&nm
->base
, 0);
1948 anchor
= &ctx
->ipfw_stateexp_anch
;
1949 if (ctx
->ipfw_state_cnt
== 0) {
1950 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
1951 ipfw_state_expire_done(ctx
);
1954 ipfw_state_expire_loop(ctx
, anchor
,
1955 ipfw_state_scan_max
, ipfw_state_expire_max
);
1959 ipfw_state_expire_start(struct ipfw_context
*ctx
, int scan_max
, int expire_max
)
1961 struct ipfw_state
*anchor
;
1963 KASSERT((ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
) == 0,
1964 ("stateexp is in progress"));
1965 ctx
->ipfw_flags
|= IPFW_FLAG_STATEEXP
;
1967 if (ctx
->ipfw_state_cnt
== 0) {
1968 ipfw_state_expire_done(ctx
);
1973 * Do not expire more than once per second, it is useless.
1975 if ((ctx
->ipfw_flags
& IPFW_FLAG_STATEREAP
) == 0 &&
1976 ctx
->ipfw_state_lastexp
== time_uptime
) {
1977 ipfw_state_expire_done(ctx
);
1980 ctx
->ipfw_state_lastexp
= time_uptime
;
1982 anchor
= &ctx
->ipfw_stateexp_anch
;
1983 TAILQ_INSERT_HEAD(&ctx
->ipfw_state_list
, anchor
, st_link
);
1984 return (ipfw_state_expire_loop(ctx
, anchor
, scan_max
, expire_max
));
1988 ipfw_state_expire_dispatch(netmsg_t nm
)
1990 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
1992 ASSERT_NETISR_NCPUS(mycpuid
);
1996 netisr_replymsg(&nm
->base
, 0);
1999 if (ctx
->ipfw_flags
& IPFW_FLAG_STATEEXP
) {
2000 /* Running; done. */
2003 ipfw_state_expire_start(ctx
,
2004 ipfw_state_scan_max
, ipfw_state_expire_max
);
2008 ipfw_state_expire_ipifunc(void *dummy __unused
)
2010 struct netmsg_base
*msg
;
2012 KKASSERT(mycpuid
< netisr_ncpus
);
2013 msg
= &ipfw_ctx
[mycpuid
]->ipfw_stateexp_nm
;
2016 if (msg
->lmsg
.ms_flags
& MSGF_DONE
)
2017 netisr_sendmsg_oncpu(msg
);
2022 ipfw_state_update_tcp(struct ipfw_state
*s
, int dir
, const struct tcphdr
*tcp
)
2024 uint32_t seq
= ntohl(tcp
->th_seq
);
2025 uint32_t ack
= ntohl(tcp
->th_ack
);
2027 if (tcp
->th_flags
& TH_RST
)
2030 if (dir
== MATCH_FORWARD
) {
2031 if ((s
->st_flags
& IPFW_STATE_F_SEQFWD
) == 0) {
2032 s
->st_flags
|= IPFW_STATE_F_SEQFWD
;
2033 s
->st_seq_fwd
= seq
;
2034 } else if (SEQ_GEQ(seq
, s
->st_seq_fwd
)) {
2035 s
->st_seq_fwd
= seq
;
2037 /* Out-of-sequence; done. */
2040 if (tcp
->th_flags
& TH_ACK
) {
2041 if ((s
->st_flags
& IPFW_STATE_F_ACKFWD
) == 0) {
2042 s
->st_flags
|= IPFW_STATE_F_ACKFWD
;
2043 s
->st_ack_fwd
= ack
;
2044 } else if (SEQ_GEQ(ack
, s
->st_ack_fwd
)) {
2045 s
->st_ack_fwd
= ack
;
2047 /* Out-of-sequence; done. */
2051 if ((s
->st_state
& ((TH_FIN
| TH_ACK
) << 8)) ==
2052 (TH_FIN
<< 8) && s
->st_ack_fwd
== s
->st_seq_rev
+ 1)
2053 s
->st_state
|= (TH_ACK
<< 8);
2056 if ((s
->st_flags
& IPFW_STATE_F_SEQREV
) == 0) {
2057 s
->st_flags
|= IPFW_STATE_F_SEQREV
;
2058 s
->st_seq_rev
= seq
;
2059 } else if (SEQ_GEQ(seq
, s
->st_seq_rev
)) {
2060 s
->st_seq_rev
= seq
;
2062 /* Out-of-sequence; done. */
2065 if (tcp
->th_flags
& TH_ACK
) {
2066 if ((s
->st_flags
& IPFW_STATE_F_ACKREV
) == 0) {
2067 s
->st_flags
|= IPFW_STATE_F_ACKREV
;
2069 } else if (SEQ_GEQ(ack
, s
->st_ack_rev
)) {
2070 s
->st_ack_rev
= ack
;
2072 /* Out-of-sequence; done. */
2076 if ((s
->st_state
& (TH_FIN
| TH_ACK
)) == TH_FIN
&&
2077 s
->st_ack_rev
== s
->st_seq_fwd
+ 1)
2078 s
->st_state
|= TH_ACK
;
2085 ipfw_state_update(const struct ipfw_flow_id
*pkt
, int dir
,
2086 const struct tcphdr
*tcp
, struct ipfw_state
*s
)
2089 if (pkt
->proto
== IPPROTO_TCP
) { /* update state according to flags */
2090 u_char flags
= pkt
->flags
& IPFW_STATE_TCPFLAGS
;
2092 if (tcp
!= NULL
&& !ipfw_state_update_tcp(s
, dir
, tcp
))
2095 s
->st_state
|= (dir
== MATCH_FORWARD
) ? flags
: (flags
<< 8);
2096 switch (s
->st_state
& IPFW_STATE_TCPSTATES
) {
2097 case TH_SYN
: /* opening */
2098 s
->st_expire
= time_uptime
+ dyn_syn_lifetime
;
2101 case BOTH_SYN
: /* move to established */
2102 case BOTH_SYN
| TH_FIN
: /* one side tries to close */
2103 case BOTH_SYN
| (TH_FIN
<< 8):
2104 s
->st_expire
= time_uptime
+ dyn_ack_lifetime
;
2107 case BOTH_SYN
| BOTH_FIN
: /* both sides closed */
2108 if ((s
->st_state
& BOTH_FINACK
) == BOTH_FINACK
) {
2109 /* And both FINs were ACKed. */
2110 s
->st_expire
= time_uptime
+ dyn_fin_lifetime
;
2112 s
->st_expire
= time_uptime
+
2113 dyn_finwait_lifetime
;
2120 * reset or some invalid combination, but can also
2121 * occur if we use keep-state the wrong way.
2123 if ((s
->st_state
& ((TH_RST
<< 8) | TH_RST
)) == 0)
2124 kprintf("invalid state: 0x%x\n", s
->st_state
);
2126 s
->st_expire
= time_uptime
+ dyn_rst_lifetime
;
2129 } else if (pkt
->proto
== IPPROTO_UDP
) {
2130 s
->st_expire
= time_uptime
+ dyn_udp_lifetime
;
2132 /* other protocols */
2133 s
->st_expire
= time_uptime
+ dyn_short_lifetime
;
2140 static struct ipfw_state
*
2141 ipfw_state_lookup(struct ipfw_context
*ctx
, const struct ipfw_flow_id
*pkt
,
2142 int *match_direction
, const struct tcphdr
*tcp
)
2144 struct ipfw_state
*key
, *s
;
2145 int dir
= MATCH_NONE
;
2147 key
= &ctx
->ipfw_state_tmpkey
;
2148 ipfw_key_build(&key
->st_key
, pkt
->src_ip
, pkt
->src_port
,
2149 pkt
->dst_ip
, pkt
->dst_port
, pkt
->proto
);
2150 s
= RB_FIND(ipfw_state_tree
, &ctx
->ipfw_state_tree
, key
);
2152 goto done
; /* not found. */
2153 if (IPFW_STATE_ISDEAD(s
)) {
2154 ipfw_state_remove(ctx
, s
);
2158 if ((pkt
->flags
& TH_SYN
) && IPFW_STATE_TCPCLOSED(s
)) {
2159 /* TCP ports recycling is too fast. */
2160 ctx
->ipfw_sts_tcprecycled
++;
2161 ipfw_state_remove(ctx
, s
);
2166 if (s
->st_swap
== key
->st_swap
) {
2167 dir
= MATCH_FORWARD
;
2169 KASSERT((s
->st_swap
& key
->st_swap
) == 0,
2170 ("found mismatch state"));
2171 dir
= MATCH_REVERSE
;
2174 /* Update this state. */
2175 ipfw_state_update(pkt
, dir
, tcp
, s
);
2177 if (s
->st_track
!= NULL
) {
2178 /* This track has been used. */
2179 s
->st_track
->t_expire
= time_uptime
+ dyn_short_lifetime
;
2182 if (match_direction
)
2183 *match_direction
= dir
;
2187 static struct ipfw_state
*
2188 ipfw_state_alloc(struct ipfw_context
*ctx
, const struct ipfw_flow_id
*id
,
2189 uint16_t type
, struct ip_fw
*rule
, const struct tcphdr
*tcp
)
2191 struct ipfw_state
*s
;
2194 KASSERT(type
== O_KEEP_STATE
|| type
== O_LIMIT
|| IPFW_ISXLAT(type
),
2195 ("invalid state type %u", type
));
2197 sz
= sizeof(struct ipfw_state
);
2198 if (IPFW_ISXLAT(type
))
2199 sz
= sizeof(struct ipfw_xlat
);
2201 s
= kmalloc(sz
, M_IPFW
, M_INTWAIT
| M_NULLOK
| M_ZERO
);
2203 ctx
->ipfw_sts_nomem
++;
2207 ipfw_key_build(&s
->st_key
, id
->src_ip
, id
->src_port
,
2208 id
->dst_ip
, id
->dst_port
, id
->proto
);
2212 if (IPFW_ISXLAT(type
)) {
2213 struct ipfw_xlat
*x
= (struct ipfw_xlat
*)s
;
2215 x
->xlat_dir
= MATCH_NONE
;
2220 * Update this state:
2221 * Set st_expire and st_state.
2223 ipfw_state_update(id
, MATCH_FORWARD
, tcp
, s
);
2228 static struct ipfw_state
*
2229 ipfw_state_add(struct ipfw_context
*ctx
, const struct ipfw_flow_id
*id
,
2230 uint16_t type
, struct ip_fw
*rule
, struct ipfw_track
*t
,
2231 const struct tcphdr
*tcp
)
2233 struct ipfw_state
*s
, *dup
;
2235 s
= ipfw_state_alloc(ctx
, id
, type
, rule
, tcp
);
2239 ctx
->ipfw_state_cnt
++;
2240 ctx
->ipfw_state_loosecnt
++;
2241 if (ctx
->ipfw_state_loosecnt
>= ipfw_state_loosecnt_updthr
) {
2242 ipfw_gd
.ipfw_state_loosecnt
+= ctx
->ipfw_state_loosecnt
;
2243 ctx
->ipfw_state_loosecnt
= 0;
2246 dup
= ipfw_state_link(ctx
, s
);
2248 panic("ipfw: %u state exists %p", type
, dup
);
2251 /* Keep the track referenced. */
2252 LIST_INSERT_HEAD(&t
->t_state_list
, s
, st_trklink
);
2259 ipfw_track_free(struct ipfw_context
*ctx
, struct ipfw_track
*t
)
2261 struct ipfw_trkcnt
*trk
;
2262 boolean_t trk_freed
= FALSE
;
2264 KASSERT(t
->t_count
!= NULL
, ("track anchor"));
2265 KASSERT(LIST_EMPTY(&t
->t_state_list
),
2266 ("invalid track is still referenced"));
2269 KASSERT(trk
!= NULL
, ("track has no trkcnt"));
2271 RB_REMOVE(ipfw_track_tree
, &ctx
->ipfw_track_tree
, t
);
2272 TAILQ_REMOVE(&ctx
->ipfw_track_list
, t
, t_link
);
2276 * fdrop() style reference counting.
2277 * See kern/kern_descrip.c fdrop().
2280 int refs
= trk
->tc_refs
;
2283 KASSERT(refs
> 0, ("invalid trkcnt refs %d", refs
));
2286 if (atomic_cmpset_int(&trk
->tc_refs
, refs
, 0)) {
2287 KASSERT(trk
->tc_count
== 0,
2288 ("%d states reference this trkcnt",
2290 RB_REMOVE(ipfw_trkcnt_tree
,
2291 &ipfw_gd
.ipfw_trkcnt_tree
, trk
);
2293 KASSERT(ipfw_gd
.ipfw_trkcnt_cnt
> 0,
2294 ("invalid trkcnt cnt %d",
2295 ipfw_gd
.ipfw_trkcnt_cnt
));
2296 ipfw_gd
.ipfw_trkcnt_cnt
--;
2299 if (ctx
->ipfw_trkcnt_spare
== NULL
)
2300 ctx
->ipfw_trkcnt_spare
= trk
;
2308 } else if (atomic_cmpset_int(&trk
->tc_refs
, refs
, refs
- 1)) {
2317 ipfw_track_flush(struct ipfw_context
*ctx
, struct ip_fw
*rule
)
2319 struct ipfw_track
*t
, *tn
;
2321 TAILQ_FOREACH_MUTABLE(t
, &ctx
->ipfw_track_list
, t_link
, tn
) {
2322 if (t
->t_count
== NULL
) /* anchor */
2324 if (rule
!= NULL
&& t
->t_rule
!= rule
)
2326 ipfw_track_free(ctx
, t
);
2331 ipfw_track_state_expire(struct ipfw_context
*ctx
, struct ipfw_track
*t
,
2334 struct ipfw_state
*s
, *sn
;
2335 boolean_t ret
= FALSE
;
2337 KASSERT(t
->t_count
!= NULL
, ("track anchor"));
2339 if (LIST_EMPTY(&t
->t_state_list
))
2343 * Do not expire more than once per second, it is useless.
2345 if (t
->t_lastexp
== time_uptime
)
2347 t
->t_lastexp
= time_uptime
;
2349 LIST_FOREACH_MUTABLE(s
, &t
->t_state_list
, st_trklink
, sn
) {
2350 if (IPFW_STATE_ISDEAD(s
) || (reap
&& IPFW_STATE_TCPCLOSED(s
))) {
2351 KASSERT(s
->st_track
== t
,
2352 ("state track %p does not match %p",
2354 ipfw_state_del(ctx
, s
);
2361 static __inline
struct ipfw_trkcnt
*
2362 ipfw_trkcnt_alloc(struct ipfw_context
*ctx
)
2364 struct ipfw_trkcnt
*trk
;
2366 if (ctx
->ipfw_trkcnt_spare
!= NULL
) {
2367 trk
= ctx
->ipfw_trkcnt_spare
;
2368 ctx
->ipfw_trkcnt_spare
= NULL
;
2370 trk
= kmalloc(sizeof(*trk
), M_IPFW
,
2371 M_INTWAIT
| M_NULLOK
| M_CACHEALIGN
);
2377 ipfw_track_expire_done(struct ipfw_context
*ctx
)
2380 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
,
2381 ("trackexp is not in progress"));
2382 ctx
->ipfw_flags
&= ~IPFW_FLAG_TRACKEXP
;
2383 callout_reset(&ctx
->ipfw_trackto_ch
, hz
,
2384 ipfw_track_expire_ipifunc
, NULL
);
2388 ipfw_track_expire_more(struct ipfw_context
*ctx
)
2390 struct netmsg_base
*nm
= &ctx
->ipfw_trackexp_more
;
2392 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
,
2393 ("trackexp is not in progress"));
2394 KASSERT(nm
->lmsg
.ms_flags
& MSGF_DONE
,
2395 ("trackexp more did not finish"));
2396 netisr_sendmsg_oncpu(nm
);
2400 ipfw_track_expire_loop(struct ipfw_context
*ctx
, struct ipfw_track
*anchor
,
2401 int scan_max
, int expire_max
)
2403 struct ipfw_track
*t
;
2404 int scanned
= 0, expired
= 0;
2405 boolean_t reap
= FALSE
;
2407 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
,
2408 ("trackexp is not in progress"));
2410 if (ctx
->ipfw_flags
& IPFW_FLAG_TRACKREAP
)
2413 while ((t
= TAILQ_NEXT(anchor
, t_link
)) != NULL
) {
2414 if (scanned
++ >= scan_max
) {
2415 ipfw_track_expire_more(ctx
);
2419 TAILQ_REMOVE(&ctx
->ipfw_track_list
, anchor
, t_link
);
2420 TAILQ_INSERT_AFTER(&ctx
->ipfw_track_list
, t
, anchor
, t_link
);
2422 if (t
->t_count
== NULL
) /* anchor */
2425 ipfw_track_state_expire(ctx
, t
, reap
);
2426 if (!LIST_EMPTY(&t
->t_state_list
)) {
2427 /* There are states referencing this track. */
2431 if (TIME_LEQ(t
->t_expire
, time_uptime
) || reap
) {
2433 if (ipfw_track_free(ctx
, t
)) {
2434 if (++expired
>= expire_max
) {
2435 ipfw_track_expire_more(ctx
);
2441 TAILQ_REMOVE(&ctx
->ipfw_track_list
, anchor
, t_link
);
2442 ipfw_track_expire_done(ctx
);
2447 ipfw_track_expire_start(struct ipfw_context
*ctx
, int scan_max
, int expire_max
)
2449 struct ipfw_track
*anchor
;
2451 KASSERT((ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
) == 0,
2452 ("trackexp is in progress"));
2453 ctx
->ipfw_flags
|= IPFW_FLAG_TRACKEXP
;
2455 if (RB_EMPTY(&ctx
->ipfw_track_tree
)) {
2456 ipfw_track_expire_done(ctx
);
2461 * Do not expire more than once per second, it is useless.
2463 if ((ctx
->ipfw_flags
& IPFW_FLAG_TRACKREAP
) == 0 &&
2464 ctx
->ipfw_track_lastexp
== time_uptime
) {
2465 ipfw_track_expire_done(ctx
);
2468 ctx
->ipfw_track_lastexp
= time_uptime
;
2470 anchor
= &ctx
->ipfw_trackexp_anch
;
2471 TAILQ_INSERT_HEAD(&ctx
->ipfw_track_list
, anchor
, t_link
);
2472 return (ipfw_track_expire_loop(ctx
, anchor
, scan_max
, expire_max
));
2476 ipfw_track_expire_more_dispatch(netmsg_t nm
)
2478 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
2479 struct ipfw_track
*anchor
;
2481 ASSERT_NETISR_NCPUS(mycpuid
);
2482 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
,
2483 ("trackexp is not in progress"));
2486 netisr_replymsg(&nm
->base
, 0);
2488 anchor
= &ctx
->ipfw_trackexp_anch
;
2489 if (RB_EMPTY(&ctx
->ipfw_track_tree
)) {
2490 TAILQ_REMOVE(&ctx
->ipfw_track_list
, anchor
, t_link
);
2491 ipfw_track_expire_done(ctx
);
2494 ipfw_track_expire_loop(ctx
, anchor
,
2495 ipfw_track_scan_max
, ipfw_track_expire_max
);
2499 ipfw_track_expire_dispatch(netmsg_t nm
)
2501 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
2503 ASSERT_NETISR_NCPUS(mycpuid
);
2507 netisr_replymsg(&nm
->base
, 0);
2510 if (ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
) {
2511 /* Running; done. */
2514 ipfw_track_expire_start(ctx
,
2515 ipfw_track_scan_max
, ipfw_track_expire_max
);
2519 ipfw_track_expire_ipifunc(void *dummy __unused
)
2521 struct netmsg_base
*msg
;
2523 KKASSERT(mycpuid
< netisr_ncpus
);
2524 msg
= &ipfw_ctx
[mycpuid
]->ipfw_trackexp_nm
;
2527 if (msg
->lmsg
.ms_flags
& MSGF_DONE
)
2528 netisr_sendmsg_oncpu(msg
);
2533 ipfw_track_reap(struct ipfw_context
*ctx
)
2535 struct ipfw_track
*t
, *anchor
;
2538 if ((ctx
->ipfw_flags
& IPFW_FLAG_TRACKEXP
) == 0) {
2540 * Kick start track expiring. Ignore scan limit,
2541 * we are short of tracks.
2543 ctx
->ipfw_flags
|= IPFW_FLAG_TRACKREAP
;
2544 expired
= ipfw_track_expire_start(ctx
, INT_MAX
,
2545 ipfw_track_reap_max
);
2546 ctx
->ipfw_flags
&= ~IPFW_FLAG_TRACKREAP
;
2551 * Tracks are being expired.
2554 if (RB_EMPTY(&ctx
->ipfw_track_tree
))
2558 anchor
= &ctx
->ipfw_trackexp_anch
;
2559 while ((t
= TAILQ_NEXT(anchor
, t_link
)) != NULL
) {
2561 * Ignore scan limit; we are short of tracks.
2564 TAILQ_REMOVE(&ctx
->ipfw_track_list
, anchor
, t_link
);
2565 TAILQ_INSERT_AFTER(&ctx
->ipfw_track_list
, t
, anchor
, t_link
);
2567 if (t
->t_count
== NULL
) /* anchor */
2570 ipfw_track_state_expire(ctx
, t
, TRUE
);
2571 if (!LIST_EMPTY(&t
->t_state_list
)) {
2572 /* There are states referencing this track. */
2576 if (ipfw_track_free(ctx
, t
)) {
2577 if (++expired
>= ipfw_track_reap_max
) {
2578 ipfw_track_expire_more(ctx
);
2585 * Leave the anchor on the list, even if the end of the list has
2586 * been reached. ipfw_track_expire_more_dispatch() will handle
/*
 * ipfw_track_alloc() — look up or create the per-flow track used by
 * O_LIMIT rules: build a lookup key from 'id' masked by 'limit_mask',
 * find-or-insert the track in the per-cpu RB tree, and attach a global
 * (cross-cpu, refcounted) ipfw_trkcnt that holds the shared connection
 * count for rule->track_ruleid.
 *
 * NOTE(review): this text is a line-shredded extraction with interior
 * source lines elided (the embedded original line numbers jump).  In
 * particular the branch structure around the global trkcnt RB_FIND /
 * reap / RB_INSERT paths and the error unwind (presumably kfree of 't'
 * and return NULL) are missing here; do not edit this block without
 * consulting the upstream ip_fw2.c.
 */
2592 static struct ipfw_track
*
2593 ipfw_track_alloc(struct ipfw_context
*ctx
, const struct ipfw_flow_id
*id
,
2594 uint16_t limit_mask
, struct ip_fw
*rule
)
2596 struct ipfw_track
*key
, *t
, *dup
;
2597 struct ipfw_trkcnt
*trk
, *ret
;
2598 boolean_t do_expire
= FALSE
;
2600 KASSERT(rule
->track_ruleid
!= 0,
2601 ("rule %u has no track ruleid", rule
->rulenum
));
2603 key
= &ctx
->ipfw_track_tmpkey
;
2604 key
->t_proto
= id
->proto
;
2608 if (limit_mask
& DYN_SRC_ADDR
)
2609 key
->t_saddr
= id
->src_ip
;
2610 if (limit_mask
& DYN_DST_ADDR
)
2611 key
->t_daddr
= id
->dst_ip
;
2612 if (limit_mask
& DYN_SRC_PORT
)
2613 key
->t_sport
= id
->src_port
;
2614 if (limit_mask
& DYN_DST_PORT
)
2615 key
->t_dport
= id
->dst_port
;
2617 t
= RB_FIND(ipfw_track_tree
, &ctx
->ipfw_track_tree
, key
);
2621 t
= kmalloc(sizeof(*t
), M_IPFW
, M_INTWAIT
| M_NULLOK
);
2623 ctx
->ipfw_tks_nomem
++;
2627 t
->t_key
= key
->t_key
;
2630 LIST_INIT(&t
->t_state_list
);
2632 if (ipfw_gd
.ipfw_trkcnt_cnt
>= ipfw_track_max
) {
2633 time_t globexp
, uptime
;
2639 * Do not expire globally more than once per second,
2642 uptime
= time_uptime
;
2643 globexp
= ipfw_gd
.ipfw_track_globexp
;
2644 if (globexp
!= uptime
&&
2645 atomic_cmpset_long(&ipfw_gd
.ipfw_track_globexp
,
2649 /* Expire tracks on other CPUs. */
2650 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
) {
2653 lwkt_send_ipiq(globaldata_find(cpu
),
2654 ipfw_track_expire_ipifunc
, NULL
);
2658 trk
= ipfw_trkcnt_alloc(ctx
);
2661 struct ipfw_trkcnt
*tkey
;
2663 tkey
= &ctx
->ipfw_trkcnt_tmpkey
;
2664 key
= NULL
; /* tkey overlaps key */
2666 tkey
->tc_key
= t
->t_key
;
2667 tkey
->tc_ruleid
= rule
->track_ruleid
;
2670 trk
= RB_FIND(ipfw_trkcnt_tree
, &ipfw_gd
.ipfw_trkcnt_tree
,
2675 ctx
->ipfw_tks_reap
++;
2676 if (ipfw_track_reap(ctx
) > 0) {
2677 if (ipfw_gd
.ipfw_trkcnt_cnt
<
2679 trk
= ipfw_trkcnt_alloc(ctx
);
2682 ctx
->ipfw_tks_cntnomem
++;
2684 ctx
->ipfw_tks_overflow
++;
2687 ctx
->ipfw_tks_reapfailed
++;
2688 ctx
->ipfw_tks_overflow
++;
2691 ctx
->ipfw_tks_cntnomem
++;
2696 KASSERT(trk
->tc_refs
> 0 && trk
->tc_refs
< netisr_ncpus
,
2697 ("invalid trkcnt refs %d", trk
->tc_refs
));
2698 atomic_add_int(&trk
->tc_refs
, 1);
2702 trk
->tc_key
= t
->t_key
;
2703 trk
->tc_ruleid
= rule
->track_ruleid
;
2707 trk
->tc_rulenum
= rule
->rulenum
;
2710 ret
= RB_INSERT(ipfw_trkcnt_tree
, &ipfw_gd
.ipfw_trkcnt_tree
,
2713 KASSERT(ret
->tc_refs
> 0 &&
2714 ret
->tc_refs
< netisr_ncpus
,
2715 ("invalid trkcnt refs %d", ret
->tc_refs
));
2716 KASSERT(ctx
->ipfw_trkcnt_spare
== NULL
,
2717 ("trkcnt spare was installed"));
2718 ctx
->ipfw_trkcnt_spare
= trk
;
2721 ipfw_gd
.ipfw_trkcnt_cnt
++;
2723 atomic_add_int(&trk
->tc_refs
, 1);
2726 t
->t_count
= &trk
->tc_count
;
2729 dup
= RB_INSERT(ipfw_track_tree
, &ctx
->ipfw_track_tree
, t
);
2731 panic("ipfw: track exists");
2732 TAILQ_INSERT_TAIL(&ctx
->ipfw_track_list
, t
, t_link
);
2734 t
->t_expire
= time_uptime
+ dyn_short_lifetime
;
2739 * Install state for rule type cmd->o.opcode
2741 * Returns NULL if state is not installed because of errors or because
2742 * states limitations are enforced.
2744 static struct ipfw_state
*
2745 ipfw_state_install(struct ipfw_context
*ctx
, struct ip_fw
*rule
,
2746 ipfw_insn_limit
*cmd
, struct ip_fw_args
*args
, const struct tcphdr
*tcp
)
2748 struct ipfw_state
*s
;
2749 struct ipfw_track
*t
;
2752 if (ipfw_gd
.ipfw_state_loosecnt
>= ipfw_state_max
&&
2753 (diff
= (ipfw_state_cntsync() - ipfw_state_max
)) >= 0) {
2754 boolean_t overflow
= TRUE
;
2756 ctx
->ipfw_sts_reap
++;
2757 if (ipfw_state_reap(ctx
, diff
) == 0)
2758 ctx
->ipfw_sts_reapfailed
++;
2759 if (ipfw_state_cntsync() < ipfw_state_max
)
2763 time_t globexp
, uptime
;
2767 * Do not expire globally more than once per second,
2770 uptime
= time_uptime
;
2771 globexp
= ipfw_gd
.ipfw_state_globexp
;
2772 if (globexp
== uptime
||
2773 !atomic_cmpset_long(&ipfw_gd
.ipfw_state_globexp
,
2775 ctx
->ipfw_sts_overflow
++;
2779 /* Expire states on other CPUs. */
2780 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
) {
2783 lwkt_send_ipiq(globaldata_find(cpu
),
2784 ipfw_state_expire_ipifunc
, NULL
);
2786 ctx
->ipfw_sts_overflow
++;
2791 switch (cmd
->o
.opcode
) {
2792 case O_KEEP_STATE
: /* bidir rule */
2794 s
= ipfw_state_add(ctx
, &args
->f_id
, cmd
->o
.opcode
, rule
, NULL
,
2800 case O_LIMIT
: /* limit number of sessions */
2801 t
= ipfw_track_alloc(ctx
, &args
->f_id
, cmd
->limit_mask
, rule
);
2805 if (*t
->t_count
>= cmd
->conn_limit
) {
2806 if (!ipfw_track_state_expire(ctx
, t
, TRUE
))
2810 count
= *t
->t_count
;
2811 if (count
>= cmd
->conn_limit
)
2813 if (atomic_cmpset_int(t
->t_count
, count
, count
+ 1))
2817 s
= ipfw_state_add(ctx
, &args
->f_id
, O_LIMIT
, rule
, t
, tcp
);
2820 atomic_subtract_int(t
->t_count
, 1);
2826 panic("unknown state type %u\n", cmd
->o
.opcode
);
2829 if (s
->st_type
== O_REDIRECT
) {
2830 struct ipfw_xlat
*x
= (struct ipfw_xlat
*)s
;
2831 ipfw_insn_rdr
*r
= (ipfw_insn_rdr
*)cmd
;
2833 x
->xlat_addr
= r
->addr
.s_addr
;
2834 x
->xlat_port
= r
->port
;
2835 x
->xlat_ifp
= args
->m
->m_pkthdr
.rcvif
;
2836 x
->xlat_dir
= MATCH_FORWARD
;
2837 KKASSERT(x
->xlat_ifp
!= NULL
);
2843 ipfw_table_lookup(struct ipfw_context
*ctx
, uint16_t tableid
,
2844 const struct in_addr
*in
)
2846 struct radix_node_head
*rnh
;
2847 struct sockaddr_in sin
;
2848 struct ipfw_tblent
*te
;
2850 KASSERT(tableid
< ipfw_table_max
, ("invalid tableid %u", tableid
));
2851 rnh
= ctx
->ipfw_tables
[tableid
];
2853 return (0); /* no match */
2855 memset(&sin
, 0, sizeof(sin
));
2856 sin
.sin_family
= AF_INET
;
2857 sin
.sin_len
= sizeof(sin
);
2860 te
= (struct ipfw_tblent
*)rnh
->rnh_matchaddr(&sin
, rnh
);
2862 return (0); /* no match */
2865 te
->te_lastuse
= time_second
;
2866 return (1); /* match */
2870 * Transmit a TCP packet, containing either a RST or a keepalive.
2871 * When flags & TH_RST, we are sending a RST packet, because of a
2872 * "reset" action matched the packet.
2873 * Otherwise we are sending a keepalive, and flags & TH_
2875 * Only {src,dst}_{ip,port} of "id" are used.
2878 send_pkt(const struct ipfw_flow_id
*id
, uint32_t seq
, uint32_t ack
, int flags
)
2883 struct route sro
; /* fake route */
2885 MGETHDR(m
, M_NOWAIT
, MT_HEADER
);
2888 m
->m_pkthdr
.rcvif
= NULL
;
2889 m
->m_pkthdr
.len
= m
->m_len
= sizeof(struct ip
) + sizeof(struct tcphdr
);
2890 m
->m_data
+= max_linkhdr
;
2892 ip
= mtod(m
, struct ip
*);
2893 bzero(ip
, m
->m_len
);
2894 tcp
= (struct tcphdr
*)(ip
+ 1); /* no IP options */
2895 ip
->ip_p
= IPPROTO_TCP
;
2899 * Assume we are sending a RST (or a keepalive in the reverse
2900 * direction), swap src and destination addresses and ports.
2902 ip
->ip_src
.s_addr
= htonl(id
->dst_ip
);
2903 ip
->ip_dst
.s_addr
= htonl(id
->src_ip
);
2904 tcp
->th_sport
= htons(id
->dst_port
);
2905 tcp
->th_dport
= htons(id
->src_port
);
2906 if (flags
& TH_RST
) { /* we are sending a RST */
2907 if (flags
& TH_ACK
) {
2908 tcp
->th_seq
= htonl(ack
);
2909 tcp
->th_ack
= htonl(0);
2910 tcp
->th_flags
= TH_RST
;
2914 tcp
->th_seq
= htonl(0);
2915 tcp
->th_ack
= htonl(seq
);
2916 tcp
->th_flags
= TH_RST
| TH_ACK
;
2920 * We are sending a keepalive. flags & TH_SYN determines
2921 * the direction, forward if set, reverse if clear.
2922 * NOTE: seq and ack are always assumed to be correct
2923 * as set by the caller. This may be confusing...
2925 if (flags
& TH_SYN
) {
2927 * we have to rewrite the correct addresses!
2929 ip
->ip_dst
.s_addr
= htonl(id
->dst_ip
);
2930 ip
->ip_src
.s_addr
= htonl(id
->src_ip
);
2931 tcp
->th_dport
= htons(id
->dst_port
);
2932 tcp
->th_sport
= htons(id
->src_port
);
2934 tcp
->th_seq
= htonl(seq
);
2935 tcp
->th_ack
= htonl(ack
);
2936 tcp
->th_flags
= TH_ACK
;
2940 * set ip_len to the payload size so we can compute
2941 * the tcp checksum on the pseudoheader
2942 * XXX check this, could save a couple of words ?
2944 ip
->ip_len
= htons(sizeof(struct tcphdr
));
2945 tcp
->th_sum
= in_cksum(m
, m
->m_pkthdr
.len
);
2948 * now fill fields left out earlier
2950 ip
->ip_ttl
= ip_defttl
;
2951 ip
->ip_len
= htons(m
->m_pkthdr
.len
);
2953 bzero(&sro
, sizeof(sro
));
2954 ip_rtaddr(ip
->ip_dst
, &sro
);
2956 m
->m_pkthdr
.fw_flags
|= IPFW_MBUF_GENERATED
;
2957 ip_output(m
, NULL
, &sro
, 0, NULL
, NULL
);
2963 * Send a reject message, consuming the mbuf passed as an argument.
2966 send_reject(struct ip_fw_args
*args
, int code
, int offset
, int ip_len
)
2968 if (code
!= ICMP_REJECT_RST
) { /* Send an ICMP unreach */
2969 /* IP header is always left in network order */
2970 icmp_error(args
->m
, ICMP_UNREACH
, code
, 0L, 0);
2971 } else if (offset
== 0 && args
->f_id
.proto
== IPPROTO_TCP
) {
2972 struct tcphdr
*const tcp
=
2973 L3HDR(struct tcphdr
, mtod(args
->m
, struct ip
*));
2975 if ((tcp
->th_flags
& TH_RST
) == 0) {
2976 send_pkt(&args
->f_id
, ntohl(tcp
->th_seq
),
2977 ntohl(tcp
->th_ack
), tcp
->th_flags
| TH_RST
);
2987 * Given an ip_fw *, lookup_next_rule will return a pointer
2988 * to the next rule, which can be either the jump
2989 * target (for skipto instructions) or the next one in the list (in
2990 * all other cases including a missing jump target).
2991 * The result is also written in the "next_rule" field of the rule.
2992 * Backward jumps are not allowed, so start looking from the next
2995 * This never returns NULL -- in case we do not have an exact match,
2996 * the next rule is returned. When the ruleset is changed,
2997 * pointers are flushed so we are always correct.
2999 static struct ip_fw
*
3000 lookup_next_rule(struct ip_fw
*me
)
3002 struct ip_fw
*rule
= NULL
;
3005 /* look for action, in case it is a skipto */
3006 cmd
= ACTION_PTR(me
);
3007 if (cmd
->opcode
== O_LOG
)
3009 if (cmd
->opcode
== O_SKIPTO
) {
3010 for (rule
= me
->next
; rule
; rule
= rule
->next
) {
3011 if (rule
->rulenum
>= cmd
->arg1
)
3015 if (rule
== NULL
) /* failure or not a skipto */
3017 me
->next_rule
= rule
;
3022 ipfw_match_uid(const struct ipfw_flow_id
*fid
, struct ifnet
*oif
,
3023 enum ipfw_opcodes opcode
, uid_t uid
)
3025 struct in_addr src_ip
, dst_ip
;
3026 struct inpcbinfo
*pi
;
3030 if (fid
->proto
== IPPROTO_TCP
) {
3032 pi
= &tcbinfo
[mycpuid
];
3033 } else if (fid
->proto
== IPPROTO_UDP
) {
3035 pi
= &udbinfo
[mycpuid
];
3041 * Values in 'fid' are in host byte order
3043 dst_ip
.s_addr
= htonl(fid
->dst_ip
);
3044 src_ip
.s_addr
= htonl(fid
->src_ip
);
3046 pcb
= in_pcblookup_hash(pi
,
3047 dst_ip
, htons(fid
->dst_port
),
3048 src_ip
, htons(fid
->src_port
),
3051 pcb
= in_pcblookup_hash(pi
,
3052 src_ip
, htons(fid
->src_port
),
3053 dst_ip
, htons(fid
->dst_port
),
3056 if (pcb
== NULL
|| pcb
->inp_socket
== NULL
)
3059 if (opcode
== O_UID
) {
3060 #define socheckuid(a,b) ((a)->so_cred->cr_uid != (b))
3061 return !socheckuid(pcb
->inp_socket
, uid
);
3064 return groupmember(uid
, pcb
->inp_socket
->so_cred
);
3069 ipfw_match_ifip(ipfw_insn_ifip
*cmd
, const struct in_addr
*ip
)
3072 if (__predict_false((cmd
->o
.arg1
& IPFW_IFIP_VALID
) == 0)) {
3073 struct ifaddr_container
*ifac
;
3076 ifp
= ifunit_netisr(cmd
->ifname
);
3080 TAILQ_FOREACH(ifac
, &ifp
->if_addrheads
[mycpuid
], ifa_link
) {
3081 struct ifaddr
*ia
= ifac
->ifa
;
3083 if (ia
->ifa_addr
== NULL
)
3085 if (ia
->ifa_addr
->sa_family
!= AF_INET
)
3088 cmd
->mask
.s_addr
= INADDR_ANY
;
3089 if (cmd
->o
.arg1
& IPFW_IFIP_NET
) {
3090 cmd
->mask
= ((struct sockaddr_in
*)
3091 ia
->ifa_netmask
)->sin_addr
;
3093 if (cmd
->mask
.s_addr
== INADDR_ANY
)
3094 cmd
->mask
.s_addr
= INADDR_BROADCAST
;
3097 ((struct sockaddr_in
*)ia
->ifa_addr
)->sin_addr
;
3098 cmd
->addr
.s_addr
&= cmd
->mask
.s_addr
;
3100 cmd
->o
.arg1
|= IPFW_IFIP_VALID
;
3103 if ((cmd
->o
.arg1
& IPFW_IFIP_VALID
) == 0)
3106 return ((ip
->s_addr
& cmd
->mask
.s_addr
) == cmd
->addr
.s_addr
);
3110 ipfw_xlate(const struct ipfw_xlat
*x
, struct mbuf
*m
,
3111 struct in_addr
*old_addr
, uint16_t *old_port
)
3113 struct ip
*ip
= mtod(m
, struct ip
*);
3114 struct in_addr
*addr
;
3115 uint16_t *port
, *csum
, dlen
= 0;
3117 boolean_t pseudo
= FALSE
;
3119 if (x
->xlat_flags
& IPFW_STATE_F_XLATSRC
) {
3123 port
= &L3HDR(struct tcphdr
, ip
)->th_sport
;
3124 csum
= &L3HDR(struct tcphdr
, ip
)->th_sum
;
3127 port
= &L3HDR(struct udphdr
, ip
)->uh_sport
;
3128 csum
= &L3HDR(struct udphdr
, ip
)->uh_sum
;
3132 panic("ipfw: unsupported src xlate proto %u", ip
->ip_p
);
3138 port
= &L3HDR(struct tcphdr
, ip
)->th_dport
;
3139 csum
= &L3HDR(struct tcphdr
, ip
)->th_sum
;
3142 port
= &L3HDR(struct udphdr
, ip
)->uh_dport
;
3143 csum
= &L3HDR(struct udphdr
, ip
)->uh_sum
;
3147 panic("ipfw: unsupported dst xlate proto %u", ip
->ip_p
);
3150 if (old_addr
!= NULL
)
3152 if (old_port
!= NULL
) {
3153 if (x
->xlat_port
!= 0)
3159 if (m
->m_pkthdr
.csum_flags
& (CSUM_UDP
| CSUM_TCP
| CSUM_TSO
)) {
3160 if ((m
->m_pkthdr
.csum_flags
& CSUM_TSO
) == 0)
3161 dlen
= ntohs(ip
->ip_len
) - (ip
->ip_hl
<< 2);
3166 const uint16_t *oaddr
, *naddr
;
3168 oaddr
= (const uint16_t *)&addr
->s_addr
;
3169 naddr
= (const uint16_t *)&x
->xlat_addr
;
3171 ip
->ip_sum
= pfil_cksum_fixup(pfil_cksum_fixup(ip
->ip_sum
,
3172 oaddr
[0], naddr
[0], 0), oaddr
[1], naddr
[1], 0);
3173 *csum
= pfil_cksum_fixup(pfil_cksum_fixup(*csum
,
3174 oaddr
[0], naddr
[0], udp
), oaddr
[1], naddr
[1], udp
);
3176 addr
->s_addr
= x
->xlat_addr
;
3178 if (x
->xlat_port
!= 0) {
3180 *csum
= pfil_cksum_fixup(*csum
, *port
, x
->xlat_port
,
3183 *port
= x
->xlat_port
;
3187 *csum
= in_pseudo(ip
->ip_src
.s_addr
, ip
->ip_dst
.s_addr
,
3188 htons(dlen
+ ip
->ip_p
));
3193 ipfw_ip_xlate_dispatch(netmsg_t nmsg
)
3195 struct netmsg_genpkt
*nm
= (struct netmsg_genpkt
*)nmsg
;
3196 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
3197 struct mbuf
*m
= nm
->m
;
3198 struct ipfw_xlat
*x
= nm
->arg1
;
3199 struct ip_fw
*rule
= x
->xlat_rule
;
3201 ASSERT_NETISR_NCPUS(mycpuid
);
3202 KASSERT(rule
->cpuid
== mycpuid
,
3203 ("rule does not belong to cpu%d", mycpuid
));
3204 KASSERT(m
->m_pkthdr
.fw_flags
& IPFW_MBUF_CONTINUE
,
3205 ("mbuf does not have ipfw continue rule"));
3207 KASSERT(ctx
->ipfw_cont_rule
== NULL
,
3208 ("pending ipfw continue rule"));
3209 KASSERT(ctx
->ipfw_cont_xlat
== NULL
,
3210 ("pending ipfw continue xlat"));
3211 ctx
->ipfw_cont_rule
= rule
;
3212 ctx
->ipfw_cont_xlat
= x
;
3217 ip_output(m
, NULL
, NULL
, IP_FORWARDING
, NULL
, NULL
);
3219 /* May not be cleared, if ipfw was unload/disabled. */
3220 ctx
->ipfw_cont_rule
= NULL
;
3221 ctx
->ipfw_cont_xlat
= NULL
;
3224 * This state is no longer used; decrement its xlat_crefs,
3225 * so this state can be deleted.
3229 * This rule is no longer used; decrement its cross_refs,
3230 * so this rule can be deleted.
3233 * Decrement cross_refs in the last step of this function,
3234 * so that the module could be unloaded safely.
/*
 * ipfw_xlate_redispatch() — hand a translated mbuf over to the cpu that
 * owns the paired xlat state: pin the rule (cross_refs) and the state
 * (xlat_crefs) while the mbuf is in flight, tag the mbuf with the
 * CONTINUE/XLATINS/XLATFWD fw_flags, and send it to 'cpuid' via a
 * netmsg_genpkt targeting ipfw_ip_xlate_dispatch().
 *
 * NOTE(review): this text is a line-shredded extraction with interior
 * source lines elided (the embedded original line numbers jump); in
 * particular the statement guarded by the IPFW_XLATE_OUTPUT test
 * (original line 3282) and the xlat_crefs increment are missing here.
 * Do not edit this block without consulting the upstream ip_fw2.c.
 */
3240 ipfw_xlate_redispatch(struct mbuf
*m
, int cpuid
, struct ipfw_xlat
*x
,
3243 struct netmsg_genpkt
*nm
;
3245 KASSERT(x
->xlat_pcpu
== cpuid
, ("xlat paired cpu%d, target cpu%d",
3246 x
->xlat_pcpu
, cpuid
));
3249 * Bump cross_refs to prevent this rule and its siblings
3250 * from being deleted, while this mbuf is inflight. The
3251 * cross_refs of the sibling rule on the target cpu will
3252 * be decremented, once this mbuf is going to be filtered
3253 * on the target cpu.
3255 x
->xlat_rule
->cross_refs
++;
3257 * Bump xlat_crefs to prevent this state and its paired
3258 * state from being deleted, while this mbuf is inflight.
3259 * The xlat_crefs of the paired state on the target cpu
3260 * will be decremented, once this mbuf is going to be
3261 * filtered on the target cpu.
3265 m
->m_pkthdr
.fw_flags
|= IPFW_MBUF_CONTINUE
;
3266 if (flags
& IPFW_XLATE_INSERT
)
3267 m
->m_pkthdr
.fw_flags
|= IPFW_MBUF_XLATINS
;
3268 if (flags
& IPFW_XLATE_FORWARD
)
3269 m
->m_pkthdr
.fw_flags
|= IPFW_MBUF_XLATFWD
;
3272 * NOTE: We always leave ip_len and ip_off in network
3273 * order across all network layers.
3275 nm
= &m
->m_hdr
.mh_genmsg
;
3276 netmsg_init(&nm
->base
, NULL
, &netisr_apanic_rport
, 0,
3277 ipfw_ip_xlate_dispatch
);
3279 nm
->arg1
= x
->xlat_pair
;
3281 if (flags
& IPFW_XLATE_OUTPUT
)
3283 netisr_sendmsg(&nm
->base
, cpuid
);
3286 static struct mbuf
*
3287 ipfw_setup_local(struct mbuf
*m
, const int hlen
, struct ip_fw_args
*args
,
3288 struct ip_fw_local
*local
, struct ip
**ip0
)
3290 struct ip
*ip
= mtod(m
, struct ip
*);
3295 * Collect parameters into local variables for faster matching.
3297 if (hlen
== 0) { /* do not grab addresses for non-ip pkts */
3298 local
->proto
= args
->f_id
.proto
= 0; /* mark f_id invalid */
3302 local
->proto
= args
->f_id
.proto
= ip
->ip_p
;
3303 local
->src_ip
= ip
->ip_src
;
3304 local
->dst_ip
= ip
->ip_dst
;
3305 local
->offset
= ntohs(ip
->ip_off
) & IP_OFFMASK
;
3306 local
->ip_len
= ntohs(ip
->ip_len
);
3308 #define PULLUP_TO(len) \
3310 if (m->m_len < (len)) { \
3311 args->m = m = m_pullup(m, (len)); \
3316 ip = mtod(m, struct ip *); \
3320 if (local
->offset
== 0) {
3321 switch (local
->proto
) {
3323 PULLUP_TO(hlen
+ sizeof(struct tcphdr
));
3324 local
->tcp
= tcp
= L3HDR(struct tcphdr
, ip
);
3325 local
->dst_port
= tcp
->th_dport
;
3326 local
->src_port
= tcp
->th_sport
;
3327 args
->f_id
.flags
= tcp
->th_flags
;
3331 PULLUP_TO(hlen
+ sizeof(struct udphdr
));
3332 udp
= L3HDR(struct udphdr
, ip
);
3333 local
->dst_port
= udp
->uh_dport
;
3334 local
->src_port
= udp
->uh_sport
;
3338 PULLUP_TO(hlen
+ 4); /* type, code and checksum. */
3339 args
->f_id
.flags
= L3HDR(struct icmp
, ip
)->icmp_type
;
3349 args
->f_id
.src_ip
= ntohl(local
->src_ip
.s_addr
);
3350 args
->f_id
.dst_ip
= ntohl(local
->dst_ip
.s_addr
);
3351 args
->f_id
.src_port
= local
->src_port
= ntohs(local
->src_port
);
3352 args
->f_id
.dst_port
= local
->dst_port
= ntohs(local
->dst_port
);
3358 static struct mbuf
*
3359 ipfw_rehashm(struct mbuf
*m
, const int hlen
, struct ip_fw_args
*args
,
3360 struct ip_fw_local
*local
, struct ip
**ip0
)
3362 m
->m_flags
&= ~M_HASH
;
3369 KASSERT(m
->m_flags
& M_HASH
, ("no hash"));
3371 /* 'm' might be changed by ip_hashfn(). */
3372 return (ipfw_setup_local(m
, hlen
, args
, local
, ip0
));
3376 * The main check routine for the firewall.
3378 * All arguments are in args so we can modify them and return them
3379 * back to the caller.
3383 * args->m (in/out) The packet; we set to NULL when/if we nuke it.
3384 * Starts with the IP header.
3385 * args->eh (in) Mac header if present, or NULL for layer3 packet.
3386 * args->oif Outgoing interface, or NULL if packet is incoming.
3387 * The incoming interface is in the mbuf. (in)
3389 * args->rule Pointer to the last matching rule (in/out)
3390 * args->f_id Addresses grabbed from the packet (out)
3394 * If the packet was denied/rejected and has been dropped, *m is equal
3395 * to NULL upon return.
3397 * IP_FW_DENY the packet must be dropped.
3398 * IP_FW_PASS The packet is to be accepted and routed normally.
3399 * IP_FW_DIVERT Divert the packet to port (args->cookie)
3400 * IP_FW_TEE Tee the packet to port (args->cookie)
3401 * IP_FW_DUMMYNET Send the packet to pipe/queue (args->cookie)
3402 * IP_FW_CONTINUE Continue processing on another cpu.
3405 ipfw_chk(struct ip_fw_args
*args
)
3408 * Local variables hold state during the processing of a packet.
3410 * IMPORTANT NOTE: to speed up the processing of rules, there
3411 * are some assumption on the values of the variables, which
3412 * are documented here. Should you change them, please check
3413 * the implementation of the various instructions to make sure
3414 * that they still work.
3416 * args->eh The MAC header. It is non-null for a layer2
3417 * packet, it is NULL for a layer-3 packet.
3419 * m | args->m Pointer to the mbuf, as received from the caller.
3420 * It may change if ipfw_chk() does an m_pullup, or if it
3421 * consumes the packet because it calls send_reject().
3422 * XXX This has to change, so that ipfw_chk() never modifies
3423 * or consumes the buffer.
3424 * ip is simply an alias of the value of m, and it is kept
3425 * in sync with it (the packet is supposed to start with
3428 struct mbuf
*m
= args
->m
;
3429 struct ip
*ip
= mtod(m
, struct ip
*);
3432 * oif | args->oif If NULL, ipfw_chk has been called on the
3433 * inbound path (ether_input, ip_input).
3434 * If non-NULL, ipfw_chk has been called on the outbound path
3435 * (ether_output, ip_output).
3437 struct ifnet
*oif
= args
->oif
;
3439 struct ip_fw
*f
= NULL
; /* matching rule */
3440 int retval
= IP_FW_PASS
;
3442 struct divert_info
*divinfo
;
3443 struct ipfw_state
*s
;
3446 * hlen The length of the IPv4 header.
3447 * hlen >0 means we have an IPv4 packet.
3449 u_int hlen
= 0; /* hlen >0 means we have an IP pkt */
3451 struct ip_fw_local lc
;
3454 * dyn_dir = MATCH_UNKNOWN when rules unchecked,
3455 * MATCH_NONE when checked and not matched (dyn_f = NULL),
3456 * MATCH_FORWARD or MATCH_REVERSE otherwise (dyn_f != NULL)
3458 int dyn_dir
= MATCH_UNKNOWN
;
3459 struct ip_fw
*dyn_f
= NULL
;
3460 int cpuid
= mycpuid
;
3461 struct ipfw_context
*ctx
;
3463 ASSERT_NETISR_NCPUS(cpuid
);
3464 ctx
= ipfw_ctx
[cpuid
];
3466 if (m
->m_pkthdr
.fw_flags
& IPFW_MBUF_GENERATED
)
3467 return IP_FW_PASS
; /* accept */
3469 if (args
->eh
== NULL
|| /* layer 3 packet */
3470 (m
->m_pkthdr
.len
>= sizeof(struct ip
) &&
3471 ntohs(args
->eh
->ether_type
) == ETHERTYPE_IP
))
3472 hlen
= ip
->ip_hl
<< 2;
3474 memset(&lc
, 0, sizeof(lc
));
3476 m
= ipfw_setup_local(m
, hlen
, args
, &lc
, &ip
);
3482 * Packet has already been tagged. Look for the next rule
3483 * to restart processing.
3485 * If fw_one_pass != 0 then just accept it.
3486 * XXX should not happen here, but optimized out in
3489 if (fw_one_pass
&& (args
->flags
& IP_FWARG_F_CONT
) == 0)
3491 args
->flags
&= ~IP_FWARG_F_CONT
;
3493 /* This rule is being/has been flushed */
3497 KASSERT(args
->rule
->cpuid
== cpuid
,
3498 ("rule used on cpu%d", cpuid
));
3500 /* This rule was deleted */
3501 if (args
->rule
->rule_flags
& IPFW_RULE_F_INVALID
)
3504 if (args
->xlat
!= NULL
) {
3505 struct ipfw_xlat
*x
= args
->xlat
;
3507 /* This xlat is being deleted. */
3508 if (x
->xlat_invalid
)
3514 dyn_dir
= (args
->flags
& IP_FWARG_F_XLATFWD
) ?
3515 MATCH_FORWARD
: MATCH_REVERSE
;
3517 if (args
->flags
& IP_FWARG_F_XLATINS
) {
3518 KASSERT(x
->xlat_flags
& IPFW_STATE_F_XLATSLAVE
,
3519 ("not slave %u state", x
->xlat_type
));
3520 s
= ipfw_state_link(ctx
, &x
->xlat_st
);
3522 ctx
->ipfw_xlate_conflicts
++;
3523 if (IPFW_STATE_ISDEAD(s
)) {
3524 ipfw_state_remove(ctx
, s
);
3525 s
= ipfw_state_link(ctx
,
3532 "conflicts %u state\n",
3536 ipfw_xlat_invalidate(x
);
3539 ctx
->ipfw_xlate_cresolved
++;
3542 ipfw_state_update(&args
->f_id
, dyn_dir
,
3543 lc
.tcp
, &x
->xlat_st
);
3546 /* TODO: setup dyn_f, dyn_dir */
3548 f
= args
->rule
->next_rule
;
3550 f
= lookup_next_rule(args
->rule
);
3554 * Find the starting rule. It can be either the first
3555 * one, or the one after divert_rule if asked so.
3559 KKASSERT((args
->flags
&
3560 (IP_FWARG_F_XLATINS
| IP_FWARG_F_CONT
)) == 0);
3561 KKASSERT(args
->xlat
== NULL
);
3563 mtag
= m_tag_find(m
, PACKET_TAG_IPFW_DIVERT
, NULL
);
3565 divinfo
= m_tag_data(mtag
);
3566 skipto
= divinfo
->skipto
;
3571 f
= ctx
->ipfw_layer3_chain
;
3572 if (args
->eh
== NULL
&& skipto
!= 0) {
3573 /* No skipto during rule flushing */
3577 if (skipto
>= IPFW_DEFAULT_RULE
)
3578 return IP_FW_DENY
; /* invalid */
3580 while (f
&& f
->rulenum
<= skipto
)
3582 if (f
== NULL
) /* drop packet */
3584 } else if (ipfw_flushing
) {
3585 /* Rules are being flushed; skip to default rule */
3586 f
= ctx
->ipfw_default_rule
;
3589 if ((mtag
= m_tag_find(m
, PACKET_TAG_IPFW_DIVERT
, NULL
)) != NULL
)
3590 m_tag_delete(m
, mtag
);
3593 * Now scan the rules, and parse microinstructions for each rule.
3595 for (; f
; f
= f
->next
) {
3598 int skip_or
; /* skip rest of OR block */
3601 if (ctx
->ipfw_set_disable
& (1 << f
->set
)) {
3606 if (args
->xlat
!= NULL
) {
3608 l
= f
->cmd_len
- f
->act_ofs
;
3609 cmd
= ACTION_PTR(f
);
3616 for (; l
> 0; l
-= cmdlen
, cmd
+= cmdlen
) {
3620 * check_body is a jump target used when we find a
3621 * CHECK_STATE, and need to jump to the body of
3625 cmdlen
= F_LEN(cmd
);
3627 * An OR block (insn_1 || .. || insn_n) has the
3628 * F_OR bit set in all but the last instruction.
3629 * The first match will set "skip_or", and cause
3630 * the following instructions to be skipped until
3631 * past the one with the F_OR bit clear.
3633 if (skip_or
) { /* skip this instruction */
3634 if ((cmd
->len
& F_OR
) == 0)
3635 skip_or
= 0; /* next one is good */
3638 match
= 0; /* set to 1 if we succeed */
3640 switch (cmd
->opcode
) {
3642 * The first set of opcodes compares the packet's
3643 * fields with some pattern, setting 'match' if a
3644 * match is found. At the end of the loop there is
3645 * logic to deal with F_NOT and F_OR flags associated
3653 kprintf("ipfw: opcode %d unimplemented\n",
3660 * We only check offset == 0 && proto != 0,
3661 * as this ensures that we have an IPv4
3662 * packet with the ports info.
3667 match
= ipfw_match_uid(&args
->f_id
, oif
,
3669 (uid_t
)((ipfw_insn_u32
*)cmd
)->d
[0]);
3673 match
= iface_match(m
->m_pkthdr
.rcvif
,
3674 (ipfw_insn_if
*)cmd
);
3678 match
= iface_match(oif
, (ipfw_insn_if
*)cmd
);
3682 match
= iface_match(oif
? oif
:
3683 m
->m_pkthdr
.rcvif
, (ipfw_insn_if
*)cmd
);
3687 if (args
->eh
!= NULL
) { /* have MAC header */
3688 uint32_t *want
= (uint32_t *)
3689 ((ipfw_insn_mac
*)cmd
)->addr
;
3690 uint32_t *mask
= (uint32_t *)
3691 ((ipfw_insn_mac
*)cmd
)->mask
;
3692 uint32_t *hdr
= (uint32_t *)args
->eh
;
3695 (want
[0] == (hdr
[0] & mask
[0]) &&
3696 want
[1] == (hdr
[1] & mask
[1]) &&
3697 want
[2] == (hdr
[2] & mask
[2]));
3702 if (args
->eh
!= NULL
) {
3704 ntohs(args
->eh
->ether_type
);
3706 ((ipfw_insn_u16
*)cmd
)->ports
;
3709 /* Special vlan handling */
3710 if (m
->m_flags
& M_VLANTAG
)
3713 for (i
= cmdlen
- 1; !match
&& i
> 0;
3716 (t
>= p
[0] && t
<= p
[1]);
3722 match
= (hlen
> 0 && lc
.offset
!= 0);
3729 off
= ntohs(ip
->ip_off
);
3730 if (off
& (IP_MF
| IP_OFFMASK
))
3735 case O_IN
: /* "out" is "not in" */
3736 match
= (oif
== NULL
);
3740 match
= (args
->eh
!= NULL
);
3745 * We do not allow an arg of 0 so the
3746 * check of "proto" only suffices.
3748 match
= (lc
.proto
== cmd
->arg1
);
3752 match
= (hlen
> 0 &&
3753 ((ipfw_insn_ip
*)cmd
)->addr
.s_addr
==
3758 match
= (hlen
> 0 &&
3759 ((ipfw_insn_ip
*)cmd
)->addr
.s_addr
==
3761 ((ipfw_insn_ip
*)cmd
)->mask
.s_addr
));
3768 tif
= INADDR_TO_IFP(&lc
.src_ip
);
3769 match
= (tif
!= NULL
);
3773 case O_IP_SRC_TABLE
:
3774 match
= ipfw_table_lookup(ctx
, cmd
->arg1
,
3779 match
= ipfw_match_ifip((ipfw_insn_ifip
*)cmd
,
3786 uint32_t *d
= (uint32_t *)(cmd
+ 1);
3788 cmd
->opcode
== O_IP_DST_SET
?
3794 addr
-= d
[0]; /* subtract base */
3796 (addr
< cmd
->arg1
) &&
3797 (d
[1 + (addr
>> 5)] &
3798 (1 << (addr
& 0x1f)));
3803 match
= (hlen
> 0 &&
3804 ((ipfw_insn_ip
*)cmd
)->addr
.s_addr
==
3809 match
= (hlen
> 0) &&
3810 (((ipfw_insn_ip
*)cmd
)->addr
.s_addr
==
3812 ((ipfw_insn_ip
*)cmd
)->mask
.s_addr
));
3819 tif
= INADDR_TO_IFP(&lc
.dst_ip
);
3820 match
= (tif
!= NULL
);
3824 case O_IP_DST_TABLE
:
3825 match
= ipfw_table_lookup(ctx
, cmd
->arg1
,
3830 match
= ipfw_match_ifip((ipfw_insn_ifip
*)cmd
,
3837 * offset == 0 && proto != 0 is enough
3838 * to guarantee that we have an IPv4
3839 * packet with port info.
3841 if ((lc
.proto
==IPPROTO_UDP
||
3842 lc
.proto
==IPPROTO_TCP
)
3843 && lc
.offset
== 0) {
3845 (cmd
->opcode
== O_IP_SRCPORT
) ?
3846 lc
.src_port
: lc
.dst_port
;
3848 ((ipfw_insn_u16
*)cmd
)->ports
;
3851 for (i
= cmdlen
- 1; !match
&& i
> 0;
3854 (x
>= p
[0] && x
<= p
[1]);
3860 match
= (lc
.offset
== 0 &&
3861 lc
.proto
==IPPROTO_ICMP
&&
3862 icmpcode_match(ip
, (ipfw_insn_u32
*)cmd
));
3866 match
= (lc
.offset
== 0 &&
3867 lc
.proto
==IPPROTO_ICMP
&&
3868 icmptype_match(ip
, (ipfw_insn_u32
*)cmd
));
3872 match
= (hlen
> 0 && ipopts_match(ip
, cmd
));
3876 match
= (hlen
> 0 && cmd
->arg1
== ip
->ip_v
);
3880 match
= (hlen
> 0 && cmd
->arg1
== ip
->ip_ttl
);
3884 match
= (hlen
> 0 &&
3885 cmd
->arg1
== ntohs(ip
->ip_id
));
3889 match
= (hlen
> 0 && cmd
->arg1
== lc
.ip_len
);
3892 case O_IPPRECEDENCE
:
3893 match
= (hlen
> 0 &&
3894 (cmd
->arg1
== (ip
->ip_tos
& 0xe0)));
3898 match
= (hlen
> 0 &&
3899 flags_match(cmd
, ip
->ip_tos
));
3903 match
= (lc
.proto
== IPPROTO_TCP
&&
3906 L3HDR(struct tcphdr
,ip
)->th_flags
));
3910 match
= (lc
.proto
== IPPROTO_TCP
&&
3911 lc
.offset
== 0 && tcpopts_match(ip
, cmd
));
3915 match
= (lc
.proto
== IPPROTO_TCP
&&
3917 ((ipfw_insn_u32
*)cmd
)->d
[0] ==
3918 L3HDR(struct tcphdr
,ip
)->th_seq
);
3922 match
= (lc
.proto
== IPPROTO_TCP
&&
3924 ((ipfw_insn_u32
*)cmd
)->d
[0] ==
3925 L3HDR(struct tcphdr
,ip
)->th_ack
);
3929 match
= (lc
.proto
== IPPROTO_TCP
&&
3932 L3HDR(struct tcphdr
,ip
)->th_win
);
3936 /* reject packets which have SYN only */
3937 /* XXX should i also check for TH_ACK ? */
3938 match
= (lc
.proto
== IPPROTO_TCP
&&
3940 (L3HDR(struct tcphdr
,ip
)->th_flags
&
3941 (TH_RST
| TH_ACK
| TH_SYN
)) != TH_SYN
);
3946 ipfw_log(ctx
, f
, hlen
, args
->eh
, m
,
3953 match
= (krandom() <
3954 ((ipfw_insn_u32
*)cmd
)->d
[0]);
3958 * The second set of opcodes represents 'actions',
3959 * i.e. the terminal part of a rule once the packet
3960 * matches all previous patterns.
3961 * Typically there is only one action for each rule,
3962 * and the opcode is stored at the end of the rule
3963 * (but there are exceptions -- see below).
3965 * In general, here we set retval and terminate the
3966 * outer loop (would be a 'break 3' in some language,
3967 * but we need to do a 'goto done').
3970 * O_COUNT and O_SKIPTO actions:
3971 * instead of terminating, we jump to the next rule
3972 * ('goto next_rule', equivalent to a 'break 2'),
3973 * or to the SKIPTO target ('goto again' after
3974 * having set f, cmd and l), respectively.
3976 * O_LIMIT and O_KEEP_STATE, O_REDIRECT: these opcodes
3977 * are not real 'actions', and are stored right
3978 * before the 'action' part of the rule.
3979 * These opcodes try to install an entry in the
3980 * state tables; if successful, we continue with
3981 * the next opcode (match=1; break;), otherwise
3982 * the packet must be dropped ('goto done' after
3983 * setting retval). If static rules are changed
3984 * during the state installation, the packet will
3985 * be dropped and rule's stats will not beupdated
3986 * ('return IP_FW_DENY').
3988 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
3989 * cause a lookup of the state table, and a jump
3990 * to the 'action' part of the parent rule
3991 * ('goto check_body') if an entry is found, or
3992 * (CHECK_STATE only) a jump to the next rule if
3993 * the entry is not found ('goto next_rule').
3994 * The result of the lookup is cached to make
3995 * further instances of these opcodes are
3996 * effectively NOPs. If static rules are changed
3997 * during the state looking up, the packet will
3998 * be dropped and rule's stats will not be updated
3999 * ('return IP_FW_DENY').
4002 if (f
->cross_rules
== NULL
) {
4004 * This rule was not completely setup;
4005 * move on to the next rule.
4010 * Apply redirect only on input path and
4011 * only to non-fragment TCP segments or
4014 * Does _not_ work with layer2 filtering.
4016 if (oif
!= NULL
|| args
->eh
!= NULL
||
4017 (ip
->ip_off
& htons(IP_MF
| IP_OFFMASK
)) ||
4018 (lc
.proto
!= IPPROTO_TCP
&&
4019 lc
.proto
!= IPPROTO_UDP
))
4026 s
= ipfw_state_install(ctx
, f
,
4027 (ipfw_insn_limit
*)cmd
, args
, lc
.tcp
);
4029 retval
= IP_FW_DENY
;
4030 goto done
; /* error/limit violation */
4033 s
->st_bcnt
+= lc
.ip_len
;
4035 if (s
->st_type
== O_REDIRECT
) {
4036 struct in_addr oaddr
;
4038 struct ipfw_xlat
*slave_x
, *x
;
4039 struct ipfw_state
*dup
;
4041 x
= (struct ipfw_xlat
*)s
;
4042 ipfw_xlate(x
, m
, &oaddr
, &oport
);
4043 m
= ipfw_rehashm(m
, hlen
, args
, &lc
,
4046 ipfw_state_del(ctx
, s
);
4050 cpuid
= netisr_hashcpu(
4053 slave_x
= (struct ipfw_xlat
*)
4054 ipfw_state_alloc(ctx
, &args
->f_id
,
4055 O_REDIRECT
, f
->cross_rules
[cpuid
],
4057 if (slave_x
== NULL
) {
4058 ipfw_state_del(ctx
, s
);
4059 retval
= IP_FW_DENY
;
4062 slave_x
->xlat_addr
= oaddr
.s_addr
;
4063 slave_x
->xlat_port
= oport
;
4064 slave_x
->xlat_dir
= MATCH_REVERSE
;
4065 slave_x
->xlat_flags
|=
4066 IPFW_STATE_F_XLATSRC
|
4067 IPFW_STATE_F_XLATSLAVE
;
4069 slave_x
->xlat_pair
= x
;
4070 slave_x
->xlat_pcpu
= mycpuid
;
4071 x
->xlat_pair
= slave_x
;
4072 x
->xlat_pcpu
= cpuid
;
4075 if (cpuid
!= mycpuid
) {
4076 ctx
->ipfw_xlate_split
++;
4077 ipfw_xlate_redispatch(
4080 IPFW_XLATE_FORWARD
);
4082 return (IP_FW_REDISPATCH
);
4085 dup
= ipfw_state_link(ctx
,
4088 ctx
->ipfw_xlate_conflicts
++;
4089 if (IPFW_STATE_ISDEAD(dup
)) {
4090 ipfw_state_remove(ctx
,
4092 dup
= ipfw_state_link(
4093 ctx
, &slave_x
->xlat_st
);
4104 ipfw_state_del(ctx
, s
);
4105 return (IP_FW_DENY
);
4107 ctx
->ipfw_xlate_cresolved
++;
4116 * States are checked at the first keep-state
4117 * check-state occurrence, with the result
4118 * being stored in dyn_dir. The compiler
4119 * introduces a PROBE_STATE instruction for
4120 * us when we have a KEEP_STATE/LIMIT/RDR
4121 * (because PROBE_STATE needs to be run first).
4124 if (dyn_dir
== MATCH_UNKNOWN
) {
4125 s
= ipfw_state_lookup(ctx
,
4126 &args
->f_id
, &dyn_dir
, lc
.tcp
);
4129 (s
->st_type
== O_REDIRECT
&&
4130 (args
->eh
!= NULL
||
4131 (ip
->ip_off
& htons(IP_MF
| IP_OFFMASK
)) ||
4132 (lc
.proto
!= IPPROTO_TCP
&&
4133 lc
.proto
!= IPPROTO_UDP
)))) {
4135 * State not found. If CHECK_STATE,
4136 * skip to next rule, if PROBE_STATE
4137 * just ignore and continue with next
4140 if (cmd
->opcode
== O_CHECK_STATE
)
4147 s
->st_bcnt
+= lc
.ip_len
;
4149 if (s
->st_type
== O_REDIRECT
) {
4150 struct ipfw_xlat
*x
=
4151 (struct ipfw_xlat
*)s
;
4154 x
->xlat_ifp
== NULL
) {
4155 KASSERT(x
->xlat_flags
&
4156 IPFW_STATE_F_XLATSLAVE
,
4157 ("master rdr state "
4161 (oif
!= NULL
&& x
->xlat_ifp
!=oif
) ||
4163 x
->xlat_ifp
!=m
->m_pkthdr
.rcvif
)) {
4164 retval
= IP_FW_DENY
;
4167 if (x
->xlat_dir
!= dyn_dir
)
4170 ipfw_xlate(x
, m
, NULL
, NULL
);
4171 m
= ipfw_rehashm(m
, hlen
, args
, &lc
,
4176 cpuid
= netisr_hashcpu(
4178 if (cpuid
!= mycpuid
) {
4185 if (dyn_dir
== MATCH_FORWARD
) {
4189 ipfw_xlate_redispatch(m
, cpuid
,
4192 return (IP_FW_REDISPATCH
);
4195 KKASSERT(x
->xlat_pcpu
== mycpuid
);
4196 ipfw_state_update(&args
->f_id
, dyn_dir
,
4197 lc
.tcp
, &x
->xlat_pair
->xlat_st
);
4201 * Found a rule from a state; jump to the
4202 * 'action' part of the rule.
4205 KKASSERT(f
->cpuid
== mycpuid
);
4207 cmd
= ACTION_PTR(f
);
4208 l
= f
->cmd_len
- f
->act_ofs
;
4213 retval
= IP_FW_PASS
; /* accept */
4217 if (f
->cross_rules
== NULL
) {
4219 * This rule was not completely setup;
4220 * move on to the next rule.
4226 * Don't defrag for l2 packets, output packets
4229 if (oif
!= NULL
|| args
->eh
!= NULL
||
4230 (ip
->ip_off
& htons(IP_MF
| IP_OFFMASK
)) == 0)
4237 retval
= IP_FW_PASS
;
4240 ctx
->ipfw_defraged
++;
4241 KASSERT((m
->m_flags
& M_HASH
) == 0,
4242 ("hash not cleared"));
4244 /* Update statistics */
4246 f
->bcnt
+= lc
.ip_len
;
4247 f
->timestamp
= time_second
;
4249 ip
= mtod(m
, struct ip
*);
4250 hlen
= ip
->ip_hl
<< 2;
4251 ip
->ip_len
= htons(ntohs(ip
->ip_len
) + hlen
);
4258 KASSERT(m
->m_flags
& M_HASH
, ("no hash"));
4259 cpuid
= netisr_hashcpu(m
->m_pkthdr
.hash
);
4260 if (cpuid
!= mycpuid
) {
4261 ctx
->ipfw_defrag_remote
++;
4262 ipfw_defrag_redispatch(m
, cpuid
, f
);
4264 return (IP_FW_REDISPATCH
);
4267 /* 'm' might be changed by ip_hashfn(). */
4268 ip
= mtod(m
, struct ip
*);
4270 m
= ipfw_setup_local(m
, hlen
, args
, &lc
, &ip
);
4279 args
->rule
= f
; /* report matching rule */
4280 args
->cookie
= cmd
->arg1
;
4281 retval
= IP_FW_DUMMYNET
;
4286 if (args
->eh
) /* not on layer 2 */
4289 mtag
= m_tag_get(PACKET_TAG_IPFW_DIVERT
,
4290 sizeof(*divinfo
), M_INTWAIT
| M_NULLOK
);
4292 retval
= IP_FW_DENY
;
4295 divinfo
= m_tag_data(mtag
);
4297 divinfo
->skipto
= f
->rulenum
;
4298 divinfo
->port
= cmd
->arg1
;
4299 divinfo
->tee
= (cmd
->opcode
== O_TEE
);
4300 m_tag_prepend(m
, mtag
);
4302 args
->cookie
= cmd
->arg1
;
4303 retval
= (cmd
->opcode
== O_DIVERT
) ?
4304 IP_FW_DIVERT
: IP_FW_TEE
;
4309 f
->pcnt
++; /* update stats */
4310 f
->bcnt
+= lc
.ip_len
;
4311 f
->timestamp
= time_second
;
4312 if (cmd
->opcode
== O_COUNT
)
4315 if (f
->next_rule
== NULL
)
4316 lookup_next_rule(f
);
4322 * Drop the packet and send a reject notice
4323 * if the packet is not ICMP (or is an ICMP
4324 * query), and it is not multicast/broadcast.
4327 (lc
.proto
!= IPPROTO_ICMP
||
4328 is_icmp_query(ip
)) &&
4329 !(m
->m_flags
& (M_BCAST
|M_MCAST
)) &&
4330 !IN_MULTICAST(ntohl(lc
.dst_ip
.s_addr
))) {
4331 send_reject(args
, cmd
->arg1
,
4332 lc
.offset
, lc
.ip_len
);
4333 retval
= IP_FW_DENY
;
4338 retval
= IP_FW_DENY
;
4342 if (args
->eh
) /* not valid on layer2 pkts */
4344 if (!dyn_f
|| dyn_dir
== MATCH_FORWARD
) {
4345 struct sockaddr_in
*sin
;
4347 mtag
= m_tag_get(PACKET_TAG_IPFORWARD
,
4348 sizeof(*sin
), M_INTWAIT
| M_NULLOK
);
4350 retval
= IP_FW_DENY
;
4353 sin
= m_tag_data(mtag
);
4355 /* Structure copy */
4356 *sin
= ((ipfw_insn_sa
*)cmd
)->sa
;
4358 m_tag_prepend(m
, mtag
);
4359 m
->m_pkthdr
.fw_flags
|=
4360 IPFORWARD_MBUF_TAGGED
;
4361 m
->m_pkthdr
.fw_flags
&=
4362 ~BRIDGE_MBUF_TAGGED
;
4364 retval
= IP_FW_PASS
;
4368 panic("-- unknown opcode %d", cmd
->opcode
);
4369 } /* end of switch() on opcodes */
4371 if (cmd
->len
& F_NOT
)
4375 if (cmd
->len
& F_OR
)
4378 if (!(cmd
->len
& F_OR
)) /* not an OR block, */
4379 break; /* try next rule */
4382 } /* end of inner for, scan opcodes */
4384 next_rule
:; /* try next rule */
4386 } /* end of outer for, scan rules */
4387 kprintf("+++ ipfw: ouch!, skip past end of rules, denying packet\n");
4391 /* Update statistics */
4393 f
->bcnt
+= lc
.ip_len
;
4394 f
->timestamp
= time_second
;
4399 kprintf("pullup failed\n");
4403 static struct mbuf
*
4404 ipfw_dummynet_io(struct mbuf
*m
, int pipe_nr
, int dir
, struct ip_fw_args
*fwa
)
4409 const struct ipfw_flow_id
*id
;
4410 struct dn_flow_id
*fid
;
4414 mtag
= m_tag_get(PACKET_TAG_DUMMYNET
, sizeof(*pkt
),
4415 M_INTWAIT
| M_NULLOK
);
4420 m_tag_prepend(m
, mtag
);
4422 pkt
= m_tag_data(mtag
);
4423 bzero(pkt
, sizeof(*pkt
));
4425 cmd
= fwa
->rule
->cmd
+ fwa
->rule
->act_ofs
;
4426 if (cmd
->opcode
== O_LOG
)
4428 KASSERT(cmd
->opcode
== O_PIPE
|| cmd
->opcode
== O_QUEUE
,
4429 ("Rule is not PIPE or QUEUE, opcode %d", cmd
->opcode
));
4432 pkt
->dn_flags
= (dir
& DN_FLAGS_DIR_MASK
);
4433 pkt
->ifp
= fwa
->oif
;
4434 pkt
->pipe_nr
= pipe_nr
;
4436 pkt
->cpuid
= mycpuid
;
4437 pkt
->msgport
= netisr_curport();
4441 fid
->fid_dst_ip
= id
->dst_ip
;
4442 fid
->fid_src_ip
= id
->src_ip
;
4443 fid
->fid_dst_port
= id
->dst_port
;
4444 fid
->fid_src_port
= id
->src_port
;
4445 fid
->fid_proto
= id
->proto
;
4446 fid
->fid_flags
= id
->flags
;
4448 ipfw_ref_rule(fwa
->rule
);
4449 pkt
->dn_priv
= fwa
->rule
;
4450 pkt
->dn_unref_priv
= ipfw_unref_rule
;
4452 if (cmd
->opcode
== O_PIPE
)
4453 pkt
->dn_flags
|= DN_FLAGS_IS_PIPE
;
4455 m
->m_pkthdr
.fw_flags
|= DUMMYNET_MBUF_TAGGED
;
4460 * When a rule is added/deleted, clear the next_rule pointers in all rules.
4461 * These will be reconstructed on the fly as packets are matched.
4464 ipfw_flush_rule_ptrs(struct ipfw_context
*ctx
)
4468 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
)
4469 rule
->next_rule
= NULL
;
4473 ipfw_inc_static_count(struct ip_fw
*rule
)
4475 /* Static rule's counts are updated only on CPU0 */
4476 KKASSERT(mycpuid
== 0);
4479 static_ioc_len
+= IOC_RULESIZE(rule
);
4483 ipfw_dec_static_count(struct ip_fw
*rule
)
4485 int l
= IOC_RULESIZE(rule
);
4487 /* Static rule's counts are updated only on CPU0 */
4488 KKASSERT(mycpuid
== 0);
4490 KASSERT(static_count
> 0, ("invalid static count %u", static_count
));
4493 KASSERT(static_ioc_len
>= l
,
4494 ("invalid static len %u", static_ioc_len
));
4495 static_ioc_len
-= l
;
4499 ipfw_link_sibling(struct netmsg_ipfw
*fwmsg
, struct ip_fw
*rule
)
4501 if (fwmsg
->sibling
!= NULL
) {
4502 KKASSERT(mycpuid
> 0 && fwmsg
->sibling
->cpuid
== mycpuid
- 1);
4503 fwmsg
->sibling
->sibling
= rule
;
4505 fwmsg
->sibling
= rule
;
4508 static struct ip_fw
*
4509 ipfw_create_rule(const struct ipfw_ioc_rule
*ioc_rule
, uint32_t rule_flags
)
4513 rule
= kmalloc(RULESIZE(ioc_rule
), M_IPFW
, M_WAITOK
| M_ZERO
);
4515 rule
->act_ofs
= ioc_rule
->act_ofs
;
4516 rule
->cmd_len
= ioc_rule
->cmd_len
;
4517 rule
->rulenum
= ioc_rule
->rulenum
;
4518 rule
->set
= ioc_rule
->set
;
4519 rule
->usr_flags
= ioc_rule
->usr_flags
;
4521 bcopy(ioc_rule
->cmd
, rule
->cmd
, rule
->cmd_len
* 4 /* XXX */);
4524 rule
->cpuid
= mycpuid
;
4525 rule
->rule_flags
= rule_flags
;
4531 ipfw_add_rule_dispatch(netmsg_t nmsg
)
4533 struct netmsg_ipfw
*fwmsg
= (struct netmsg_ipfw
*)nmsg
;
4534 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4537 ASSERT_NETISR_NCPUS(mycpuid
);
4539 rule
= ipfw_create_rule(fwmsg
->ioc_rule
, fwmsg
->rule_flags
);
4542 * Insert rule into the pre-determined position
4544 if (fwmsg
->prev_rule
!= NULL
) {
4545 struct ip_fw
*prev
, *next
;
4547 prev
= fwmsg
->prev_rule
;
4548 KKASSERT(prev
->cpuid
== mycpuid
);
4550 next
= fwmsg
->next_rule
;
4551 KKASSERT(next
->cpuid
== mycpuid
);
4557 * Move to the position on the next CPU
4558 * before the msg is forwarded.
4560 fwmsg
->prev_rule
= prev
->sibling
;
4561 fwmsg
->next_rule
= next
->sibling
;
4563 KKASSERT(fwmsg
->next_rule
== NULL
);
4564 rule
->next
= ctx
->ipfw_layer3_chain
;
4565 ctx
->ipfw_layer3_chain
= rule
;
4568 /* Link rule CPU sibling */
4569 ipfw_link_sibling(fwmsg
, rule
);
4571 ipfw_flush_rule_ptrs(ctx
);
4574 /* Statistics only need to be updated once */
4575 ipfw_inc_static_count(rule
);
4577 /* Return the rule on CPU0 */
4578 nmsg
->lmsg
.u
.ms_resultp
= rule
;
4581 if (rule
->rule_flags
& IPFW_RULE_F_GENTRACK
)
4582 rule
->track_ruleid
= (uintptr_t)nmsg
->lmsg
.u
.ms_resultp
;
4584 if (fwmsg
->cross_rules
!= NULL
) {
4585 /* Save rules for later use. */
4586 fwmsg
->cross_rules
[mycpuid
] = rule
;
4589 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
4593 ipfw_crossref_rule_dispatch(netmsg_t nmsg
)
4595 struct netmsg_ipfw
*fwmsg
= (struct netmsg_ipfw
*)nmsg
;
4596 struct ip_fw
*rule
= fwmsg
->sibling
;
4597 int sz
= sizeof(struct ip_fw
*) * netisr_ncpus
;
4599 ASSERT_NETISR_NCPUS(mycpuid
);
4600 KASSERT(rule
->rule_flags
& IPFW_RULE_F_CROSSREF
,
4601 ("not crossref rule"));
4603 rule
->cross_rules
= kmalloc(sz
, M_IPFW
, M_WAITOK
);
4604 memcpy(rule
->cross_rules
, fwmsg
->cross_rules
, sz
);
4606 fwmsg
->sibling
= rule
->sibling
;
4607 netisr_forwardmsg(&fwmsg
->base
, mycpuid
+ 1);
4611 * Add a new rule to the list. Copy the rule into a malloc'ed area,
4612 * then possibly create a rule number and add the rule to the list.
4613 * Update the rule_number in the input struct so the caller knows
4617 ipfw_add_rule(struct ipfw_ioc_rule
*ioc_rule
, uint32_t rule_flags
)
4619 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4620 struct netmsg_ipfw fwmsg
;
4621 struct ip_fw
*f
, *prev
, *rule
;
4626 * If rulenum is 0, find highest numbered rule before the
4627 * default rule, and add rule number incremental step.
4629 if (ioc_rule
->rulenum
== 0) {
4630 int step
= autoinc_step
;
4632 KKASSERT(step
>= IPFW_AUTOINC_STEP_MIN
&&
4633 step
<= IPFW_AUTOINC_STEP_MAX
);
4636 * Locate the highest numbered rule before default
4638 for (f
= ctx
->ipfw_layer3_chain
; f
; f
= f
->next
) {
4639 if (f
->rulenum
== IPFW_DEFAULT_RULE
)
4641 ioc_rule
->rulenum
= f
->rulenum
;
4643 if (ioc_rule
->rulenum
< IPFW_DEFAULT_RULE
- step
)
4644 ioc_rule
->rulenum
+= step
;
4646 KASSERT(ioc_rule
->rulenum
!= IPFW_DEFAULT_RULE
&&
4647 ioc_rule
->rulenum
!= 0,
4648 ("invalid rule num %d", ioc_rule
->rulenum
));
4651 * Now find the right place for the new rule in the sorted list.
4653 for (prev
= NULL
, f
= ctx
->ipfw_layer3_chain
; f
;
4654 prev
= f
, f
= f
->next
) {
4655 if (f
->rulenum
> ioc_rule
->rulenum
) {
4656 /* Found the location */
4660 KASSERT(f
!= NULL
, ("no default rule?!"));
4663 * Duplicate the rule onto each CPU.
4664 * The rule duplicated on CPU0 will be returned.
4666 bzero(&fwmsg
, sizeof(fwmsg
));
4667 netmsg_init(&fwmsg
.base
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
4668 ipfw_add_rule_dispatch
);
4669 fwmsg
.ioc_rule
= ioc_rule
;
4670 fwmsg
.prev_rule
= prev
;
4671 fwmsg
.next_rule
= prev
== NULL
? NULL
: f
;
4672 fwmsg
.rule_flags
= rule_flags
;
4673 if (rule_flags
& IPFW_RULE_F_CROSSREF
) {
4674 fwmsg
.cross_rules
= kmalloc(
4675 sizeof(struct ip_fw
*) * netisr_ncpus
, M_TEMP
,
4679 netisr_domsg_global(&fwmsg
.base
);
4680 KKASSERT(fwmsg
.prev_rule
== NULL
&& fwmsg
.next_rule
== NULL
);
4682 rule
= fwmsg
.base
.lmsg
.u
.ms_resultp
;
4683 KKASSERT(rule
!= NULL
&& rule
->cpuid
== mycpuid
);
4685 if (fwmsg
.cross_rules
!= NULL
) {
4686 netmsg_init(&fwmsg
.base
, NULL
, &curthread
->td_msgport
,
4687 MSGF_PRIORITY
, ipfw_crossref_rule_dispatch
);
4688 fwmsg
.sibling
= rule
;
4689 netisr_domsg_global(&fwmsg
.base
);
4690 KKASSERT(fwmsg
.sibling
== NULL
);
4692 kfree(fwmsg
.cross_rules
, M_TEMP
);
4695 atomic_add_int(&ipfw_gd
.ipfw_refcnt
, 1);
4699 DPRINTF("++ installed rule %d, static count now %d\n",
4700 rule
->rulenum
, static_count
);
4704 * Free storage associated with a static rule (including derived
4706 * The caller is in charge of clearing rule pointers to avoid
4707 * dangling pointers.
4708 * @return a pointer to the next entry.
4709 * Arguments are not checked, so they better be correct.
4711 static struct ip_fw
*
4712 ipfw_delete_rule(struct ipfw_context
*ctx
,
4713 struct ip_fw
*prev
, struct ip_fw
*rule
)
4719 ctx
->ipfw_layer3_chain
= n
;
4723 /* Mark the rule as invalid */
4724 rule
->rule_flags
|= IPFW_RULE_F_INVALID
;
4725 rule
->next_rule
= NULL
;
4726 rule
->sibling
= NULL
;
4728 /* Don't reset cpuid here; keep various assertion working */
4732 /* Statistics only need to be updated once */
4734 ipfw_dec_static_count(rule
);
4736 if ((rule
->rule_flags
& IPFW_RULE_F_CROSSREF
) == 0) {
4737 /* Try to free this rule */
4738 ipfw_free_rule(rule
);
4740 /* TODO: check staging area. */
4742 rule
->next
= ipfw_gd
.ipfw_crossref_free
;
4743 ipfw_gd
.ipfw_crossref_free
= rule
;
4747 /* Return the next rule */
4752 ipfw_flush_dispatch(netmsg_t nmsg
)
4754 int kill_default
= nmsg
->lmsg
.u
.ms_result
;
4755 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4758 ASSERT_NETISR_NCPUS(mycpuid
);
4763 ipfw_state_flush(ctx
, NULL
);
4764 KASSERT(ctx
->ipfw_state_cnt
== 0,
4765 ("%d pcpu states remain", ctx
->ipfw_state_cnt
));
4766 ctx
->ipfw_state_loosecnt
= 0;
4767 ctx
->ipfw_state_lastexp
= 0;
4772 ipfw_track_flush(ctx
, NULL
);
4773 ctx
->ipfw_track_lastexp
= 0;
4774 if (ctx
->ipfw_trkcnt_spare
!= NULL
) {
4775 kfree(ctx
->ipfw_trkcnt_spare
, M_IPFW
);
4776 ctx
->ipfw_trkcnt_spare
= NULL
;
4779 ipfw_flush_rule_ptrs(ctx
); /* more efficient to do outside the loop */
4781 while ((rule
= ctx
->ipfw_layer3_chain
) != NULL
&&
4782 (kill_default
|| rule
->rulenum
!= IPFW_DEFAULT_RULE
))
4783 ipfw_delete_rule(ctx
, NULL
, rule
);
4785 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
4789 * Deletes all rules from a chain (including the default rule
4790 * if the second argument is set).
4793 ipfw_flush(int kill_default
)
4795 struct netmsg_base nmsg
;
4797 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4804 * If 'kill_default' then caller has done the necessary
4805 * msgport syncing; unnecessary to do it again.
4807 if (!kill_default
) {
4809 * Let ipfw_chk() know the rules are going to
4810 * be flushed, so it could jump directly to
4814 /* XXX use priority sync */
4815 netmsg_service_sync();
4819 * Press the 'flush' button
4821 bzero(&nmsg
, sizeof(nmsg
));
4822 netmsg_init(&nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
4823 ipfw_flush_dispatch
);
4824 nmsg
.lmsg
.u
.ms_result
= kill_default
;
4825 netisr_domsg_global(&nmsg
);
4826 ipfw_gd
.ipfw_state_loosecnt
= 0;
4827 ipfw_gd
.ipfw_state_globexp
= 0;
4828 ipfw_gd
.ipfw_track_globexp
= 0;
4831 state_cnt
= ipfw_state_cntcoll();
4832 KASSERT(state_cnt
== 0, ("%d states remain", state_cnt
));
4834 KASSERT(ipfw_gd
.ipfw_trkcnt_cnt
== 0,
4835 ("%d trkcnts remain", ipfw_gd
.ipfw_trkcnt_cnt
));
4838 KASSERT(static_count
== 0,
4839 ("%u static rules remain", static_count
));
4840 KASSERT(static_ioc_len
== 0,
4841 ("%u bytes of static rules remain", static_ioc_len
));
4843 KASSERT(static_count
== 1,
4844 ("%u static rules remain", static_count
));
4845 KASSERT(static_ioc_len
== IOC_RULESIZE(ctx
->ipfw_default_rule
),
4846 ("%u bytes of static rules remain, should be %lu",
4848 (u_long
)IOC_RULESIZE(ctx
->ipfw_default_rule
)));
4857 ipfw_alt_delete_rule_dispatch(netmsg_t nmsg
)
4859 struct netmsg_del
*dmsg
= (struct netmsg_del
*)nmsg
;
4860 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4861 struct ip_fw
*rule
, *prev
;
4863 ASSERT_NETISR_NCPUS(mycpuid
);
4865 rule
= dmsg
->start_rule
;
4866 KKASSERT(rule
->cpuid
== mycpuid
);
4867 dmsg
->start_rule
= rule
->sibling
;
4869 prev
= dmsg
->prev_rule
;
4871 KKASSERT(prev
->cpuid
== mycpuid
);
4874 * Move to the position on the next CPU
4875 * before the msg is forwarded.
4877 dmsg
->prev_rule
= prev
->sibling
;
4881 * flush pointers outside the loop, then delete all matching
4882 * rules. 'prev' remains the same throughout the cycle.
4884 ipfw_flush_rule_ptrs(ctx
);
4885 while (rule
&& rule
->rulenum
== dmsg
->rulenum
) {
4886 if (rule
->rule_flags
& IPFW_RULE_F_GENSTATE
) {
4887 /* Flush states generated by this rule. */
4888 ipfw_state_flush(ctx
, rule
);
4890 if (rule
->rule_flags
& IPFW_RULE_F_GENTRACK
) {
4891 /* Flush tracks generated by this rule. */
4892 ipfw_track_flush(ctx
, rule
);
4894 rule
= ipfw_delete_rule(ctx
, prev
, rule
);
4897 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
4901 ipfw_alt_delete_rule(uint16_t rulenum
)
4903 struct ip_fw
*prev
, *rule
;
4904 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4905 struct netmsg_del dmsg
;
4910 * Locate first rule to delete
4912 for (prev
= NULL
, rule
= ctx
->ipfw_layer3_chain
;
4913 rule
&& rule
->rulenum
< rulenum
;
4914 prev
= rule
, rule
= rule
->next
)
4916 if (rule
->rulenum
!= rulenum
)
4920 * Get rid of the rule duplications on all CPUs
4922 bzero(&dmsg
, sizeof(dmsg
));
4923 netmsg_init(&dmsg
.base
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
4924 ipfw_alt_delete_rule_dispatch
);
4925 dmsg
.prev_rule
= prev
;
4926 dmsg
.start_rule
= rule
;
4927 dmsg
.rulenum
= rulenum
;
4929 netisr_domsg_global(&dmsg
.base
);
4930 KKASSERT(dmsg
.prev_rule
== NULL
&& dmsg
.start_rule
== NULL
);
4935 ipfw_alt_delete_ruleset_dispatch(netmsg_t nmsg
)
4937 struct netmsg_del
*dmsg
= (struct netmsg_del
*)nmsg
;
4938 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4939 struct ip_fw
*prev
, *rule
;
4944 ASSERT_NETISR_NCPUS(mycpuid
);
4946 ipfw_flush_rule_ptrs(ctx
);
4949 rule
= ctx
->ipfw_layer3_chain
;
4950 while (rule
!= NULL
) {
4951 if (rule
->set
== dmsg
->from_set
) {
4952 if (rule
->rule_flags
& IPFW_RULE_F_GENSTATE
) {
4953 /* Flush states generated by this rule. */
4954 ipfw_state_flush(ctx
, rule
);
4956 if (rule
->rule_flags
& IPFW_RULE_F_GENTRACK
) {
4957 /* Flush tracks generated by this rule. */
4958 ipfw_track_flush(ctx
, rule
);
4960 rule
= ipfw_delete_rule(ctx
, prev
, rule
);
4969 KASSERT(del
, ("no match set?!"));
4971 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
4975 ipfw_alt_delete_ruleset(uint8_t set
)
4977 struct netmsg_del dmsg
;
4980 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
4985 * Check whether the 'set' exists. If it exists,
4986 * then check whether any rules within the set will
4987 * try to create states.
4990 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
) {
4991 if (rule
->set
== set
)
4995 return 0; /* XXX EINVAL? */
5000 bzero(&dmsg
, sizeof(dmsg
));
5001 netmsg_init(&dmsg
.base
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5002 ipfw_alt_delete_ruleset_dispatch
);
5003 dmsg
.from_set
= set
;
5004 netisr_domsg_global(&dmsg
.base
);
5010 ipfw_alt_move_rule_dispatch(netmsg_t nmsg
)
5012 struct netmsg_del
*dmsg
= (struct netmsg_del
*)nmsg
;
5015 ASSERT_NETISR_NCPUS(mycpuid
);
5017 rule
= dmsg
->start_rule
;
5018 KKASSERT(rule
->cpuid
== mycpuid
);
5021 * Move to the position on the next CPU
5022 * before the msg is forwarded.
5024 dmsg
->start_rule
= rule
->sibling
;
5026 while (rule
&& rule
->rulenum
<= dmsg
->rulenum
) {
5027 if (rule
->rulenum
== dmsg
->rulenum
)
5028 rule
->set
= dmsg
->to_set
;
5031 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
5035 ipfw_alt_move_rule(uint16_t rulenum
, uint8_t set
)
5037 struct netmsg_del dmsg
;
5038 struct netmsg_base
*nmsg
;
5040 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5045 * Locate first rule to move
5047 for (rule
= ctx
->ipfw_layer3_chain
; rule
&& rule
->rulenum
<= rulenum
;
5048 rule
= rule
->next
) {
5049 if (rule
->rulenum
== rulenum
&& rule
->set
!= set
)
5052 if (rule
== NULL
|| rule
->rulenum
> rulenum
)
5053 return 0; /* XXX error? */
5055 bzero(&dmsg
, sizeof(dmsg
));
5057 netmsg_init(nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5058 ipfw_alt_move_rule_dispatch
);
5059 dmsg
.start_rule
= rule
;
5060 dmsg
.rulenum
= rulenum
;
5063 netisr_domsg_global(nmsg
);
5064 KKASSERT(dmsg
.start_rule
== NULL
);
5069 ipfw_alt_move_ruleset_dispatch(netmsg_t nmsg
)
5071 struct netmsg_del
*dmsg
= (struct netmsg_del
*)nmsg
;
5072 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5075 ASSERT_NETISR_NCPUS(mycpuid
);
5077 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
) {
5078 if (rule
->set
== dmsg
->from_set
)
5079 rule
->set
= dmsg
->to_set
;
5081 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
5085 ipfw_alt_move_ruleset(uint8_t from_set
, uint8_t to_set
)
5087 struct netmsg_del dmsg
;
5088 struct netmsg_base
*nmsg
;
5092 bzero(&dmsg
, sizeof(dmsg
));
5094 netmsg_init(nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5095 ipfw_alt_move_ruleset_dispatch
);
5096 dmsg
.from_set
= from_set
;
5097 dmsg
.to_set
= to_set
;
5099 netisr_domsg_global(nmsg
);
5104 ipfw_alt_swap_ruleset_dispatch(netmsg_t nmsg
)
5106 struct netmsg_del
*dmsg
= (struct netmsg_del
*)nmsg
;
5107 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5110 ASSERT_NETISR_NCPUS(mycpuid
);
5112 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
) {
5113 if (rule
->set
== dmsg
->from_set
)
5114 rule
->set
= dmsg
->to_set
;
5115 else if (rule
->set
== dmsg
->to_set
)
5116 rule
->set
= dmsg
->from_set
;
5118 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
5122 ipfw_alt_swap_ruleset(uint8_t set1
, uint8_t set2
)
5124 struct netmsg_del dmsg
;
5125 struct netmsg_base
*nmsg
;
5129 bzero(&dmsg
, sizeof(dmsg
));
5131 netmsg_init(nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5132 ipfw_alt_swap_ruleset_dispatch
);
5133 dmsg
.from_set
= set1
;
5136 netisr_domsg_global(nmsg
);
5141 * Remove all rules with given number, and also do set manipulation.
5143 * The argument is an uint32_t. The low 16 bit are the rule or set number,
5144 * the next 8 bits are the new set, the top 8 bits are the command:
5146 * 0 delete rules with given number
5147 * 1 delete rules with given set number
5148 * 2 move rules with given number to new set
5149 * 3 move rules with given set number to new set
5150 * 4 swap sets with given numbers
5153 ipfw_ctl_alter(uint32_t arg
)
5156 uint8_t cmd
, new_set
;
5161 rulenum
= arg
& 0xffff;
5162 cmd
= (arg
>> 24) & 0xff;
5163 new_set
= (arg
>> 16) & 0xff;
5167 if (new_set
>= IPFW_DEFAULT_SET
)
5169 if (cmd
== 0 || cmd
== 2) {
5170 if (rulenum
== IPFW_DEFAULT_RULE
)
5173 if (rulenum
>= IPFW_DEFAULT_SET
)
5178 case 0: /* delete rules with given number */
5179 error
= ipfw_alt_delete_rule(rulenum
);
5182 case 1: /* delete all rules with given set number */
5183 error
= ipfw_alt_delete_ruleset(rulenum
);
5186 case 2: /* move rules with given number to new set */
5187 error
= ipfw_alt_move_rule(rulenum
, new_set
);
5190 case 3: /* move rules with given set number to new set */
5191 error
= ipfw_alt_move_ruleset(rulenum
, new_set
);
5194 case 4: /* swap two sets */
5195 error
= ipfw_alt_swap_ruleset(rulenum
, new_set
);
5202 * Clear counters for a specific rule.
5205 clear_counters(struct ip_fw
*rule
, int log_only
)
5207 ipfw_insn_log
*l
= (ipfw_insn_log
*)ACTION_PTR(rule
);
5209 if (log_only
== 0) {
5210 rule
->bcnt
= rule
->pcnt
= 0;
5211 rule
->timestamp
= 0;
5213 if (l
->o
.opcode
== O_LOG
)
5214 l
->log_left
= l
->max_log
;
5218 ipfw_zero_entry_dispatch(netmsg_t nmsg
)
5220 struct netmsg_zent
*zmsg
= (struct netmsg_zent
*)nmsg
;
5221 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5224 ASSERT_NETISR_NCPUS(mycpuid
);
5226 if (zmsg
->rulenum
== 0) {
5227 KKASSERT(zmsg
->start_rule
== NULL
);
5229 ctx
->ipfw_norule_counter
= 0;
5230 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
)
5231 clear_counters(rule
, zmsg
->log_only
);
5233 struct ip_fw
*start
= zmsg
->start_rule
;
5235 KKASSERT(start
->cpuid
== mycpuid
);
5236 KKASSERT(start
->rulenum
== zmsg
->rulenum
);
5239 * We can have multiple rules with the same number, so we
5240 * need to clear them all.
5242 for (rule
= start
; rule
&& rule
->rulenum
== zmsg
->rulenum
;
5244 clear_counters(rule
, zmsg
->log_only
);
5247 * Move to the position on the next CPU
5248 * before the msg is forwarded.
5250 zmsg
->start_rule
= start
->sibling
;
5252 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
5256 * Reset some or all counters on firewall rules.
5257 * @arg frwl is null to clear all entries, or contains a specific
5259 * @arg log_only is 1 if we only want to reset logs, zero otherwise.
5262 ipfw_ctl_zero_entry(int rulenum
, int log_only
)
5264 struct netmsg_zent zmsg
;
5265 struct netmsg_base
*nmsg
;
5267 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5271 bzero(&zmsg
, sizeof(zmsg
));
5273 netmsg_init(nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5274 ipfw_zero_entry_dispatch
);
5275 zmsg
.log_only
= log_only
;
5278 msg
= log_only
? "ipfw: All logging counts reset.\n"
5279 : "ipfw: Accounting cleared.\n";
5284 * Locate the first rule with 'rulenum'
5286 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
) {
5287 if (rule
->rulenum
== rulenum
)
5290 if (rule
== NULL
) /* we did not find any matching rules */
5292 zmsg
.start_rule
= rule
;
5293 zmsg
.rulenum
= rulenum
;
5295 msg
= log_only
? "ipfw: Entry %d logging count reset.\n"
5296 : "ipfw: Entry %d cleared.\n";
5298 netisr_domsg_global(nmsg
);
5299 KKASSERT(zmsg
.start_rule
== NULL
);
5302 log(LOG_SECURITY
| LOG_NOTICE
, msg
, rulenum
);
5307 * Check validity of the structure before insert.
5308 * Fortunately rules are simple, so this mostly need to check rule sizes.
5311 ipfw_check_ioc_rule(struct ipfw_ioc_rule
*rule
, int size
, uint32_t *rule_flags
)
5314 int have_action
= 0;
5319 /* Check for valid size */
5320 if (size
< sizeof(*rule
)) {
5321 kprintf("ipfw: rule too short\n");
5324 l
= IOC_RULESIZE(rule
);
5326 kprintf("ipfw: size mismatch (have %d want %d)\n", size
, l
);
5330 /* Check rule number */
5331 if (rule
->rulenum
== IPFW_DEFAULT_RULE
) {
5332 kprintf("ipfw: invalid rule number\n");
5337 * Now go for the individual checks. Very simple ones, basically only
5338 * instruction sizes.
5340 for (l
= rule
->cmd_len
, cmd
= rule
->cmd
; l
> 0;
5341 l
-= cmdlen
, cmd
+= cmdlen
) {
5342 cmdlen
= F_LEN(cmd
);
5344 kprintf("ipfw: opcode %d size truncated\n",
5349 DPRINTF("ipfw: opcode %d\n", cmd
->opcode
);
5351 if (cmd
->opcode
== O_KEEP_STATE
|| cmd
->opcode
== O_LIMIT
||
5352 IPFW_ISXLAT(cmd
->opcode
)) {
5353 /* This rule will generate states. */
5354 *rule_flags
|= IPFW_RULE_F_GENSTATE
;
5355 if (cmd
->opcode
== O_LIMIT
)
5356 *rule_flags
|= IPFW_RULE_F_GENTRACK
;
5358 if (cmd
->opcode
== O_DEFRAG
|| IPFW_ISXLAT(cmd
->opcode
))
5359 *rule_flags
|= IPFW_RULE_F_CROSSREF
;
5360 if (cmd
->opcode
== O_IP_SRC_IFIP
||
5361 cmd
->opcode
== O_IP_DST_IFIP
) {
5362 *rule_flags
|= IPFW_RULE_F_DYNIFADDR
;
5363 cmd
->arg1
&= IPFW_IFIP_SETTINGS
;
5366 switch (cmd
->opcode
) {
5381 case O_IPPRECEDENCE
:
5388 if (cmdlen
!= F_INSN_SIZE(ipfw_insn
))
5392 case O_IP_SRC_TABLE
:
5393 case O_IP_DST_TABLE
:
5394 if (cmdlen
!= F_INSN_SIZE(ipfw_insn
))
5396 if (cmd
->arg1
>= ipfw_table_max
) {
5397 kprintf("ipfw: invalid table id %u, max %d\n",
5398 cmd
->arg1
, ipfw_table_max
);
5405 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_ifip
))
5411 if (cmdlen
< F_INSN_SIZE(ipfw_insn_u32
))
5422 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_u32
))
5427 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_limit
))
5431 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_rdr
))
5436 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_log
))
5439 ((ipfw_insn_log
*)cmd
)->log_left
=
5440 ((ipfw_insn_log
*)cmd
)->max_log
;
5446 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_ip
))
5448 if (((ipfw_insn_ip
*)cmd
)->mask
.s_addr
== 0) {
5449 kprintf("ipfw: opcode %d, useless rule\n",
5457 if (cmd
->arg1
== 0 || cmd
->arg1
> 256) {
5458 kprintf("ipfw: invalid set size %d\n",
5462 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_u32
) +
5468 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_mac
))
5474 case O_IP_DSTPORT
: /* XXX artificial limit, 30 port pairs */
5475 if (cmdlen
< 2 || cmdlen
> 31)
5482 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_if
))
5488 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_pipe
))
5493 if (cmdlen
!= F_INSN_SIZE(ipfw_insn_sa
)) {
5498 fwd_addr
= ((ipfw_insn_sa
*)cmd
)->
5500 if (IN_MULTICAST(ntohl(fwd_addr
))) {
5501 kprintf("ipfw: try forwarding to "
5502 "multicast address\n");
5508 case O_FORWARD_MAC
: /* XXX not implemented yet */
5518 if (cmdlen
!= F_INSN_SIZE(ipfw_insn
))
5522 kprintf("ipfw: opcode %d, multiple actions"
5529 kprintf("ipfw: opcode %d, action must be"
5536 kprintf("ipfw: opcode %d, unknown opcode\n",
5541 if (have_action
== 0) {
5542 kprintf("ipfw: missing action\n");
5548 kprintf("ipfw: opcode %d size %d wrong\n",
5549 cmd
->opcode
, cmdlen
);
5554 ipfw_ctl_add_rule(struct sockopt
*sopt
)
5556 struct ipfw_ioc_rule
*ioc_rule
;
5558 uint32_t rule_flags
;
5563 size
= sopt
->sopt_valsize
;
5564 if (size
> (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX
) ||
5565 size
< sizeof(*ioc_rule
)) {
5568 if (size
!= (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX
)) {
5569 sopt
->sopt_val
= krealloc(sopt
->sopt_val
, sizeof(uint32_t) *
5570 IPFW_RULE_SIZE_MAX
, M_TEMP
, M_WAITOK
);
5572 ioc_rule
= sopt
->sopt_val
;
5574 error
= ipfw_check_ioc_rule(ioc_rule
, size
, &rule_flags
);
5578 ipfw_add_rule(ioc_rule
, rule_flags
);
5580 if (sopt
->sopt_dir
== SOPT_GET
)
5581 sopt
->sopt_valsize
= IOC_RULESIZE(ioc_rule
);
5586 ipfw_copy_rule(const struct ipfw_context
*ctx
, const struct ip_fw
*rule
,
5587 struct ipfw_ioc_rule
*ioc_rule
)
5589 const struct ip_fw
*sibling
;
5595 KASSERT(rule
->cpuid
== 0, ("rule does not belong to cpu0"));
5597 ioc_rule
->act_ofs
= rule
->act_ofs
;
5598 ioc_rule
->cmd_len
= rule
->cmd_len
;
5599 ioc_rule
->rulenum
= rule
->rulenum
;
5600 ioc_rule
->set
= rule
->set
;
5601 ioc_rule
->usr_flags
= rule
->usr_flags
;
5603 ioc_rule
->set_disable
= ctx
->ipfw_set_disable
;
5604 ioc_rule
->static_count
= static_count
;
5605 ioc_rule
->static_len
= static_ioc_len
;
5608 * Visit (read-only) all of the rule's duplications to get
5609 * the necessary statistics
5616 ioc_rule
->timestamp
= 0;
5617 for (sibling
= rule
; sibling
!= NULL
; sibling
= sibling
->sibling
) {
5618 ioc_rule
->pcnt
+= sibling
->pcnt
;
5619 ioc_rule
->bcnt
+= sibling
->bcnt
;
5620 if (sibling
->timestamp
> ioc_rule
->timestamp
)
5621 ioc_rule
->timestamp
= sibling
->timestamp
;
5626 KASSERT(i
== netisr_ncpus
,
5627 ("static rule is not duplicated on netisr_ncpus %d", netisr_ncpus
));
5629 bcopy(rule
->cmd
, ioc_rule
->cmd
, ioc_rule
->cmd_len
* 4 /* XXX */);
5631 return ((uint8_t *)ioc_rule
+ IOC_RULESIZE(ioc_rule
));
5635 ipfw_track_copy(const struct ipfw_trkcnt
*trk
, struct ipfw_ioc_state
*ioc_state
)
5637 struct ipfw_ioc_flowid
*ioc_id
;
5639 if (trk
->tc_expire
== 0) {
5640 /* Not a scanned one. */
5644 ioc_state
->expire
= TIME_LEQ(trk
->tc_expire
, time_uptime
) ?
5645 0 : trk
->tc_expire
- time_uptime
;
5646 ioc_state
->pcnt
= 0;
5647 ioc_state
->bcnt
= 0;
5649 ioc_state
->dyn_type
= O_LIMIT_PARENT
;
5650 ioc_state
->count
= trk
->tc_count
;
5652 ioc_state
->rulenum
= trk
->tc_rulenum
;
5654 ioc_id
= &ioc_state
->id
;
5655 ioc_id
->type
= ETHERTYPE_IP
;
5656 ioc_id
->u
.ip
.proto
= trk
->tc_proto
;
5657 ioc_id
->u
.ip
.src_ip
= trk
->tc_saddr
;
5658 ioc_id
->u
.ip
.dst_ip
= trk
->tc_daddr
;
5659 ioc_id
->u
.ip
.src_port
= trk
->tc_sport
;
5660 ioc_id
->u
.ip
.dst_port
= trk
->tc_dport
;
5666 ipfw_state_copy(const struct ipfw_state
*s
, struct ipfw_ioc_state
*ioc_state
)
5668 struct ipfw_ioc_flowid
*ioc_id
;
5670 if (IPFW_STATE_SCANSKIP(s
))
5673 ioc_state
->expire
= TIME_LEQ(s
->st_expire
, time_uptime
) ?
5674 0 : s
->st_expire
- time_uptime
;
5675 ioc_state
->pcnt
= s
->st_pcnt
;
5676 ioc_state
->bcnt
= s
->st_bcnt
;
5678 ioc_state
->dyn_type
= s
->st_type
;
5679 ioc_state
->count
= 0;
5681 ioc_state
->rulenum
= s
->st_rule
->rulenum
;
5683 ioc_id
= &ioc_state
->id
;
5684 ioc_id
->type
= ETHERTYPE_IP
;
5685 ioc_id
->u
.ip
.proto
= s
->st_proto
;
5686 ipfw_key_4tuple(&s
->st_key
,
5687 &ioc_id
->u
.ip
.src_ip
, &ioc_id
->u
.ip
.src_port
,
5688 &ioc_id
->u
.ip
.dst_ip
, &ioc_id
->u
.ip
.dst_port
);
5690 if (IPFW_ISXLAT(s
->st_type
)) {
5691 const struct ipfw_xlat
*x
= (const struct ipfw_xlat
*)s
;
5693 if (x
->xlat_port
== 0)
5694 ioc_state
->xlat_port
= ioc_id
->u
.ip
.dst_port
;
5696 ioc_state
->xlat_port
= ntohs(x
->xlat_port
);
5697 ioc_state
->xlat_addr
= ntohl(x
->xlat_addr
);
5699 ioc_state
->pcnt
+= x
->xlat_pair
->xlat_pcnt
;
5700 ioc_state
->bcnt
+= x
->xlat_pair
->xlat_bcnt
;
5707 ipfw_state_copy_dispatch(netmsg_t nmsg
)
5709 struct netmsg_cpstate
*nm
= (struct netmsg_cpstate
*)nmsg
;
5710 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5711 const struct ipfw_state
*s
;
5712 const struct ipfw_track
*t
;
5714 ASSERT_NETISR_NCPUS(mycpuid
);
5715 KASSERT(nm
->state_cnt
< nm
->state_cntmax
,
5716 ("invalid state count %d, max %d",
5717 nm
->state_cnt
, nm
->state_cntmax
));
5719 TAILQ_FOREACH(s
, &ctx
->ipfw_state_list
, st_link
) {
5720 if (ipfw_state_copy(s
, nm
->ioc_state
)) {
5723 if (nm
->state_cnt
== nm
->state_cntmax
)
5729 * Prepare tracks in the global track tree for userland.
5731 TAILQ_FOREACH(t
, &ctx
->ipfw_track_list
, t_link
) {
5732 struct ipfw_trkcnt
*trk
;
5734 if (t
->t_count
== NULL
) /* anchor */
5739 * Only one netisr can run this function at
5740 * any time, and only this function accesses
5741 * trkcnt's tc_expire, so this is safe w/o
5742 * ipfw_gd.ipfw_trkcnt_token.
5744 if (trk
->tc_expire
> t
->t_expire
)
5746 trk
->tc_expire
= t
->t_expire
;
5750 * Copy tracks in the global track tree to userland in
5753 if (mycpuid
== netisr_ncpus
- 1) {
5754 struct ipfw_trkcnt
*trk
;
5756 KASSERT(nm
->state_cnt
< nm
->state_cntmax
,
5757 ("invalid state count %d, max %d",
5758 nm
->state_cnt
, nm
->state_cntmax
));
5761 RB_FOREACH(trk
, ipfw_trkcnt_tree
, &ipfw_gd
.ipfw_trkcnt_tree
) {
5762 if (ipfw_track_copy(trk
, nm
->ioc_state
)) {
5765 if (nm
->state_cnt
== nm
->state_cntmax
) {
5774 if (nm
->state_cnt
== nm
->state_cntmax
) {
5775 /* No more space; done. */
5776 netisr_replymsg(&nm
->base
, 0);
5778 netisr_forwardmsg(&nm
->base
, mycpuid
+ 1);
5783 ipfw_ctl_get_rules(struct sockopt
*sopt
)
5785 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5794 * pass up a copy of the current rules. Static rules
5795 * come first (the last of which has number IPFW_DEFAULT_RULE),
5796 * followed by a possibly empty list of states.
5799 size
= static_ioc_len
; /* size of static rules */
5802 * Size of the states.
5803 * XXX take tracks as state for userland compat.
5805 state_cnt
= ipfw_state_cntcoll() + ipfw_gd
.ipfw_trkcnt_cnt
;
5806 state_cnt
= (state_cnt
* 5) / 4; /* leave 25% headroom */
5807 size
+= state_cnt
* sizeof(struct ipfw_ioc_state
);
5809 if (sopt
->sopt_valsize
< size
) {
5810 /* short length, no need to return incomplete rules */
5811 /* XXX: if superuser, no need to zero buffer */
5812 bzero(sopt
->sopt_val
, sopt
->sopt_valsize
);
5815 bp
= sopt
->sopt_val
;
5817 for (rule
= ctx
->ipfw_layer3_chain
; rule
; rule
= rule
->next
)
5818 bp
= ipfw_copy_rule(ctx
, rule
, bp
);
5821 struct netmsg_cpstate nm
;
5823 size_t old_size
= size
;
5826 netmsg_init(&nm
.base
, NULL
, &curthread
->td_msgport
,
5827 MSGF_PRIORITY
, ipfw_state_copy_dispatch
);
5829 nm
.state_cntmax
= state_cnt
;
5831 netisr_domsg_global(&nm
.base
);
5834 * The # of states may be shrinked after the snapshot
5835 * of the state count was taken. To give user a correct
5836 * state count, nm->state_cnt is used to recalculate
5839 size
= static_ioc_len
+
5840 (nm
.state_cnt
* sizeof(struct ipfw_ioc_state
));
5841 KKASSERT(size
<= old_size
);
5844 sopt
->sopt_valsize
= size
;
5849 ipfw_set_disable_dispatch(netmsg_t nmsg
)
5851 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5853 ASSERT_NETISR_NCPUS(mycpuid
);
5855 ctx
->ipfw_set_disable
= nmsg
->lmsg
.u
.ms_result32
;
5856 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
/*
 * Atomically disable and/or enable rule sets, then broadcast the
 * resulting mask to every netisr CPU via ipfw_set_disable_dispatch().
 * (NOTE(review): source span is a mangled extraction; original lines
 * 5864-5866 are missing here -- presumably blank lines or an assert,
 * TODO confirm against the full file.)
 */
5860 ipfw_ctl_set_disable(uint32_t disable
, uint32_t enable
)
5862 struct netmsg_base nmsg
;
5863 uint32_t set_disable
;
5867 /* IPFW_DEFAULT_SET is always enabled */
5868 enable
|= (1 << IPFW_DEFAULT_SET
);
/* New mask: (current | requested-disable) with enabled bits cleared. */
5869 set_disable
= (ipfw_ctx
[mycpuid
]->ipfw_set_disable
| disable
) & ~enable
;
5871 bzero(&nmsg
, sizeof(nmsg
));
5872 netmsg_init(&nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5873 ipfw_set_disable_dispatch
);
5874 nmsg
.lmsg
.u
.ms_result32
= set_disable
;
/* Run the dispatch on all netisr CPUs and wait for completion. */
5876 netisr_domsg_global(&nmsg
);
5880 ipfw_table_create_dispatch(netmsg_t nm
)
5882 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5883 int tblid
= nm
->lmsg
.u
.ms_result
;
5885 ASSERT_NETISR_NCPUS(mycpuid
);
5887 if (!rn_inithead(&ctx
->ipfw_tables
[tblid
], rn_cpumaskhead(mycpuid
),
5888 offsetof(struct sockaddr_in
, sin_addr
)))
5889 panic("ipfw: create table%d failed", tblid
);
5891 netisr_forwardmsg(&nm
->base
, mycpuid
+ 1);
5895 ipfw_table_create(struct sockopt
*sopt
)
5897 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5898 struct ipfw_ioc_table
*tbl
;
5899 struct netmsg_base nm
;
5903 if (sopt
->sopt_valsize
!= sizeof(*tbl
))
5906 tbl
= sopt
->sopt_val
;
5907 if (tbl
->tableid
< 0 || tbl
->tableid
>= ipfw_table_max
)
5910 if (ctx
->ipfw_tables
[tbl
->tableid
] != NULL
)
5913 netmsg_init(&nm
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
5914 ipfw_table_create_dispatch
);
5915 nm
.lmsg
.u
.ms_result
= tbl
->tableid
;
5916 netisr_domsg_global(&nm
);
/*
 * rn_flush() callback used when flushing a table: reclaim one table
 * entry.  (NOTE(review): the tail of this function -- presumably the
 * actual freeing of 'te' and the return -- is missing from this
 * mangled span; TODO confirm against the full file.)
 */
5922 ipfw_table_killent(struct radix_node
*rn
)
5924 struct ipfw_tblent
*te
;
5926 te
= (struct ipfw_tblent
*)rn
;
5931 ipfw_table_flush_oncpu(struct ipfw_context
*ctx
, int tableid
,
5934 struct radix_node_head
*rnh
;
5936 ASSERT_NETISR_NCPUS(mycpuid
);
5938 rnh
= ctx
->ipfw_tables
[tableid
];
5939 rn_flush(rnh
, ipfw_table_killent
);
5942 ctx
->ipfw_tables
[tableid
] = NULL
;
5947 ipfw_table_flush_dispatch(netmsg_t nmsg
)
5949 struct netmsg_tblflush
*nm
= (struct netmsg_tblflush
*)nmsg
;
5950 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5952 ASSERT_NETISR_NCPUS(mycpuid
);
5954 ipfw_table_flush_oncpu(ctx
, nm
->tableid
, nm
->destroy
);
5955 netisr_forwardmsg(&nm
->base
, mycpuid
+ 1);
5959 ipfw_table_flushall_oncpu(struct ipfw_context
*ctx
, int destroy
)
5963 ASSERT_NETISR_NCPUS(mycpuid
);
5965 for (i
= 0; i
< ipfw_table_max
; ++i
) {
5966 if (ctx
->ipfw_tables
[i
] != NULL
)
5967 ipfw_table_flush_oncpu(ctx
, i
, destroy
);
5972 ipfw_table_flushall_dispatch(netmsg_t nmsg
)
5974 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5976 ASSERT_NETISR_NCPUS(mycpuid
);
5978 ipfw_table_flushall_oncpu(ctx
, 0);
5979 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
5983 ipfw_table_flush(struct sockopt
*sopt
)
5985 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
5986 struct ipfw_ioc_table
*tbl
;
5987 struct netmsg_tblflush nm
;
5991 if (sopt
->sopt_valsize
!= sizeof(*tbl
))
5994 tbl
= sopt
->sopt_val
;
5995 if (sopt
->sopt_name
== IP_FW_TBL_FLUSH
&& tbl
->tableid
< 0) {
5996 netmsg_init(&nm
.base
, NULL
, &curthread
->td_msgport
,
5997 MSGF_PRIORITY
, ipfw_table_flushall_dispatch
);
5998 netisr_domsg_global(&nm
.base
);
6002 if (tbl
->tableid
< 0 || tbl
->tableid
>= ipfw_table_max
)
6005 if (ctx
->ipfw_tables
[tbl
->tableid
] == NULL
)
6008 netmsg_init(&nm
.base
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
6009 ipfw_table_flush_dispatch
);
6010 nm
.tableid
= tbl
->tableid
;
6012 if (sopt
->sopt_name
== IP_FW_TBL_DESTROY
)
6014 netisr_domsg_global(&nm
.base
);
6020 ipfw_table_cntent(struct radix_node
*rn __unused
, void *xcnt
)
6029 ipfw_table_cpent(struct radix_node
*rn
, void *xcp
)
6031 struct ipfw_table_cp
*cp
= xcp
;
6032 struct ipfw_tblent
*te
= (struct ipfw_tblent
*)rn
;
6033 struct ipfw_ioc_tblent
*ioc_te
;
6038 KASSERT(cp
->te_idx
< cp
->te_cnt
, ("invalid table cp idx %d, cnt %d",
6039 cp
->te_idx
, cp
->te_cnt
));
6040 ioc_te
= &cp
->te
[cp
->te_idx
];
6042 if (te
->te_nodes
->rn_mask
!= NULL
) {
6043 memcpy(&ioc_te
->netmask
, te
->te_nodes
->rn_mask
,
6044 *te
->te_nodes
->rn_mask
);
6046 ioc_te
->netmask
.sin_len
= 0;
6048 memcpy(&ioc_te
->key
, &te
->te_key
, sizeof(ioc_te
->key
));
6050 ioc_te
->use
= te
->te_use
;
6051 ioc_te
->last_used
= te
->te_lastuse
;
6056 while ((te
= te
->te_sibling
) != NULL
) {
6060 ioc_te
->use
+= te
->te_use
;
6061 if (te
->te_lastuse
> ioc_te
->last_used
)
6062 ioc_te
->last_used
= te
->te_lastuse
;
6064 KASSERT(cnt
== netisr_ncpus
,
6065 ("invalid # of tblent %d, should be %d", cnt
, netisr_ncpus
));
6073 ipfw_table_get(struct sockopt
*sopt
)
6075 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6076 struct radix_node_head
*rnh
;
6077 struct ipfw_ioc_table
*tbl
;
6078 struct ipfw_ioc_tblcont
*cont
;
6079 struct ipfw_table_cp cp
;
6084 if (sopt
->sopt_valsize
< sizeof(*tbl
))
6087 tbl
= sopt
->sopt_val
;
6088 if (tbl
->tableid
< 0) {
6089 struct ipfw_ioc_tbllist
*list
;
6093 * List available table ids.
6095 for (i
= 0; i
< ipfw_table_max
; ++i
) {
6096 if (ctx
->ipfw_tables
[i
] != NULL
)
6100 sz
= __offsetof(struct ipfw_ioc_tbllist
, tables
[cnt
]);
6101 if (sopt
->sopt_valsize
< sz
) {
6102 bzero(sopt
->sopt_val
, sopt
->sopt_valsize
);
6105 list
= sopt
->sopt_val
;
6106 list
->tablecnt
= cnt
;
6109 for (i
= 0; i
< ipfw_table_max
; ++i
) {
6110 if (ctx
->ipfw_tables
[i
] != NULL
) {
6111 KASSERT(cnt
< list
->tablecnt
,
6112 ("invalid idx %d, cnt %d",
6113 cnt
, list
->tablecnt
));
6114 list
->tables
[cnt
++] = i
;
6117 sopt
->sopt_valsize
= sz
;
6119 } else if (tbl
->tableid
>= ipfw_table_max
) {
6123 rnh
= ctx
->ipfw_tables
[tbl
->tableid
];
6126 rnh
->rnh_walktree(rnh
, ipfw_table_cntent
, &cnt
);
6128 sz
= __offsetof(struct ipfw_ioc_tblcont
, ent
[cnt
]);
6129 if (sopt
->sopt_valsize
< sz
) {
6130 bzero(sopt
->sopt_val
, sopt
->sopt_valsize
);
6133 cont
= sopt
->sopt_val
;
6139 rnh
->rnh_walktree(rnh
, ipfw_table_cpent
, &cp
);
6141 sopt
->sopt_valsize
= sz
;
6146 ipfw_table_add_dispatch(netmsg_t nmsg
)
6148 struct netmsg_tblent
*nm
= (struct netmsg_tblent
*)nmsg
;
6149 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6150 struct radix_node_head
*rnh
;
6151 struct ipfw_tblent
*te
;
6153 ASSERT_NETISR_NCPUS(mycpuid
);
6155 rnh
= ctx
->ipfw_tables
[nm
->tableid
];
6157 te
= kmalloc(sizeof(*te
), M_IPFW
, M_WAITOK
| M_ZERO
);
6158 te
->te_nodes
->rn_key
= (char *)&te
->te_key
;
6159 memcpy(&te
->te_key
, nm
->key
, sizeof(te
->te_key
));
6161 if (rnh
->rnh_addaddr(&te
->te_key
, nm
->netmask
, rnh
, te
->te_nodes
)
6165 netisr_replymsg(&nm
->base
, EEXIST
);
6168 panic("rnh_addaddr failed");
6171 /* Link siblings. */
6172 if (nm
->sibling
!= NULL
)
6173 nm
->sibling
->te_sibling
= te
;
6176 netisr_forwardmsg(&nm
->base
, mycpuid
+ 1);
6180 ipfw_table_del_dispatch(netmsg_t nmsg
)
6182 struct netmsg_tblent
*nm
= (struct netmsg_tblent
*)nmsg
;
6183 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6184 struct radix_node_head
*rnh
;
6185 struct radix_node
*rn
;
6187 ASSERT_NETISR_NCPUS(mycpuid
);
6189 rnh
= ctx
->ipfw_tables
[nm
->tableid
];
6190 rn
= rnh
->rnh_deladdr(nm
->key
, nm
->netmask
, rnh
);
6193 netisr_replymsg(&nm
->base
, ESRCH
);
6196 panic("rnh_deladdr failed");
6200 netisr_forwardmsg(&nm
->base
, mycpuid
+ 1);
6204 ipfw_table_alt(struct sockopt
*sopt
)
6206 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6207 struct ipfw_ioc_tblcont
*tbl
;
6208 struct ipfw_ioc_tblent
*te
;
6209 struct sockaddr_in key0
;
6210 struct sockaddr
*netmask
= NULL
, *key
;
6211 struct netmsg_tblent nm
;
6215 if (sopt
->sopt_valsize
!= sizeof(*tbl
))
6217 tbl
= sopt
->sopt_val
;
6219 if (tbl
->tableid
< 0 || tbl
->tableid
>= ipfw_table_max
)
6221 if (tbl
->entcnt
!= 1)
6224 if (ctx
->ipfw_tables
[tbl
->tableid
] == NULL
)
6228 if (te
->key
.sin_family
!= AF_INET
||
6229 te
->key
.sin_port
!= 0 ||
6230 te
->key
.sin_len
!= sizeof(struct sockaddr_in
))
6232 key
= (struct sockaddr
*)&te
->key
;
6234 if (te
->netmask
.sin_len
!= 0) {
6235 if (te
->netmask
.sin_port
!= 0 ||
6236 te
->netmask
.sin_len
> sizeof(struct sockaddr_in
))
6238 netmask
= (struct sockaddr
*)&te
->netmask
;
6239 sa_maskedcopy(key
, (struct sockaddr
*)&key0
, netmask
);
6240 key
= (struct sockaddr
*)&key0
;
6243 if (sopt
->sopt_name
== IP_FW_TBL_ADD
) {
6244 netmsg_init(&nm
.base
, NULL
, &curthread
->td_msgport
,
6245 MSGF_PRIORITY
, ipfw_table_add_dispatch
);
6247 netmsg_init(&nm
.base
, NULL
, &curthread
->td_msgport
,
6248 MSGF_PRIORITY
, ipfw_table_del_dispatch
);
6251 nm
.netmask
= netmask
;
6252 nm
.tableid
= tbl
->tableid
;
6254 return (netisr_domsg_global(&nm
.base
));
6258 ipfw_table_zeroent(struct radix_node
*rn
, void *arg __unused
)
6260 struct ipfw_tblent
*te
= (struct ipfw_tblent
*)rn
;
6268 ipfw_table_zero_dispatch(netmsg_t nmsg
)
6270 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6271 struct radix_node_head
*rnh
;
6273 ASSERT_NETISR_NCPUS(mycpuid
);
6275 rnh
= ctx
->ipfw_tables
[nmsg
->lmsg
.u
.ms_result
];
6276 rnh
->rnh_walktree(rnh
, ipfw_table_zeroent
, NULL
);
6278 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
6282 ipfw_table_zeroall_dispatch(netmsg_t nmsg
)
6284 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6287 ASSERT_NETISR_NCPUS(mycpuid
);
6289 for (i
= 0; i
< ipfw_table_max
; ++i
) {
6290 struct radix_node_head
*rnh
= ctx
->ipfw_tables
[i
];
6293 rnh
->rnh_walktree(rnh
, ipfw_table_zeroent
, NULL
);
6295 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
6299 ipfw_table_zero(struct sockopt
*sopt
)
6301 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6302 struct netmsg_base nm
;
6303 struct ipfw_ioc_table
*tbl
;
6307 if (sopt
->sopt_valsize
!= sizeof(*tbl
))
6309 tbl
= sopt
->sopt_val
;
6311 if (tbl
->tableid
< 0) {
6312 netmsg_init(&nm
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
6313 ipfw_table_zeroall_dispatch
);
6314 netisr_domsg_global(&nm
);
6316 } else if (tbl
->tableid
>= ipfw_table_max
) {
6318 } else if (ctx
->ipfw_tables
[tbl
->tableid
] == NULL
) {
6322 netmsg_init(&nm
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
6323 ipfw_table_zero_dispatch
);
6324 nm
.lmsg
.u
.ms_result
= tbl
->tableid
;
6325 netisr_domsg_global(&nm
);
6331 ipfw_table_killexp(struct radix_node
*rn
, void *xnm
)
6333 struct netmsg_tblexp
*nm
= xnm
;
6334 struct ipfw_tblent
*te
= (struct ipfw_tblent
*)rn
;
6335 struct radix_node
*ret
;
6337 if (te
->te_expired
) {
6338 ret
= nm
->rnh
->rnh_deladdr(rn
->rn_key
, rn
->rn_mask
, nm
->rnh
);
6340 panic("deleted other table entry");
6348 ipfw_table_expire_dispatch(netmsg_t nmsg
)
6350 struct netmsg_tblexp
*nm
= (struct netmsg_tblexp
*)nmsg
;
6351 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6352 struct radix_node_head
*rnh
;
6354 ASSERT_NETISR_NCPUS(mycpuid
);
6356 rnh
= ctx
->ipfw_tables
[nm
->tableid
];
6358 rnh
->rnh_walktree(rnh
, ipfw_table_killexp
, nm
);
6360 KASSERT(nm
->expcnt
== nm
->cnt
* (mycpuid
+ 1),
6361 ("not all expired addresses (%d) were deleted (%d)",
6362 nm
->cnt
* (mycpuid
+ 1), nm
->expcnt
));
6364 netisr_forwardmsg(&nm
->base
, mycpuid
+ 1);
6368 ipfw_table_expireall_dispatch(netmsg_t nmsg
)
6370 struct netmsg_tblexp
*nm
= (struct netmsg_tblexp
*)nmsg
;
6371 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6374 ASSERT_NETISR_NCPUS(mycpuid
);
6376 for (i
= 0; i
< ipfw_table_max
; ++i
) {
6377 struct radix_node_head
*rnh
= ctx
->ipfw_tables
[i
];
6382 rnh
->rnh_walktree(rnh
, ipfw_table_killexp
, nm
);
6385 KASSERT(nm
->expcnt
== nm
->cnt
* (mycpuid
+ 1),
6386 ("not all expired addresses (%d) were deleted (%d)",
6387 nm
->cnt
* (mycpuid
+ 1), nm
->expcnt
));
6389 netisr_forwardmsg(&nm
->base
, mycpuid
+ 1);
6393 ipfw_table_markexp(struct radix_node
*rn
, void *xnm
)
6395 struct netmsg_tblexp
*nm
= xnm
;
6396 struct ipfw_tblent
*te
;
6399 te
= (struct ipfw_tblent
*)rn
;
6400 lastuse
= te
->te_lastuse
;
6402 while ((te
= te
->te_sibling
) != NULL
) {
6403 if (te
->te_lastuse
> lastuse
)
6404 lastuse
= te
->te_lastuse
;
6406 if (!TIME_LEQ(lastuse
+ nm
->expire
, time_second
)) {
6411 te
= (struct ipfw_tblent
*)rn
;
6413 while ((te
= te
->te_sibling
) != NULL
)
6421 ipfw_table_expire(struct sockopt
*sopt
)
6423 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6424 struct netmsg_tblexp nm
;
6425 struct ipfw_ioc_tblexp
*tbl
;
6426 struct radix_node_head
*rnh
;
6430 if (sopt
->sopt_valsize
!= sizeof(*tbl
))
6432 tbl
= sopt
->sopt_val
;
6437 nm
.expire
= tbl
->expire
;
6439 if (tbl
->tableid
< 0) {
6442 for (i
= 0; i
< ipfw_table_max
; ++i
) {
6443 rnh
= ctx
->ipfw_tables
[i
];
6446 rnh
->rnh_walktree(rnh
, ipfw_table_markexp
, &nm
);
6449 /* No addresses can be expired. */
6452 tbl
->expcnt
= nm
.cnt
;
6454 netmsg_init(&nm
.base
, NULL
, &curthread
->td_msgport
,
6455 MSGF_PRIORITY
, ipfw_table_expireall_dispatch
);
6457 netisr_domsg_global(&nm
.base
);
6458 KASSERT(nm
.expcnt
== nm
.cnt
* netisr_ncpus
,
6459 ("not all expired addresses (%d) were deleted (%d)",
6460 nm
.cnt
* netisr_ncpus
, nm
.expcnt
));
6463 } else if (tbl
->tableid
>= ipfw_table_max
) {
6467 rnh
= ctx
->ipfw_tables
[tbl
->tableid
];
6470 rnh
->rnh_walktree(rnh
, ipfw_table_markexp
, &nm
);
6472 /* No addresses can be expired. */
6475 tbl
->expcnt
= nm
.cnt
;
6477 netmsg_init(&nm
.base
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
6478 ipfw_table_expire_dispatch
);
6479 nm
.tableid
= tbl
->tableid
;
6480 netisr_domsg_global(&nm
.base
);
6481 KASSERT(nm
.expcnt
== nm
.cnt
* netisr_ncpus
,
6482 ("not all expired addresses (%d) were deleted (%d)",
6483 nm
.cnt
* netisr_ncpus
, nm
.expcnt
));
6488 ipfw_crossref_free_dispatch(netmsg_t nmsg
)
6490 struct ip_fw
*rule
= nmsg
->lmsg
.u
.ms_resultp
;
6492 KKASSERT((rule
->rule_flags
&
6493 (IPFW_RULE_F_CROSSREF
| IPFW_RULE_F_INVALID
)) ==
6494 (IPFW_RULE_F_CROSSREF
| IPFW_RULE_F_INVALID
));
6495 ipfw_free_rule(rule
);
6497 netisr_replymsg(&nmsg
->base
, 0);
6501 ipfw_crossref_reap(void)
6503 struct ip_fw
*rule
, *prev
= NULL
;
6507 rule
= ipfw_gd
.ipfw_crossref_free
;
6508 while (rule
!= NULL
) {
6509 uint64_t inflight
= 0;
6512 for (i
= 0; i
< netisr_ncpus
; ++i
)
6513 inflight
+= rule
->cross_rules
[i
]->cross_refs
;
6514 if (inflight
== 0) {
6515 struct ip_fw
*f
= rule
;
6524 ipfw_gd
.ipfw_crossref_free
= rule
;
6529 for (i
= 1; i
< netisr_ncpus
; ++i
) {
6530 struct netmsg_base nm
;
6532 netmsg_init(&nm
, NULL
, &curthread
->td_msgport
,
6533 MSGF_PRIORITY
, ipfw_crossref_free_dispatch
);
6534 nm
.lmsg
.u
.ms_resultp
= f
->cross_rules
[i
];
6535 netisr_domsg(&nm
, i
);
6537 KKASSERT((f
->rule_flags
&
6538 (IPFW_RULE_F_CROSSREF
| IPFW_RULE_F_INVALID
)) ==
6539 (IPFW_RULE_F_CROSSREF
| IPFW_RULE_F_INVALID
));
6547 if (ipfw_gd
.ipfw_crossref_free
!= NULL
) {
6548 callout_reset(&ipfw_gd
.ipfw_crossref_ch
, hz
,
6549 ipfw_crossref_timeo
, NULL
);
6554 * {set|get}sockopt parser.
6557 ipfw_ctl(struct sockopt
*sopt
)
6567 switch (sopt
->sopt_name
) {
6569 error
= ipfw_ctl_get_rules(sopt
);
6573 ipfw_flush(0 /* keep default rule */);
6577 error
= ipfw_ctl_add_rule(sopt
);
6582 * IP_FW_DEL is used for deleting single rules or sets,
6583 * and (ab)used to atomically manipulate sets.
6584 * Argument size is used to distinguish between the two:
6586 * delete single rule or set of rules,
6587 * or reassign rules (or sets) to a different set.
6588 * 2 * sizeof(uint32_t)
6589 * atomic disable/enable sets.
6590 * first uint32_t contains sets to be disabled,
6591 * second uint32_t contains sets to be enabled.
6593 masks
= sopt
->sopt_val
;
6594 size
= sopt
->sopt_valsize
;
6595 if (size
== sizeof(*masks
)) {
6597 * Delete or reassign static rule
6599 error
= ipfw_ctl_alter(masks
[0]);
6600 } else if (size
== (2 * sizeof(*masks
))) {
6602 * Set enable/disable
6604 ipfw_ctl_set_disable(masks
[0], masks
[1]);
6611 case IP_FW_RESETLOG
: /* argument is an int, the rule number */
6614 if (sopt
->sopt_val
!= 0) {
6615 error
= soopt_to_kbuf(sopt
, &rulenum
,
6616 sizeof(int), sizeof(int));
6620 error
= ipfw_ctl_zero_entry(rulenum
,
6621 sopt
->sopt_name
== IP_FW_RESETLOG
);
6624 case IP_FW_TBL_CREATE
:
6625 error
= ipfw_table_create(sopt
);
6630 error
= ipfw_table_alt(sopt
);
6633 case IP_FW_TBL_FLUSH
:
6634 case IP_FW_TBL_DESTROY
:
6635 error
= ipfw_table_flush(sopt
);
6639 error
= ipfw_table_get(sopt
);
6642 case IP_FW_TBL_ZERO
:
6643 error
= ipfw_table_zero(sopt
);
6646 case IP_FW_TBL_EXPIRE
:
6647 error
= ipfw_table_expire(sopt
);
6651 kprintf("ipfw_ctl invalid option %d\n", sopt
->sopt_name
);
6655 ipfw_crossref_reap();
6660 ipfw_keepalive_done(struct ipfw_context
*ctx
)
6663 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_KEEPALIVE
,
6664 ("keepalive is not in progress"));
6665 ctx
->ipfw_flags
&= ~IPFW_FLAG_KEEPALIVE
;
6666 callout_reset(&ctx
->ipfw_keepalive_ch
, dyn_keepalive_period
* hz
,
6667 ipfw_keepalive
, NULL
);
6671 ipfw_keepalive_more(struct ipfw_context
*ctx
)
6673 struct netmsg_base
*nm
= &ctx
->ipfw_keepalive_more
;
6675 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_KEEPALIVE
,
6676 ("keepalive is not in progress"));
6677 KASSERT(nm
->lmsg
.ms_flags
& MSGF_DONE
,
6678 ("keepalive more did not finish"));
6679 netisr_sendmsg_oncpu(nm
);
6683 ipfw_keepalive_loop(struct ipfw_context
*ctx
, struct ipfw_state
*anchor
)
6685 struct ipfw_state
*s
;
6686 int scanned
= 0, expired
= 0, kept
= 0;
6688 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_KEEPALIVE
,
6689 ("keepalive is not in progress"));
6691 while ((s
= TAILQ_NEXT(anchor
, st_link
)) != NULL
) {
6692 uint32_t ack_rev
, ack_fwd
;
6693 struct ipfw_flow_id id
;
6696 if (scanned
++ >= ipfw_state_scan_max
) {
6697 ipfw_keepalive_more(ctx
);
6701 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
6702 TAILQ_INSERT_AFTER(&ctx
->ipfw_state_list
, s
, anchor
, st_link
);
6706 * Don't use IPFW_STATE_SCANSKIP; need to perform keepalive
6709 if (s
->st_type
== O_ANCHOR
)
6712 if (IPFW_STATE_ISDEAD(s
)) {
6713 ipfw_state_remove(ctx
, s
);
6714 if (++expired
>= ipfw_state_expire_max
) {
6715 ipfw_keepalive_more(ctx
);
6722 * Keep alive processing
6725 if (s
->st_proto
!= IPPROTO_TCP
)
6727 if ((s
->st_state
& IPFW_STATE_TCPSTATES
) != BOTH_SYN
)
6729 if (TIME_LEQ(time_uptime
+ dyn_keepalive_interval
,
6731 continue; /* too early */
6733 ipfw_key_4tuple(&s
->st_key
, &id
.src_ip
, &id
.src_port
,
6734 &id
.dst_ip
, &id
.dst_port
);
6735 ack_rev
= s
->st_ack_rev
;
6736 ack_fwd
= s
->st_ack_fwd
;
6738 #define SEND_FWD 0x1
6739 #define SEND_REV 0x2
6741 if (IPFW_ISXLAT(s
->st_type
)) {
6742 const struct ipfw_xlat
*x
= (const struct ipfw_xlat
*)s
;
6744 if (x
->xlat_dir
== MATCH_FORWARD
)
6745 send_dir
= SEND_FWD
;
6747 send_dir
= SEND_REV
;
6749 send_dir
= SEND_FWD
| SEND_REV
;
6752 if (send_dir
& SEND_REV
)
6753 send_pkt(&id
, ack_rev
- 1, ack_fwd
, TH_SYN
);
6754 if (send_dir
& SEND_FWD
)
6755 send_pkt(&id
, ack_fwd
- 1, ack_rev
, 0);
6760 if (++kept
>= ipfw_keepalive_max
) {
6761 ipfw_keepalive_more(ctx
);
6765 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
6766 ipfw_keepalive_done(ctx
);
6770 ipfw_keepalive_more_dispatch(netmsg_t nm
)
6772 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6773 struct ipfw_state
*anchor
;
6775 ASSERT_NETISR_NCPUS(mycpuid
);
6776 KASSERT(ctx
->ipfw_flags
& IPFW_FLAG_KEEPALIVE
,
6777 ("keepalive is not in progress"));
6780 netisr_replymsg(&nm
->base
, 0);
6782 anchor
= &ctx
->ipfw_keepalive_anch
;
6783 if (!dyn_keepalive
|| ctx
->ipfw_state_cnt
== 0) {
6784 TAILQ_REMOVE(&ctx
->ipfw_state_list
, anchor
, st_link
);
6785 ipfw_keepalive_done(ctx
);
6788 ipfw_keepalive_loop(ctx
, anchor
);
6792 * This procedure is only used to handle keepalives. It is invoked
6793 * every dyn_keepalive_period
6796 ipfw_keepalive_dispatch(netmsg_t nm
)
6798 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6799 struct ipfw_state
*anchor
;
6801 ASSERT_NETISR_NCPUS(mycpuid
);
6802 KASSERT((ctx
->ipfw_flags
& IPFW_FLAG_KEEPALIVE
) == 0,
6803 ("keepalive is in progress"));
6804 ctx
->ipfw_flags
|= IPFW_FLAG_KEEPALIVE
;
6808 netisr_replymsg(&nm
->base
, 0);
6811 if (!dyn_keepalive
|| ctx
->ipfw_state_cnt
== 0) {
6812 ipfw_keepalive_done(ctx
);
6816 anchor
= &ctx
->ipfw_keepalive_anch
;
6817 TAILQ_INSERT_HEAD(&ctx
->ipfw_state_list
, anchor
, st_link
);
6818 ipfw_keepalive_loop(ctx
, anchor
);
6822 * This procedure is only used to handle keepalives. It is invoked
6823 * every dyn_keepalive_period
6826 ipfw_keepalive(void *dummy __unused
)
6828 struct netmsg_base
*msg
;
6830 KKASSERT(mycpuid
< netisr_ncpus
);
6831 msg
= &ipfw_ctx
[mycpuid
]->ipfw_keepalive_nm
;
6834 if (msg
->lmsg
.ms_flags
& MSGF_DONE
)
6835 netisr_sendmsg_oncpu(msg
);
6840 ipfw_ip_input_dispatch(netmsg_t nmsg
)
6842 struct netmsg_genpkt
*nm
= (struct netmsg_genpkt
*)nmsg
;
6843 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6844 struct mbuf
*m
= nm
->m
;
6845 struct ip_fw
*rule
= nm
->arg1
;
6847 ASSERT_NETISR_NCPUS(mycpuid
);
6848 KASSERT(rule
->cpuid
== mycpuid
,
6849 ("rule does not belong to cpu%d", mycpuid
));
6850 KASSERT(m
->m_pkthdr
.fw_flags
& IPFW_MBUF_CONTINUE
,
6851 ("mbuf does not have ipfw continue rule"));
6853 KASSERT(ctx
->ipfw_cont_rule
== NULL
,
6854 ("pending ipfw continue rule"));
6855 ctx
->ipfw_cont_rule
= rule
;
6858 /* May not be cleared, if ipfw was unload/disabled. */
6859 ctx
->ipfw_cont_rule
= NULL
;
6862 * This rule is no longer used; decrement its cross_refs,
6863 * so this rule can be deleted.
6869 ipfw_defrag_redispatch(struct mbuf
*m
, int cpuid
, struct ip_fw
*rule
)
6871 struct netmsg_genpkt
*nm
;
6873 KASSERT(cpuid
!= mycpuid
, ("continue on the same cpu%d", cpuid
));
6877 * Bump cross_refs to prevent this rule and its siblings
6878 * from being deleted, while this mbuf is inflight. The
6879 * cross_refs of the sibling rule on the target cpu will
6880 * be decremented, once this mbuf is going to be filtered
6881 * on the target cpu.
6884 m
->m_pkthdr
.fw_flags
|= IPFW_MBUF_CONTINUE
;
6886 nm
= &m
->m_hdr
.mh_genmsg
;
6887 netmsg_init(&nm
->base
, NULL
, &netisr_apanic_rport
, 0,
6888 ipfw_ip_input_dispatch
);
6890 nm
->arg1
= rule
->cross_rules
[cpuid
];
6891 netisr_sendmsg(&nm
->base
, cpuid
);
6895 ipfw_init_args(struct ip_fw_args
*args
, struct mbuf
*m
, struct ifnet
*oif
)
6902 if (m
->m_pkthdr
.fw_flags
& DUMMYNET_MBUF_TAGGED
) {
6905 /* Extract info from dummynet tag */
6906 mtag
= m_tag_find(m
, PACKET_TAG_DUMMYNET
, NULL
);
6907 KKASSERT(mtag
!= NULL
);
6908 args
->rule
= ((struct dn_pkt
*)m_tag_data(mtag
))->dn_priv
;
6909 KKASSERT(args
->rule
!= NULL
);
6911 m_tag_delete(m
, mtag
);
6912 m
->m_pkthdr
.fw_flags
&= ~DUMMYNET_MBUF_TAGGED
;
6913 } else if (m
->m_pkthdr
.fw_flags
& IPFW_MBUF_CONTINUE
) {
6914 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
6916 KKASSERT(ctx
->ipfw_cont_rule
!= NULL
);
6917 args
->rule
= ctx
->ipfw_cont_rule
;
6918 ctx
->ipfw_cont_rule
= NULL
;
6920 if (ctx
->ipfw_cont_xlat
!= NULL
) {
6921 args
->xlat
= ctx
->ipfw_cont_xlat
;
6922 ctx
->ipfw_cont_xlat
= NULL
;
6923 if (m
->m_pkthdr
.fw_flags
& IPFW_MBUF_XLATINS
) {
6924 args
->flags
|= IP_FWARG_F_XLATINS
;
6925 m
->m_pkthdr
.fw_flags
&= ~IPFW_MBUF_XLATINS
;
6927 if (m
->m_pkthdr
.fw_flags
& IPFW_MBUF_XLATFWD
) {
6928 args
->flags
|= IP_FWARG_F_XLATFWD
;
6929 m
->m_pkthdr
.fw_flags
&= ~IPFW_MBUF_XLATFWD
;
6932 KKASSERT((m
->m_pkthdr
.fw_flags
&
6933 (IPFW_MBUF_XLATINS
| IPFW_MBUF_XLATFWD
)) == 0);
6935 args
->flags
|= IP_FWARG_F_CONT
;
6936 m
->m_pkthdr
.fw_flags
&= ~IPFW_MBUF_CONTINUE
;
6945 ipfw_check_in(void *arg
, struct mbuf
**m0
, struct ifnet
*ifp
, int dir
)
6947 struct ip_fw_args args
;
6948 struct mbuf
*m
= *m0
;
6949 int tee
= 0, error
= 0, ret
;
6951 ipfw_init_args(&args
, m
, NULL
);
6953 ret
= ipfw_chk(&args
);
6956 if (ret
!= IP_FW_REDISPATCH
)
6971 case IP_FW_DUMMYNET
:
6972 /* Send packet to the appropriate pipe */
6973 m
= ipfw_dummynet_io(m
, args
.cookie
, DN_TO_IP_IN
, &args
);
6982 * Must clear bridge tag when changing
6984 m
->m_pkthdr
.fw_flags
&= ~BRIDGE_MBUF_TAGGED
;
6985 if (ip_divert_p
!= NULL
) {
6986 m
= ip_divert_p(m
, tee
, 1);
6990 /* not sure this is the right error msg */
6996 panic("unknown ipfw return value: %d", ret
);
7004 ipfw_check_out(void *arg
, struct mbuf
**m0
, struct ifnet
*ifp
, int dir
)
7006 struct ip_fw_args args
;
7007 struct mbuf
*m
= *m0
;
7008 int tee
= 0, error
= 0, ret
;
7010 ipfw_init_args(&args
, m
, ifp
);
7012 ret
= ipfw_chk(&args
);
7015 if (ret
!= IP_FW_REDISPATCH
)
7030 case IP_FW_DUMMYNET
:
7031 m
= ipfw_dummynet_io(m
, args
.cookie
, DN_TO_IP_OUT
, &args
);
7039 if (ip_divert_p
!= NULL
) {
7040 m
= ip_divert_p(m
, tee
, 0);
7044 /* not sure this is the right error msg */
7050 panic("unknown ipfw return value: %d", ret
);
7060 struct pfil_head
*pfh
;
7064 pfh
= pfil_head_get(PFIL_TYPE_AF
, AF_INET
);
7068 pfil_add_hook(ipfw_check_in
, NULL
, PFIL_IN
, pfh
);
7069 pfil_add_hook(ipfw_check_out
, NULL
, PFIL_OUT
, pfh
);
7075 struct pfil_head
*pfh
;
7079 pfh
= pfil_head_get(PFIL_TYPE_AF
, AF_INET
);
7083 pfil_remove_hook(ipfw_check_in
, NULL
, PFIL_IN
, pfh
);
7084 pfil_remove_hook(ipfw_check_out
, NULL
, PFIL_OUT
, pfh
);
7088 ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS
)
7092 dyn_cnt
= ipfw_state_cntcoll();
7093 dyn_cnt
+= ipfw_gd
.ipfw_trkcnt_cnt
;
7095 return (sysctl_handle_int(oidp
, &dyn_cnt
, 0, req
));
7099 ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS
)
7103 state_cnt
= ipfw_state_cntcoll();
7104 return (sysctl_handle_int(oidp
, &state_cnt
, 0, req
));
7108 ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS
)
7110 int state_max
, error
;
7112 state_max
= ipfw_state_max
;
7113 error
= sysctl_handle_int(oidp
, &state_max
, 0, req
);
7114 if (error
|| req
->newptr
== NULL
)
7120 ipfw_state_max_set(state_max
);
7125 ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS
)
7129 dyn_max
= ipfw_state_max
+ ipfw_track_max
;
7131 error
= sysctl_handle_int(oidp
, &dyn_max
, 0, req
);
7132 if (error
|| req
->newptr
== NULL
)
7138 ipfw_state_max_set(dyn_max
/ 2);
7139 ipfw_track_max
= dyn_max
/ 2;
7144 ipfw_sysctl_enable_dispatch(netmsg_t nmsg
)
7146 int enable
= nmsg
->lmsg
.u
.ms_result
;
7150 if (fw_enable
== enable
)
7159 netisr_replymsg(&nmsg
->base
, 0);
7163 ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS
)
7165 struct netmsg_base nmsg
;
7169 error
= sysctl_handle_int(oidp
, &enable
, 0, req
);
7170 if (error
|| req
->newptr
== NULL
)
7173 netmsg_init(&nmsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
7174 ipfw_sysctl_enable_dispatch
);
7175 nmsg
.lmsg
.u
.ms_result
= enable
;
7177 return netisr_domsg(&nmsg
, 0);
7181 ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS
)
7183 return sysctl_int_range(oidp
, arg1
, arg2
, req
,
7184 IPFW_AUTOINC_STEP_MIN
, IPFW_AUTOINC_STEP_MAX
);
7188 ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS
)
7191 return sysctl_int_range(oidp
, arg1
, arg2
, req
, 1, INT_MAX
);
7195 ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS
)
7200 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
)
7201 stat
+= *((u_long
*)((uint8_t *)ipfw_ctx
[cpu
] + arg2
));
7203 error
= sysctl_handle_long(oidp
, &stat
, 0, req
);
7204 if (error
|| req
->newptr
== NULL
)
7207 /* Zero out this stat. */
7208 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
)
7209 *((u_long
*)((uint8_t *)ipfw_ctx
[cpu
] + arg2
)) = 0;
7214 ipfw_ctx_init_dispatch(netmsg_t nmsg
)
7216 struct netmsg_ipfw
*fwmsg
= (struct netmsg_ipfw
*)nmsg
;
7217 struct ipfw_context
*ctx
;
7218 struct ip_fw
*def_rule
;
7220 ASSERT_NETISR_NCPUS(mycpuid
);
7222 ctx
= kmalloc(__offsetof(struct ipfw_context
,
7223 ipfw_tables
[ipfw_table_max
]), M_IPFW
, M_WAITOK
| M_ZERO
);
7225 RB_INIT(&ctx
->ipfw_state_tree
);
7226 TAILQ_INIT(&ctx
->ipfw_state_list
);
7228 RB_INIT(&ctx
->ipfw_track_tree
);
7229 TAILQ_INIT(&ctx
->ipfw_track_list
);
7231 callout_init_mp(&ctx
->ipfw_stateto_ch
);
7232 netmsg_init(&ctx
->ipfw_stateexp_nm
, NULL
, &netisr_adone_rport
,
7233 MSGF_DROPABLE
| MSGF_PRIORITY
, ipfw_state_expire_dispatch
);
7234 ctx
->ipfw_stateexp_anch
.st_type
= O_ANCHOR
;
7235 netmsg_init(&ctx
->ipfw_stateexp_more
, NULL
, &netisr_adone_rport
,
7236 MSGF_DROPABLE
, ipfw_state_expire_more_dispatch
);
7238 callout_init_mp(&ctx
->ipfw_trackto_ch
);
7239 netmsg_init(&ctx
->ipfw_trackexp_nm
, NULL
, &netisr_adone_rport
,
7240 MSGF_DROPABLE
| MSGF_PRIORITY
, ipfw_track_expire_dispatch
);
7241 netmsg_init(&ctx
->ipfw_trackexp_more
, NULL
, &netisr_adone_rport
,
7242 MSGF_DROPABLE
, ipfw_track_expire_more_dispatch
);
7244 callout_init_mp(&ctx
->ipfw_keepalive_ch
);
7245 netmsg_init(&ctx
->ipfw_keepalive_nm
, NULL
, &netisr_adone_rport
,
7246 MSGF_DROPABLE
| MSGF_PRIORITY
, ipfw_keepalive_dispatch
);
7247 ctx
->ipfw_keepalive_anch
.st_type
= O_ANCHOR
;
7248 netmsg_init(&ctx
->ipfw_keepalive_more
, NULL
, &netisr_adone_rport
,
7249 MSGF_DROPABLE
, ipfw_keepalive_more_dispatch
);
7251 callout_init_mp(&ctx
->ipfw_xlatreap_ch
);
7252 netmsg_init(&ctx
->ipfw_xlatreap_nm
, NULL
, &netisr_adone_rport
,
7253 MSGF_DROPABLE
| MSGF_PRIORITY
, ipfw_xlat_reap_dispatch
);
7254 TAILQ_INIT(&ctx
->ipfw_xlatreap
);
7256 ipfw_ctx
[mycpuid
] = ctx
;
7258 def_rule
= kmalloc(sizeof(*def_rule
), M_IPFW
, M_WAITOK
| M_ZERO
);
7260 def_rule
->act_ofs
= 0;
7261 def_rule
->rulenum
= IPFW_DEFAULT_RULE
;
7262 def_rule
->cmd_len
= 1;
7263 def_rule
->set
= IPFW_DEFAULT_SET
;
7265 def_rule
->cmd
[0].len
= 1;
7266 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
7267 def_rule
->cmd
[0].opcode
= O_ACCEPT
;
7269 if (filters_default_to_accept
)
7270 def_rule
->cmd
[0].opcode
= O_ACCEPT
;
7272 def_rule
->cmd
[0].opcode
= O_DENY
;
7275 def_rule
->refcnt
= 1;
7276 def_rule
->cpuid
= mycpuid
;
7278 /* Install the default rule */
7279 ctx
->ipfw_default_rule
= def_rule
;
7280 ctx
->ipfw_layer3_chain
= def_rule
;
7282 /* Link rule CPU sibling */
7283 ipfw_link_sibling(fwmsg
, def_rule
);
7285 /* Statistics only need to be updated once */
7287 ipfw_inc_static_count(def_rule
);
7289 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
7293 ipfw_crossref_reap_dispatch(netmsg_t nmsg
)
7298 netisr_replymsg(&nmsg
->base
, 0);
7300 ipfw_crossref_reap();
7304 ipfw_crossref_timeo(void *dummy __unused
)
7306 struct netmsg_base
*msg
= &ipfw_gd
.ipfw_crossref_nm
;
7308 KKASSERT(mycpuid
== 0);
7311 if (msg
->lmsg
.ms_flags
& MSGF_DONE
)
7312 netisr_sendmsg_oncpu(msg
);
7317 ipfw_ifaddr_dispatch(netmsg_t nmsg
)
7319 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
7320 struct ifnet
*ifp
= nmsg
->lmsg
.u
.ms_resultp
;
7323 ASSERT_NETISR_NCPUS(mycpuid
);
7325 for (f
= ctx
->ipfw_layer3_chain
; f
!= NULL
; f
= f
->next
) {
7329 if ((f
->rule_flags
& IPFW_RULE_F_DYNIFADDR
) == 0)
7332 for (l
= f
->cmd_len
, cmd
= f
->cmd
; l
> 0;
7333 l
-= cmdlen
, cmd
+= cmdlen
) {
7334 cmdlen
= F_LEN(cmd
);
7335 if (cmd
->opcode
== O_IP_SRC_IFIP
||
7336 cmd
->opcode
== O_IP_DST_IFIP
) {
7337 if (strncmp(ifp
->if_xname
,
7338 ((ipfw_insn_ifip
*)cmd
)->ifname
,
7340 cmd
->arg1
&= ~IPFW_IFIP_VALID
;
7344 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
7348 ipfw_ifaddr(void *arg __unused
, struct ifnet
*ifp
,
7349 enum ifaddr_event event __unused
, struct ifaddr
*ifa __unused
)
7351 struct netmsg_base nm
;
7353 netmsg_init(&nm
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
7354 ipfw_ifaddr_dispatch
);
7355 nm
.lmsg
.u
.ms_resultp
= ifp
;
7356 netisr_domsg_global(&nm
);
7360 ipfw_init_dispatch(netmsg_t nmsg
)
7362 struct netmsg_ipfw fwmsg
;
7368 kprintf("IP firewall already loaded\n");
7373 if (ipfw_table_max
> UINT16_MAX
|| ipfw_table_max
<= 0)
7374 ipfw_table_max
= UINT16_MAX
;
7376 /* Initialize global track tree. */
7377 RB_INIT(&ipfw_gd
.ipfw_trkcnt_tree
);
7378 IPFW_TRKCNT_TOKINIT
;
7380 /* GC for freed crossref rules. */
7381 callout_init_mp(&ipfw_gd
.ipfw_crossref_ch
);
7382 netmsg_init(&ipfw_gd
.ipfw_crossref_nm
, NULL
, &netisr_adone_rport
,
7383 MSGF_PRIORITY
| MSGF_DROPABLE
, ipfw_crossref_reap_dispatch
);
7385 ipfw_state_max_set(ipfw_state_max
);
7386 ipfw_state_headroom
= 8 * netisr_ncpus
;
7388 bzero(&fwmsg
, sizeof(fwmsg
));
7389 netmsg_init(&fwmsg
.base
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
7390 ipfw_ctx_init_dispatch
);
7391 netisr_domsg_global(&fwmsg
.base
);
7393 ip_fw_chk_ptr
= ipfw_chk
;
7394 ip_fw_ctl_ptr
= ipfw_ctl
;
7395 ip_fw_dn_io_ptr
= ipfw_dummynet_io
;
7397 kprintf("ipfw2 initialized, default to %s, logging ",
7398 ipfw_ctx
[mycpuid
]->ipfw_default_rule
->cmd
[0].opcode
==
7399 O_ACCEPT
? "accept" : "deny");
7401 #ifdef IPFIREWALL_VERBOSE
7404 #ifdef IPFIREWALL_VERBOSE_LIMIT
7405 verbose_limit
= IPFIREWALL_VERBOSE_LIMIT
;
7407 if (fw_verbose
== 0) {
7408 kprintf("disabled\n");
7409 } else if (verbose_limit
== 0) {
7410 kprintf("unlimited\n");
7412 kprintf("limited to %d packets/entry by default\n",
7417 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
) {
7418 callout_reset_bycpu(&ipfw_ctx
[cpu
]->ipfw_stateto_ch
, hz
,
7419 ipfw_state_expire_ipifunc
, NULL
, cpu
);
7420 callout_reset_bycpu(&ipfw_ctx
[cpu
]->ipfw_trackto_ch
, hz
,
7421 ipfw_track_expire_ipifunc
, NULL
, cpu
);
7422 callout_reset_bycpu(&ipfw_ctx
[cpu
]->ipfw_keepalive_ch
, hz
,
7423 ipfw_keepalive
, NULL
, cpu
);
7429 ipfw_ifaddr_event
= EVENTHANDLER_REGISTER(ifaddr_event
, ipfw_ifaddr
,
7430 NULL
, EVENTHANDLER_PRI_ANY
);
7431 if (ipfw_ifaddr_event
== NULL
)
7432 kprintf("ipfw: ifaddr_event register failed\n");
7435 netisr_replymsg(&nmsg
->base
, error
);
7441 struct netmsg_base smsg
;
7443 netmsg_init(&smsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
7444 ipfw_init_dispatch
);
7445 return netisr_domsg(&smsg
, 0);
7451 ipfw_ctx_fini_dispatch(netmsg_t nmsg
)
7453 struct ipfw_context
*ctx
= ipfw_ctx
[mycpuid
];
7455 ASSERT_NETISR_NCPUS(mycpuid
);
7457 callout_cancel(&ctx
->ipfw_stateto_ch
);
7458 callout_cancel(&ctx
->ipfw_trackto_ch
);
7459 callout_cancel(&ctx
->ipfw_keepalive_ch
);
7460 callout_cancel(&ctx
->ipfw_xlatreap_ch
);
7463 netisr_dropmsg(&ctx
->ipfw_stateexp_more
);
7464 netisr_dropmsg(&ctx
->ipfw_stateexp_nm
);
7465 netisr_dropmsg(&ctx
->ipfw_trackexp_more
);
7466 netisr_dropmsg(&ctx
->ipfw_trackexp_nm
);
7467 netisr_dropmsg(&ctx
->ipfw_keepalive_more
);
7468 netisr_dropmsg(&ctx
->ipfw_keepalive_nm
);
7469 netisr_dropmsg(&ctx
->ipfw_xlatreap_nm
);
7472 ipfw_table_flushall_oncpu(ctx
, 1);
7474 netisr_forwardmsg(&nmsg
->base
, mycpuid
+ 1);
7478 ipfw_fini_dispatch(netmsg_t nmsg
)
7480 struct netmsg_base nm
;
7485 ipfw_crossref_reap();
7487 if (ipfw_gd
.ipfw_refcnt
!= 0) {
7495 /* Synchronize any inflight state/track expire IPIs. */
7496 lwkt_synchronize_ipiqs("ipfwfini");
7498 netmsg_init(&nm
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
7499 ipfw_ctx_fini_dispatch
);
7500 netisr_domsg_global(&nm
);
7502 callout_cancel(&ipfw_gd
.ipfw_crossref_ch
);
7504 netisr_dropmsg(&ipfw_gd
.ipfw_crossref_nm
);
7507 if (ipfw_ifaddr_event
!= NULL
)
7508 EVENTHANDLER_DEREGISTER(ifaddr_event
, ipfw_ifaddr_event
);
7510 ip_fw_chk_ptr
= NULL
;
7511 ip_fw_ctl_ptr
= NULL
;
7512 ip_fw_dn_io_ptr
= NULL
;
7513 ipfw_flush(1 /* kill default rule */);
7515 /* Free pre-cpu context */
7516 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
)
7517 kfree(ipfw_ctx
[cpu
], M_IPFW
);
7519 kprintf("IP firewall unloaded\n");
7521 netisr_replymsg(&nmsg
->base
, error
);
7525 ipfw_fflush_dispatch(netmsg_t nmsg
)
7528 ipfw_flush(0 /* keep default rule */);
7529 ipfw_crossref_reap();
7530 netisr_replymsg(&nmsg
->base
, 0);
7536 struct netmsg_base smsg
;
7540 netmsg_init(&smsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
7541 ipfw_fflush_dispatch
);
7542 netisr_domsg(&smsg
, 0);
7544 if (ipfw_gd
.ipfw_refcnt
== 0)
7546 kprintf("ipfw: flush pending %d\n", ++i
);
7547 tsleep(&smsg
, 0, "ipfwff", (3 * hz
) / 2);
7550 netmsg_init(&smsg
, NULL
, &curthread
->td_msgport
, MSGF_PRIORITY
,
7551 ipfw_fini_dispatch
);
7552 return netisr_domsg(&smsg
, 0);
7555 #endif /* KLD_MODULE */
7558 ipfw_modevent(module_t mod
, int type
, void *unused
)
7569 kprintf("ipfw statically compiled, cannot unload\n");
7581 static moduledata_t ipfwmod
= {
7586 DECLARE_MODULE(ipfw
, ipfwmod
, SI_SUB_PROTO_END
, SI_ORDER_ANY
);
7587 MODULE_VERSION(ipfw
, 1);