2 * Copyright (C) 1993-2001, 2003 by Darren Reed.
4 * See the IPFILTER.LICENCE file for details on licencing.
6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
8 * Copyright (c) 2015, Joyent, Inc. All rights reserved.
12 static const char sccsid
[] = "@(#)ip_fil_solaris.c 1.7 07/22/06 (C) 1993-2000 Darren Reed";
13 static const char rcsid
[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
16 #include <sys/types.h>
17 #include <sys/errno.h>
18 #include <sys/param.h>
19 #include <sys/cpuvar.h>
21 #include <sys/ioctl.h>
22 #include <sys/filio.h>
23 #include <sys/systm.h>
24 #include <sys/strsubr.h>
27 #include <sys/sunddi.h>
28 #include <sys/ksynch.h>
30 #include <sys/mkdev.h>
31 #include <sys/protosw.h>
32 #include <sys/socket.h>
33 #include <sys/dditypes.h>
34 #include <sys/cmn_err.h>
38 #include <net/route.h>
39 #include <netinet/in.h>
40 #include <netinet/in_systm.h>
41 #include <netinet/ip.h>
42 #include <netinet/ip_var.h>
43 #include <netinet/tcp.h>
44 #include <netinet/udp.h>
45 #include <netinet/tcpip.h>
46 #include <netinet/ip_icmp.h>
47 #include "netinet/ip_compat.h"
49 # include <netinet/icmp6.h>
51 #include "netinet/ip_fil.h"
52 #include "netinet/ip_nat.h"
53 #include "netinet/ip_frag.h"
54 #include "netinet/ip_state.h"
55 #include "netinet/ip_auth.h"
56 #include "netinet/ip_proxy.h"
57 #include "netinet/ipf_stack.h"
58 #ifdef IPFILTER_LOOKUP
59 # include "netinet/ip_lookup.h"
61 #include <inet/ip_ire.h>
66 static int frzerostats
__P((caddr_t
, ipf_stack_t
*));
67 static int fr_setipfloopback
__P((int, ipf_stack_t
*));
68 static int fr_enableipf
__P((ipf_stack_t
*, int));
69 static int fr_send_ip
__P((fr_info_t
*fin
, mblk_t
*m
, mblk_t
**mp
));
70 static int ipf_nic_event_v4
__P((hook_event_token_t
, hook_data_t
, void *));
71 static int ipf_nic_event_v6
__P((hook_event_token_t
, hook_data_t
, void *));
72 static int ipf_hook
__P((hook_data_t
, int, int, void *));
73 static int ipf_hook4_in
__P((hook_event_token_t
, hook_data_t
, void *));
74 static int ipf_hook4_out
__P((hook_event_token_t
, hook_data_t
, void *));
75 static int ipf_hook4_loop_out
__P((hook_event_token_t
, hook_data_t
,
77 static int ipf_hook4_loop_in
__P((hook_event_token_t
, hook_data_t
, void *));
78 static int ipf_hook4
__P((hook_data_t
, int, int, void *));
79 static int ipf_hook6_out
__P((hook_event_token_t
, hook_data_t
, void *));
80 static int ipf_hook6_in
__P((hook_event_token_t
, hook_data_t
, void *));
81 static int ipf_hook6_loop_out
__P((hook_event_token_t
, hook_data_t
,
83 static int ipf_hook6_loop_in
__P((hook_event_token_t
, hook_data_t
,
85 static int ipf_hook6
__P((hook_data_t
, int, int, void *));
86 extern int ipf_geniter
__P((ipftoken_t
*, ipfgeniter_t
*, ipf_stack_t
*));
87 extern int ipf_frruleiter
__P((void *, int, void *, ipf_stack_t
*));
91 u_int
*ip_ttl_ptr
= NULL
;
92 u_int
*ip_mtudisc
= NULL
;
94 int *ip_forwarding
= NULL
;
95 u_int
*ip6_forwarding
= NULL
;
97 u_int
*ip_forwarding
= NULL
;
100 u_long
*ip_ttl_ptr
= NULL
;
101 u_long
*ip_mtudisc
= NULL
;
102 u_long
*ip_forwarding
= NULL
;
106 vmem_t
*ipf_minor
; /* minor number arena */
107 void *ipf_state
; /* DDI state */
110 * GZ-controlled and per-zone stacks:
112 * For each non-global zone, we create two ipf stacks: the per-zone stack and
113 * the GZ-controlled stack. The per-zone stack can be controlled and observed
114 * from inside the zone or from the global zone. The GZ-controlled stack can
115 * only be controlled and observed from the global zone (though the rules
116 * still only affect that non-global zone).
118 * The two hooks are always arranged so that the GZ-controlled stack is always
119 * "outermost" with respect to the zone. The traffic flow then looks like
124 * nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone
128 * nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone
131 /* IPv4 hook names */
132 char *hook4_nicevents
= "ipfilter_hook4_nicevents";
133 char *hook4_nicevents_gz
= "ipfilter_hook4_nicevents_gz";
134 char *hook4_in
= "ipfilter_hook4_in";
135 char *hook4_in_gz
= "ipfilter_hook4_in_gz";
136 char *hook4_out
= "ipfilter_hook4_out";
137 char *hook4_out_gz
= "ipfilter_hook4_out_gz";
138 char *hook4_loop_in
= "ipfilter_hook4_loop_in";
139 char *hook4_loop_in_gz
= "ipfilter_hook4_loop_in_gz";
140 char *hook4_loop_out
= "ipfilter_hook4_loop_out";
141 char *hook4_loop_out_gz
= "ipfilter_hook4_loop_out_gz";
143 /* IPv6 hook names */
144 char *hook6_nicevents
= "ipfilter_hook6_nicevents";
145 char *hook6_nicevents_gz
= "ipfilter_hook6_nicevents_gz";
146 char *hook6_in
= "ipfilter_hook6_in";
147 char *hook6_in_gz
= "ipfilter_hook6_in_gz";
148 char *hook6_out
= "ipfilter_hook6_out";
149 char *hook6_out_gz
= "ipfilter_hook6_out_gz";
150 char *hook6_loop_in
= "ipfilter_hook6_loop_in";
151 char *hook6_loop_in_gz
= "ipfilter_hook6_loop_in_gz";
152 char *hook6_loop_out
= "ipfilter_hook6_loop_out";
153 char *hook6_loop_out_gz
= "ipfilter_hook6_loop_out_gz";
155 /* ------------------------------------------------------------------------ */
156 /* Function: ipldetach */
157 /* Returns: int - 0 == success, else error. */
158 /* Parameters: Nil */
160 /* This function is responsible for undoing anything that might have been */
161 /* done in a call to iplattach(). It must be able to clean up from a call */
162 /* to iplattach() that did not succeed. Why might that happen? Someone */
163 /* configures a table to be so large that we cannot allocate enough memory */
165 /* ------------------------------------------------------------------------ */
170 ASSERT(RW_WRITE_HELD(&ifs
->ifs_ipf_global
.ipf_lk
));
174 if (ifs
->ifs_fr_control_forwarding
& 2) {
175 if (ip_forwarding
!= NULL
)
178 if (ip6_forwarding
!= NULL
)
185 * This lock needs to be dropped around the net_hook_unregister calls
186 * because we can deadlock here with:
187 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
188 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
190 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
192 #define UNDO_HOOK(_f, _b, _e, _h) \
194 if (ifs->_f != NULL) { \
196 int tmp = net_hook_unregister(ifs->_f, \
198 ifs->_b = (tmp != 0 && tmp != ENXIO); \
199 if (!ifs->_b && ifs->_h != NULL) { \
200 hook_free(ifs->_h); \
203 } else if (ifs->_h != NULL) { \
204 hook_free(ifs->_h); \
214 if (ifs
->ifs_ipf_ipv6
!= NULL
) {
215 UNDO_HOOK(ifs_ipf_ipv6
, ifs_hook6_physical_in
,
216 NH_PHYSICAL_IN
, ifs_ipfhook6_in
);
217 UNDO_HOOK(ifs_ipf_ipv6
, ifs_hook6_physical_out
,
218 NH_PHYSICAL_OUT
, ifs_ipfhook6_out
);
219 UNDO_HOOK(ifs_ipf_ipv6
, ifs_hook6_nic_events
,
220 NH_NIC_EVENTS
, ifs_ipfhook6_nicevents
);
221 UNDO_HOOK(ifs_ipf_ipv6
, ifs_hook6_loopback_in
,
222 NH_LOOPBACK_IN
, ifs_ipfhook6_loop_in
);
223 UNDO_HOOK(ifs_ipf_ipv6
, ifs_hook6_loopback_out
,
224 NH_LOOPBACK_OUT
, ifs_ipfhook6_loop_out
);
226 if (net_protocol_release(ifs
->ifs_ipf_ipv6
) != 0)
228 ifs
->ifs_ipf_ipv6
= NULL
;
234 if (ifs
->ifs_ipf_ipv4
!= NULL
) {
235 UNDO_HOOK(ifs_ipf_ipv4
, ifs_hook4_physical_in
,
236 NH_PHYSICAL_IN
, ifs_ipfhook4_in
);
237 UNDO_HOOK(ifs_ipf_ipv4
, ifs_hook4_physical_out
,
238 NH_PHYSICAL_OUT
, ifs_ipfhook4_out
);
239 UNDO_HOOK(ifs_ipf_ipv4
, ifs_hook4_nic_events
,
240 NH_NIC_EVENTS
, ifs_ipfhook4_nicevents
);
241 UNDO_HOOK(ifs_ipf_ipv4
, ifs_hook4_loopback_in
,
242 NH_LOOPBACK_IN
, ifs_ipfhook4_loop_in
);
243 UNDO_HOOK(ifs_ipf_ipv4
, ifs_hook4_loopback_out
,
244 NH_LOOPBACK_OUT
, ifs_ipfhook4_loop_out
);
246 if (net_protocol_release(ifs
->ifs_ipf_ipv4
) != 0)
248 ifs
->ifs_ipf_ipv4
= NULL
;
254 cmn_err(CE_CONT
, "ipldetach()\n");
257 WRITE_ENTER(&ifs
->ifs_ipf_global
);
258 fr_deinitialise(ifs
);
260 (void) frflush(IPL_LOGIPF
, 0, FR_INQUE
|FR_OUTQUE
|FR_INACTIVE
, ifs
);
261 (void) frflush(IPL_LOGIPF
, 0, FR_INQUE
|FR_OUTQUE
, ifs
);
263 if (ifs
->ifs_ipf_locks_done
== 1) {
264 MUTEX_DESTROY(&ifs
->ifs_ipf_timeoutlock
);
265 MUTEX_DESTROY(&ifs
->ifs_ipf_rw
);
266 RW_DESTROY(&ifs
->ifs_ipf_tokens
);
267 RW_DESTROY(&ifs
->ifs_ipf_ipidfrag
);
268 ifs
->ifs_ipf_locks_done
= 0;
271 if (ifs
->ifs_hook4_physical_in
|| ifs
->ifs_hook4_physical_out
||
272 ifs
->ifs_hook4_nic_events
|| ifs
->ifs_hook4_loopback_in
||
273 ifs
->ifs_hook4_loopback_out
|| ifs
->ifs_hook6_nic_events
||
274 ifs
->ifs_hook6_physical_in
|| ifs
->ifs_hook6_physical_out
||
275 ifs
->ifs_hook6_loopback_in
|| ifs
->ifs_hook6_loopback_out
)
281 WRITE_ENTER(&ifs
->ifs_ipf_global
);
291 netid_t id
= ifs
->ifs_netid
;
294 cmn_err(CE_CONT
, "iplattach()\n");
297 ASSERT(RW_WRITE_HELD(&ifs
->ifs_ipf_global
.ipf_lk
));
298 ifs
->ifs_fr_flags
= IPF_LOGGING
;
300 ifs
->ifs_fr_update_ipid
= 0;
302 ifs
->ifs_fr_update_ipid
= 1;
304 ifs
->ifs_fr_minttl
= 4;
305 ifs
->ifs_fr_icmpminfragmtu
= 68;
306 #if defined(IPFILTER_DEFAULT_BLOCK)
307 ifs
->ifs_fr_pass
= FR_BLOCK
|FR_NOMATCH
;
309 ifs
->ifs_fr_pass
= (IPF_DEFAULT_PASS
)|FR_NOMATCH
;
312 bzero((char *)ifs
->ifs_frcache
, sizeof(ifs
->ifs_frcache
));
313 MUTEX_INIT(&ifs
->ifs_ipf_rw
, "ipf rw mutex");
314 MUTEX_INIT(&ifs
->ifs_ipf_timeoutlock
, "ipf timeout lock mutex");
315 RWLOCK_INIT(&ifs
->ifs_ipf_ipidfrag
, "ipf IP NAT-Frag rwlock");
316 RWLOCK_INIT(&ifs
->ifs_ipf_tokens
, "ipf token rwlock");
317 ifs
->ifs_ipf_locks_done
= 1;
319 if (fr_initialise(ifs
) < 0)
323 * For incoming packets, we want the GZ-controlled hooks to run before
324 * the per-zone hooks, regardless of what order they are installed.
325 * See the "GZ-controlled and per-zone stacks" comment block at the top
328 #define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a) \
329 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \
330 (x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER; \
331 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
333 HOOK_INIT_GZ_BEFORE(ifs
->ifs_ipfhook4_nicevents
, ipf_nic_event_v4
,
334 hook4_nicevents
, hook4_nicevents_gz
, ifs
);
335 HOOK_INIT_GZ_BEFORE(ifs
->ifs_ipfhook4_in
, ipf_hook4_in
,
336 hook4_in
, hook4_in_gz
, ifs
);
337 HOOK_INIT_GZ_BEFORE(ifs
->ifs_ipfhook4_loop_in
, ipf_hook4_loop_in
,
338 hook4_loop_in
, hook4_loop_in_gz
, ifs
);
341 * For outgoing packets, we want the GZ-controlled hooks to run after
342 * the per-zone hooks, regardless of what order they are installed.
343 * See the "GZ-controlled and per-zone stacks" comment block at the top
346 #define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a) \
347 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \
348 (x)->h_hint = ifs->ifs_gz_controlled ? HH_AFTER : HH_BEFORE; \
349 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
351 HOOK_INIT_GZ_AFTER(ifs
->ifs_ipfhook4_out
, ipf_hook4_out
,
352 hook4_out
, hook4_out_gz
, ifs
);
353 HOOK_INIT_GZ_AFTER(ifs
->ifs_ipfhook4_loop_out
, ipf_hook4_loop_out
,
354 hook4_loop_out
, hook4_loop_out_gz
, ifs
);
357 * If we hold this lock over all of the net_hook_register calls, we
358 * can cause a deadlock to occur with the following lock ordering:
359 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
360 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
362 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
367 ifs
->ifs_ipf_ipv4
= net_protocol_lookup(id
, NHF_INET
);
368 if (ifs
->ifs_ipf_ipv4
== NULL
)
371 ifs
->ifs_hook4_nic_events
= (net_hook_register(ifs
->ifs_ipf_ipv4
,
372 NH_NIC_EVENTS
, ifs
->ifs_ipfhook4_nicevents
) == 0);
373 if (!ifs
->ifs_hook4_nic_events
)
376 ifs
->ifs_hook4_physical_in
= (net_hook_register(ifs
->ifs_ipf_ipv4
,
377 NH_PHYSICAL_IN
, ifs
->ifs_ipfhook4_in
) == 0);
378 if (!ifs
->ifs_hook4_physical_in
)
381 ifs
->ifs_hook4_physical_out
= (net_hook_register(ifs
->ifs_ipf_ipv4
,
382 NH_PHYSICAL_OUT
, ifs
->ifs_ipfhook4_out
) == 0);
383 if (!ifs
->ifs_hook4_physical_out
)
386 if (ifs
->ifs_ipf_loopback
) {
387 ifs
->ifs_hook4_loopback_in
= (net_hook_register(
388 ifs
->ifs_ipf_ipv4
, NH_LOOPBACK_IN
,
389 ifs
->ifs_ipfhook4_loop_in
) == 0);
390 if (!ifs
->ifs_hook4_loopback_in
)
393 ifs
->ifs_hook4_loopback_out
= (net_hook_register(
394 ifs
->ifs_ipf_ipv4
, NH_LOOPBACK_OUT
,
395 ifs
->ifs_ipfhook4_loop_out
) == 0);
396 if (!ifs
->ifs_hook4_loopback_out
)
403 ifs
->ifs_ipf_ipv6
= net_protocol_lookup(id
, NHF_INET6
);
404 if (ifs
->ifs_ipf_ipv6
== NULL
)
407 HOOK_INIT_GZ_BEFORE(ifs
->ifs_ipfhook6_nicevents
, ipf_nic_event_v6
,
408 hook6_nicevents
, hook6_nicevents_gz
, ifs
);
409 HOOK_INIT_GZ_BEFORE(ifs
->ifs_ipfhook6_in
, ipf_hook6_in
,
410 hook6_in
, hook6_in_gz
, ifs
);
411 HOOK_INIT_GZ_BEFORE(ifs
->ifs_ipfhook6_loop_in
, ipf_hook6_loop_in
,
412 hook6_loop_in
, hook6_loop_in_gz
, ifs
);
413 HOOK_INIT_GZ_AFTER(ifs
->ifs_ipfhook6_out
, ipf_hook6_out
,
414 hook6_out
, hook6_out_gz
, ifs
);
415 HOOK_INIT_GZ_AFTER(ifs
->ifs_ipfhook6_loop_out
, ipf_hook6_loop_out
,
416 hook6_loop_out
, hook6_loop_out_gz
, ifs
);
418 ifs
->ifs_hook6_nic_events
= (net_hook_register(ifs
->ifs_ipf_ipv6
,
419 NH_NIC_EVENTS
, ifs
->ifs_ipfhook6_nicevents
) == 0);
420 if (!ifs
->ifs_hook6_nic_events
)
423 ifs
->ifs_hook6_physical_in
= (net_hook_register(ifs
->ifs_ipf_ipv6
,
424 NH_PHYSICAL_IN
, ifs
->ifs_ipfhook6_in
) == 0);
425 if (!ifs
->ifs_hook6_physical_in
)
428 ifs
->ifs_hook6_physical_out
= (net_hook_register(ifs
->ifs_ipf_ipv6
,
429 NH_PHYSICAL_OUT
, ifs
->ifs_ipfhook6_out
) == 0);
430 if (!ifs
->ifs_hook6_physical_out
)
433 if (ifs
->ifs_ipf_loopback
) {
434 ifs
->ifs_hook6_loopback_in
= (net_hook_register(
435 ifs
->ifs_ipf_ipv6
, NH_LOOPBACK_IN
,
436 ifs
->ifs_ipfhook6_loop_in
) == 0);
437 if (!ifs
->ifs_hook6_loopback_in
)
440 ifs
->ifs_hook6_loopback_out
= (net_hook_register(
441 ifs
->ifs_ipf_ipv6
, NH_LOOPBACK_OUT
,
442 ifs
->ifs_ipfhook6_loop_out
) == 0);
443 if (!ifs
->ifs_hook6_loopback_out
)
448 * Reacquire ipf_global, now it is safe.
450 WRITE_ENTER(&ifs
->ifs_ipf_global
);
452 /* Do not use private interface ip_params_arr[] in Solaris 10 */
456 ip_forwarding
= &ip_g_forward
;
459 * XXX - There is no terminator for this array, so it is not possible
460 * to tell if what we are looking for is missing and go off the end
466 if (!strcmp(ip_param_arr
[i
].ip_param_name
, "ip_def_ttl")) {
467 ip_ttl_ptr
= &ip_param_arr
[i
].ip_param_value
;
468 } else if (!strcmp(ip_param_arr
[i
].ip_param_name
,
469 "ip_path_mtu_discovery")) {
470 ip_mtudisc
= &ip_param_arr
[i
].ip_param_value
;
473 else if (!strcmp(ip_param_arr
[i
].ip_param_name
,
475 ip_forwarding
= &ip_param_arr
[i
].ip_param_value
;
478 else if (!strcmp(ip_param_arr
[i
].ip_param_name
,
480 ip6_forwarding
= &ip_param_arr
[i
].ip_param_value
;
484 if (ip_mtudisc
!= NULL
&& ip_ttl_ptr
!= NULL
&&
486 ip6_forwarding
!= NULL
&&
488 ip_forwarding
!= NULL
)
493 if (ifs
->ifs_fr_control_forwarding
& 1) {
494 if (ip_forwarding
!= NULL
)
497 if (ip6_forwarding
!= NULL
)
506 WRITE_ENTER(&ifs
->ifs_ipf_global
);
510 static int fr_setipfloopback(set
, ifs
)
514 if (ifs
->ifs_ipf_ipv4
== NULL
|| ifs
->ifs_ipf_ipv6
== NULL
)
517 if (set
&& !ifs
->ifs_ipf_loopback
) {
518 ifs
->ifs_ipf_loopback
= 1;
520 ifs
->ifs_hook4_loopback_in
= (net_hook_register(
521 ifs
->ifs_ipf_ipv4
, NH_LOOPBACK_IN
,
522 ifs
->ifs_ipfhook4_loop_in
) == 0);
523 if (!ifs
->ifs_hook4_loopback_in
)
526 ifs
->ifs_hook4_loopback_out
= (net_hook_register(
527 ifs
->ifs_ipf_ipv4
, NH_LOOPBACK_OUT
,
528 ifs
->ifs_ipfhook4_loop_out
) == 0);
529 if (!ifs
->ifs_hook4_loopback_out
)
532 ifs
->ifs_hook6_loopback_in
= (net_hook_register(
533 ifs
->ifs_ipf_ipv6
, NH_LOOPBACK_IN
,
534 ifs
->ifs_ipfhook6_loop_in
) == 0);
535 if (!ifs
->ifs_hook6_loopback_in
)
538 ifs
->ifs_hook6_loopback_out
= (net_hook_register(
539 ifs
->ifs_ipf_ipv6
, NH_LOOPBACK_OUT
,
540 ifs
->ifs_ipfhook6_loop_out
) == 0);
541 if (!ifs
->ifs_hook6_loopback_out
)
544 } else if (!set
&& ifs
->ifs_ipf_loopback
) {
545 ifs
->ifs_ipf_loopback
= 0;
547 ifs
->ifs_hook4_loopback_in
=
548 (net_hook_unregister(ifs
->ifs_ipf_ipv4
,
549 NH_LOOPBACK_IN
, ifs
->ifs_ipfhook4_loop_in
) != 0);
550 if (ifs
->ifs_hook4_loopback_in
)
553 ifs
->ifs_hook4_loopback_out
=
554 (net_hook_unregister(ifs
->ifs_ipf_ipv4
,
555 NH_LOOPBACK_OUT
, ifs
->ifs_ipfhook4_loop_out
) != 0);
556 if (ifs
->ifs_hook4_loopback_out
)
/*
 * Remove the IPv6 loopback-in hook using the IPv6 hook descriptor, i.e. the
 * same descriptor that was handed to net_hook_register() for
 * ifs_ipf_ipv6/NH_LOOPBACK_IN when loopback filtering was enabled.
 * (Previously the IPv4 descriptor, ifs_ipfhook4_loop_in, was passed here.)
 */
559 ifs
->ifs_hook6_loopback_in
=
560 (net_hook_unregister(ifs
->ifs_ipf_ipv6
,
561 NH_LOOPBACK_IN
, ifs
->ifs_ipfhook6_loop_in
) != 0);
562 if (ifs
->ifs_hook6_loopback_in
)
565 ifs
->ifs_hook6_loopback_out
=
566 (net_hook_unregister(ifs
->ifs_ipf_ipv6
,
567 NH_LOOPBACK_OUT
, ifs
->ifs_ipfhook6_loop_out
) != 0);
568 if (ifs
->ifs_hook6_loopback_out
)
576 * Filter ioctl interface.
579 int iplioctl(dev
, cmd
, data
, mode
, cp
, rp
)
600 cmn_err(CE_CONT
, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
601 dev
, cmd
, data
, mode
, cp
, rp
);
603 unit
= getminor(dev
);
605 isp
= ddi_get_soft_state(ipf_state
, unit
);
608 unit
= isp
->ipfs_minor
;
610 zid
= crgetzoneid(cp
);
611 if (cmd
== SIOCIPFZONESET
) {
612 if (zid
== GLOBAL_ZONEID
)
613 return fr_setzoneid(isp
, (caddr_t
) data
);
618 * ipf_find_stack returns with a read lock on ifs_ipf_global
620 ifs
= ipf_find_stack(zid
, isp
);
624 if (ifs
->ifs_fr_running
<= 0) {
625 if (unit
!= IPL_LOGIPF
) {
626 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
629 if (cmd
!= SIOCIPFGETNEXT
&& cmd
!= SIOCIPFGET
&&
630 cmd
!= SIOCIPFSET
&& cmd
!= SIOCFRENB
&&
631 cmd
!= SIOCGETFS
&& cmd
!= SIOCGETFF
) {
632 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
637 if (ifs
->ifs_fr_enable_active
!= 0) {
638 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
642 error
= fr_ioctlswitch(unit
, (caddr_t
)data
, cmd
, mode
, crgetuid(cp
),
645 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
653 if (!(mode
& FWRITE
))
656 error
= COPYIN((caddr_t
)data
, (caddr_t
)&enable
,
663 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
664 WRITE_ENTER(&ifs
->ifs_ipf_global
);
667 * We must recheck fr_enable_active here, since we've
668 * dropped ifs_ipf_global from R in order to get it
671 if (ifs
->ifs_fr_enable_active
== 0) {
672 ifs
->ifs_fr_enable_active
= 1;
673 error
= fr_enableipf(ifs
, enable
);
674 ifs
->ifs_fr_enable_active
= 0;
679 if (!(mode
& FWRITE
)) {
684 case SIOCIPFGETNEXT
:
686 error
= fr_ipftune(cmd
, (void *)data
, ifs
);
689 if (!(mode
& FWRITE
))
692 error
= COPYIN((caddr_t
)data
,
693 (caddr_t
)&ifs
->ifs_fr_flags
,
694 sizeof(ifs
->ifs_fr_flags
));
700 error
= COPYIN((caddr_t
)data
, (caddr_t
)&tmp
,
705 error
= fr_setipfloopback(tmp
, ifs
);
708 error
= COPYOUT((caddr_t
)&ifs
->ifs_fr_flags
, (caddr_t
)data
,
709 sizeof(ifs
->ifs_fr_flags
));
714 error
= fr_resolvefunc((void *)data
);
720 if (!(mode
& FWRITE
))
723 error
= frrequest(unit
, cmd
, (caddr_t
)data
,
724 ifs
->ifs_fr_active
, 1, ifs
);
729 if (!(mode
& FWRITE
))
732 error
= frrequest(unit
, cmd
, (caddr_t
)data
,
733 1 - ifs
->ifs_fr_active
, 1, ifs
);
736 if (!(mode
& FWRITE
))
739 WRITE_ENTER(&ifs
->ifs_ipf_mutex
);
740 bzero((char *)ifs
->ifs_frcache
,
741 sizeof (ifs
->ifs_frcache
));
742 error
= COPYOUT((caddr_t
)&ifs
->ifs_fr_active
,
744 sizeof(ifs
->ifs_fr_active
));
748 ifs
->ifs_fr_active
= 1 - ifs
->ifs_fr_active
;
749 RWLOCK_EXIT(&ifs
->ifs_ipf_mutex
);
753 fr_getstat(&fio
, ifs
);
754 error
= fr_outobj((void *)data
, &fio
, IPFOBJ_IPFSTAT
);
757 if (!(mode
& FWRITE
))
760 error
= fr_zerostats((caddr_t
)data
, ifs
);
763 if (!(mode
& FWRITE
))
766 error
= COPYIN((caddr_t
)data
, (caddr_t
)&tmp
,
769 tmp
= frflush(unit
, 4, tmp
, ifs
);
770 error
= COPYOUT((caddr_t
)&tmp
, (caddr_t
)data
,
780 if (!(mode
& FWRITE
))
783 error
= COPYIN((caddr_t
)data
, (caddr_t
)&tmp
,
786 tmp
= frflush(unit
, 6, tmp
, ifs
);
787 error
= COPYOUT((caddr_t
)&tmp
, (caddr_t
)data
,
797 error
= COPYIN((caddr_t
)data
, (caddr_t
)&tmp
, sizeof(tmp
));
799 ifs
->ifs_fr_state_lock
= tmp
;
800 ifs
->ifs_fr_nat_lock
= tmp
;
801 ifs
->ifs_fr_frag_lock
= tmp
;
802 ifs
->ifs_fr_auth_lock
= tmp
;
808 if (!(mode
& FWRITE
))
811 tmp
= ipflog_clear(unit
, ifs
);
812 error
= COPYOUT((caddr_t
)&tmp
, (caddr_t
)data
,
818 #endif /* IPFILTER_LOG */
820 if (!(mode
& FWRITE
))
823 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
824 WRITE_ENTER(&ifs
->ifs_ipf_global
);
826 frsync(IPFSYNC_RESYNC
, 0, NULL
, NULL
, ifs
);
827 fr_natifpsync(IPFSYNC_RESYNC
, 0, NULL
, NULL
, ifs
);
828 fr_nataddrsync(0, NULL
, NULL
, ifs
);
829 fr_statesync(IPFSYNC_RESYNC
, 0, NULL
, NULL
, ifs
);
834 error
= fr_outobj((void *)data
, fr_fragstats(ifs
),
839 tmp
= (int)ifs
->ifs_iplused
[IPL_LOGIPF
];
841 error
= COPYOUT((caddr_t
)&tmp
, (caddr_t
)data
, sizeof(tmp
));
847 error
= ipf_frruleiter((caddr_t
)data
, crgetuid(cp
),
852 error
= ipf_genericiter((caddr_t
)data
, crgetuid(cp
),
857 error
= BCOPYIN((caddr_t
)data
, (caddr_t
)&tmp
, sizeof(tmp
));
861 error
= ipf_deltoken(tmp
, crgetuid(cp
), curproc
, ifs
);
867 cmn_err(CE_NOTE
, "Unknown: cmd 0x%x data %p",
873 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
878 static int fr_enableipf(ifs
, enable
)
885 error
= ipldetach(ifs
);
887 ifs
->ifs_fr_running
= -1;
891 if (ifs
->ifs_fr_running
> 0)
894 error
= iplattach(ifs
);
896 if (ifs
->ifs_fr_timer_id
== NULL
) {
897 int hz
= drv_usectohz(500000);
899 ifs
->ifs_fr_timer_id
= timeout(fr_slowtimer
,
903 ifs
->ifs_fr_running
= 1;
905 (void) ipldetach(ifs
);
911 phy_if_t
get_unit(name
, v
, ifs
)
919 nif
= ifs
->ifs_ipf_ipv4
;
921 nif
= ifs
->ifs_ipf_ipv6
;
925 return (net_phylookup(nif
, name
));
929 * routines below for saving IP headers to buffer
932 int iplopen(devp
, flags
, otype
, cred
)
938 minor_t min
= getminor(*devp
);
942 cmn_err(CE_CONT
, "iplopen(%x,%x,%x,%x)\n", devp
, flags
, otype
, cred
);
944 if (!(otype
& OTYP_CHR
))
947 if (IPL_LOGMAX
< min
)
950 minor
= (minor_t
)(uintptr_t)vmem_alloc(ipf_minor
, 1,
951 VM_BESTFIT
| VM_SLEEP
);
953 if (ddi_soft_state_zalloc(ipf_state
, minor
) != 0) {
954 vmem_free(ipf_minor
, (void *)(uintptr_t)minor
, 1);
958 *devp
= makedevice(getmajor(*devp
), minor
);
959 isp
= ddi_get_soft_state(ipf_state
, minor
);
962 isp
->ipfs_minor
= min
;
963 isp
->ipfs_zoneid
= IPFS_ZONE_UNSET
;
970 int iplclose(dev
, flags
, otype
, cred
)
975 minor_t min
= getminor(dev
);
978 cmn_err(CE_CONT
, "iplclose(%x,%x,%x,%x)\n", dev
, flags
, otype
, cred
);
981 if (IPL_LOGMAX
< min
)
984 ddi_soft_state_free(ipf_state
, min
);
985 vmem_free(ipf_minor
, (void *)(uintptr_t)min
, 1);
993 * both of these must operate with at least splnet() lest they be
994 * called during packet processing and cause an inconsistency to appear in
998 int iplread(dev
, uio
, cp
)
1000 register struct uio
*uio
;
1006 ipf_devstate_t
*isp
;
1008 unit
= getminor(dev
);
1009 isp
= ddi_get_soft_state(ipf_state
, unit
);
1012 unit
= isp
->ipfs_minor
;
1016 * ipf_find_stack returns with a read lock on ifs_ipf_global
1018 ifs
= ipf_find_stack(crgetzoneid(cp
), isp
);
1023 cmn_err(CE_CONT
, "iplread(%x,%x,%x)\n", dev
, uio
, cp
);
1026 if (ifs
->ifs_fr_running
< 1) {
1027 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
1031 # ifdef IPFILTER_SYNC
1032 if (unit
== IPL_LOGSYNC
) {
1033 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
1034 return ipfsync_read(uio
);
1038 ret
= ipflog_read(unit
, uio
, ifs
);
1039 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
1042 #endif /* IPFILTER_LOG */
1047 * both of these must operate with at least splnet() lest they be
1048 * called during packet processing and cause an inconsistency to appear in
1051 int iplwrite(dev
, uio
, cp
)
1053 register struct uio
*uio
;
1058 ipf_devstate_t
*isp
;
1060 unit
= getminor(dev
);
1061 isp
= ddi_get_soft_state(ipf_state
, unit
);
1064 unit
= isp
->ipfs_minor
;
1067 * ipf_find_stack returns with a read lock on ifs_ipf_global
1069 ifs
= ipf_find_stack(crgetzoneid(cp
), isp
);
1074 cmn_err(CE_CONT
, "iplwrite(%x,%x,%x)\n", dev
, uio
, cp
);
1077 if (ifs
->ifs_fr_running
< 1) {
1078 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
1082 #ifdef IPFILTER_SYNC
1083 if (getminor(dev
) == IPL_LOGSYNC
) {
1084 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
1085 return ipfsync_write(uio
);
1087 #endif /* IPFILTER_SYNC */
1088 dev
= dev
; /* LINT */
1089 uio
= uio
; /* LINT */
1091 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
1097 * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
1098 * requires a large amount of setting up and isn't any more efficient.
1100 int fr_send_reset(fin
)
1103 tcphdr_t
*tcp
, *tcp2
;
1112 if (tcp
->th_flags
& TH_RST
)
1115 #ifndef IPFILTER_CKSUM
1116 if (fr_checkl4sum(fin
) == -1)
1120 tlen
= (tcp
->th_flags
& (TH_SYN
|TH_FIN
)) ? 1 : 0;
1122 if (fin
->fin_v
== 6)
1123 hlen
= sizeof(ip6_t
);
1126 hlen
= sizeof(ip_t
);
1127 hlen
+= sizeof(*tcp2
);
1128 if ((m
= (mblk_t
*)allocb(hlen
+ 64, BPRI_HI
)) == NULL
)
1133 m
->b_wptr
= m
->b_rptr
+ hlen
;
1134 ip
= (ip_t
*)m
->b_rptr
;
1135 bzero((char *)ip
, hlen
);
1136 tcp2
= (struct tcphdr
*)(m
->b_rptr
+ hlen
- sizeof(*tcp2
));
1137 tcp2
->th_dport
= tcp
->th_sport
;
1138 tcp2
->th_sport
= tcp
->th_dport
;
1139 if (tcp
->th_flags
& TH_ACK
) {
1140 tcp2
->th_seq
= tcp
->th_ack
;
1141 tcp2
->th_flags
= TH_RST
;
1143 tcp2
->th_ack
= ntohl(tcp
->th_seq
);
1144 tcp2
->th_ack
+= tlen
;
1145 tcp2
->th_ack
= htonl(tcp2
->th_ack
);
1146 tcp2
->th_flags
= TH_RST
|TH_ACK
;
1148 tcp2
->th_off
= sizeof(struct tcphdr
) >> 2;
1150 ip
->ip_v
= fin
->fin_v
;
1152 if (fin
->fin_v
== 6) {
1153 ip6
= (ip6_t
*)m
->b_rptr
;
1154 ip6
->ip6_flow
= ((ip6_t
*)fin
->fin_ip
)->ip6_flow
;
1155 ip6
->ip6_src
= fin
->fin_dst6
.in6
;
1156 ip6
->ip6_dst
= fin
->fin_src6
.in6
;
1157 ip6
->ip6_plen
= htons(sizeof(*tcp
));
1158 ip6
->ip6_nxt
= IPPROTO_TCP
;
1159 tcp2
->th_sum
= fr_cksum(m
, (ip_t
*)ip6
, IPPROTO_TCP
, tcp2
);
1163 ip
->ip_src
.s_addr
= fin
->fin_daddr
;
1164 ip
->ip_dst
.s_addr
= fin
->fin_saddr
;
1165 ip
->ip_id
= fr_nextipid(fin
);
1166 ip
->ip_hl
= sizeof(*ip
) >> 2;
1167 ip
->ip_p
= IPPROTO_TCP
;
1168 ip
->ip_len
= sizeof(*ip
) + sizeof(*tcp
);
1169 ip
->ip_tos
= fin
->fin_ip
->ip_tos
;
1170 tcp2
->th_sum
= fr_cksum(m
, ip
, IPPROTO_TCP
, tcp2
);
1172 return fr_send_ip(fin
, m
, &m
);
1176 * Function: fr_send_ip
1177 * Returns: 0: success
1180 * fin: packet information
1181 * m: the message block where ip head starts
1183 * Send a new packet through the IP stack.
1185 * For IPv4 packets, ip_len must be in host byte order, and ip_v,
1186 * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
1189 * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
1190 * in by this function.
1192 * All other portions of the packet must be in on-the-wire format.
1195 static int fr_send_ip(fin
, m
, mpp
)
1199 qpktinfo_t qpi
, *qpip
;
1203 ipf_stack_t
*ifs
= fin
->fin_ifs
;
1205 ip
= (ip_t
*)m
->b_rptr
;
1206 bzero((char *)&fnew
, sizeof(fnew
));
1209 if (fin
->fin_v
== 6) {
1213 ip6
->ip6_vfc
= 0x60;
1214 ip6
->ip6_hlim
= 127;
1216 hlen
= sizeof(*ip6
);
1217 fnew
.fin_plen
= ntohs(ip6
->ip6_plen
) + hlen
;
1224 if (net_getpmtuenabled(ifs
->ifs_ipf_ipv4
) == 1)
1225 ip
->ip_off
= htons(IP_DF
);
1227 if (ip_ttl_ptr
!= NULL
)
1228 ip
->ip_ttl
= (u_char
)(*ip_ttl_ptr
);
1231 if (ip_mtudisc
!= NULL
)
1232 ip
->ip_off
= htons(*ip_mtudisc
? IP_DF
: 0);
1234 ip
->ip_off
= htons(IP_DF
);
1237 * The dance with byte order and ip_len/ip_off is because in
1238 * fr_fastroute, it expects them to be in host byte order but
1239 * ipf_cksum expects them to be in network byte order.
1241 ip
->ip_len
= htons(ip
->ip_len
);
1242 ip
->ip_sum
= ipf_cksum((u_short
*)ip
, sizeof(*ip
));
1243 ip
->ip_len
= ntohs(ip
->ip_len
);
1244 ip
->ip_off
= ntohs(ip
->ip_off
);
1246 fnew
.fin_plen
= ip
->ip_len
;
1249 qpip
= fin
->fin_qpi
;
1251 qpi
.qpi_ill
= qpip
->qpi_ill
;
1254 fnew
.fin_qpi
= &qpi
;
1255 fnew
.fin_ifp
= fin
->fin_ifp
;
1256 fnew
.fin_flx
= FI_NOCKSUM
;
1261 fnew
.fin_hlen
= hlen
;
1262 fnew
.fin_dp
= (char *)ip
+ hlen
;
1263 fnew
.fin_ifs
= fin
->fin_ifs
;
1264 (void) fr_makefrip(hlen
, ip
, &fnew
);
1266 i
= fr_fastroute(m
, mpp
, &fnew
, NULL
);
1271 int fr_send_icmp_err(type
, fin
, dst
)
1276 struct in_addr dst4
;
1290 ipf_stack_t
*ifs
= fin
->fin_ifs
;
1292 if ((type
< 0) || (type
> ICMP_MAXTYPE
))
1295 code
= fin
->fin_icode
;
1297 if ((code
< 0) || (code
>= ICMP_MAX_UNREACH
))
1301 #ifndef IPFILTER_CKSUM
1302 if (fr_checkl4sum(fin
) == -1)
1311 if (fin
->fin_v
== 6) {
1313 sz
+= MIN(mb
->b_wptr
- mb
->b_rptr
, 512);
1314 hlen
= sizeof(ip6_t
);
1315 type
= icmptoicmp6types
[type
];
1316 if (type
== ICMP6_DST_UNREACH
)
1317 code
= icmptoicmp6unreach
[code
];
1321 if ((fin
->fin_p
== IPPROTO_ICMP
) &&
1322 !(fin
->fin_flx
& FI_SHORT
))
1323 switch (ntohs(fin
->fin_data
[0]) >> 8)
1334 sz
= sizeof(ip_t
) * 2;
1335 sz
+= 8; /* 64 bits of data */
1336 hlen
= sizeof(ip_t
);
1339 sz
+= offsetof(struct icmp
, icmp_ip
);
1340 if ((m
= (mblk_t
*)allocb((size_t)sz
+ 64, BPRI_HI
)) == NULL
)
1344 m
->b_wptr
= m
->b_rptr
+ sz
;
1345 bzero((char *)m
->b_rptr
, (size_t)sz
);
1346 ip
= (ip_t
*)m
->b_rptr
;
1347 ip
->ip_v
= fin
->fin_v
;
1348 icmp
= (struct icmp
*)(m
->b_rptr
+ hlen
);
1349 icmp
->icmp_type
= type
& 0xff;
1350 icmp
->icmp_code
= code
& 0xff;
1351 phy
= (phy_if_t
)qpi
->qpi_ill
;
1352 if (type
== ICMP_UNREACH
&& (phy
!= 0) &&
1353 fin
->fin_icode
== ICMP_UNREACH_NEEDFRAG
)
1354 icmp
->icmp_nextmtu
= net_getmtu(ifs
->ifs_ipf_ipv4
, phy
,0 );
1357 if (fin
->fin_v
== 6) {
1358 struct in6_addr dst6
;
1362 ipf_stack_t
*ifs
= fin
->fin_ifs
;
1364 if (fr_ifpaddr(6, FRI_NORMAL
, (void *)phy
,
1365 (void *)&dst6
, NULL
, ifs
) == -1) {
1370 dst6
= fin
->fin_dst6
.in6
;
1373 sz
-= sizeof(ip6_t
);
1374 ip6
= (ip6_t
*)m
->b_rptr
;
1375 ip6
->ip6_flow
= ((ip6_t
*)fin
->fin_ip
)->ip6_flow
;
1376 ip6
->ip6_plen
= htons((u_short
)sz
);
1377 ip6
->ip6_nxt
= IPPROTO_ICMPV6
;
1378 ip6
->ip6_src
= dst6
;
1379 ip6
->ip6_dst
= fin
->fin_src6
.in6
;
1380 sz
-= offsetof(struct icmp
, icmp_ip
);
1381 bcopy((char *)mb
->b_rptr
, (char *)&icmp
->icmp_ip
, sz
);
1382 icmp
->icmp_cksum
= csz
- sizeof(ip6_t
);
1386 ip
->ip_hl
= sizeof(*ip
) >> 2;
1387 ip
->ip_p
= IPPROTO_ICMP
;
1388 ip
->ip_id
= fin
->fin_ip
->ip_id
;
1389 ip
->ip_tos
= fin
->fin_ip
->ip_tos
;
1390 ip
->ip_len
= (u_short
)sz
;
1392 ipf_stack_t
*ifs
= fin
->fin_ifs
;
1394 if (fr_ifpaddr(4, FRI_NORMAL
, (void *)phy
,
1395 (void *)&dst4
, NULL
, ifs
) == -1) {
1400 dst4
= fin
->fin_dst
;
1403 ip
->ip_dst
= fin
->fin_src
;
1404 bcopy((char *)fin
->fin_ip
, (char *)&icmp
->icmp_ip
,
1405 sizeof(*fin
->fin_ip
));
1406 bcopy((char *)fin
->fin_ip
+ fin
->fin_hlen
,
1407 (char *)&icmp
->icmp_ip
+ sizeof(*fin
->fin_ip
), 8);
1408 icmp
->icmp_ip
.ip_len
= htons(icmp
->icmp_ip
.ip_len
);
1409 icmp
->icmp_ip
.ip_off
= htons(icmp
->icmp_ip
.ip_off
);
1410 icmp
->icmp_cksum
= ipf_cksum((u_short
*)icmp
,
1415 * Need to exit out of these so we don't recursively call rw_enter
1418 return fr_send_ip(fin
, m
, &m
);
1421 #include <sys/time.h>
1422 #include <sys/varargs.h>
1429 * Return the first IP Address associated with an interface
1430 * For IPv6, we walk through the list of logical interfaces and return
1431 * the address of the first one that isn't a link-local interface.
1432 * We can't assume that it is :1 because another link-local address
1433 * may have been assigned there.
1436 int fr_ifpaddr(v
, atype
, ifptr
, inp
, inpmask
, ifs
)
1439 struct in_addr
*inp
, *inpmask
;
1442 struct sockaddr_in6 v6addr
[2];
1443 struct sockaddr_in v4addr
[2];
1444 net_ifaddr_t type
[2];
1445 net_handle_t net_data
;
1452 net_data
= ifs
->ifs_ipf_ipv4
;
1456 net_data
= ifs
->ifs_ipf_ipv6
;
1464 if (net_data
== NULL
)
1467 phyif
= (phy_if_t
)ifptr
;
1475 case FRI_BROADCAST
:
1476 type
[0] = NA_BROADCAST
;
1480 type
[0] = NA_ADDRESS
;
1484 type
[1] = NA_NETMASK
;
1490 idx
= net_lifgetnext(net_data
, phyif
, idx
);
1491 if (net_getlifaddr(net_data
, phyif
, idx
, 2, type
,
1494 if (!IN6_IS_ADDR_LINKLOCAL(&v6addr
[0].sin6_addr
) &&
1495 !IN6_IS_ADDR_MULTICAST(&v6addr
[0].sin6_addr
))
1502 return fr_ifpfillv6addr(atype
, &v6addr
[0], &v6addr
[1],
1506 if (net_getlifaddr(net_data
, phyif
, 0, 2, type
, array
) < 0)
1509 return fr_ifpfillv4addr(atype
, &v4addr
[0], &v4addr
[1], inp
, inpmask
);
1513 u_32_t
fr_newisn(fin
)
1516 static int iss_seq_off
= 0;
1520 ipf_stack_t
*ifs
= fin
->fin_ifs
;
1523 * Compute the base value of the ISS. It is a hash
1524 * of (saddr, sport, daddr, dport, secret).
1528 MD5Update(&ctx
, (u_char
*) &fin
->fin_fi
.fi_src
,
1529 sizeof(fin
->fin_fi
.fi_src
));
1530 MD5Update(&ctx
, (u_char
*) &fin
->fin_fi
.fi_dst
,
1531 sizeof(fin
->fin_fi
.fi_dst
));
1532 MD5Update(&ctx
, (u_char
*) &fin
->fin_dat
, sizeof(fin
->fin_dat
));
1534 MD5Update(&ctx
, ifs
->ifs_ipf_iss_secret
, sizeof(ifs
->ifs_ipf_iss_secret
));
1536 MD5Final(hash
, &ctx
);
1538 bcopy(hash
, &newiss
, sizeof(newiss
));
1541 * Now increment our "timer", and add it in to
1542 * the computed value.
1545 * XXX TCP_ISSINCR too large to use?
1547 iss_seq_off
+= 0x00010000;
1548 newiss
+= iss_seq_off
;
1553 /* ------------------------------------------------------------------------ */
1554 /* Function: fr_nextipid */
1555 /* Returns: u_short - the next IPv4 ID to use for this packet */
1556 /* Parameters: fin(I) - pointer to packet information */
1558 /* Returns the next IPv4 ID to use for this packet. */
1559 /* ------------------------------------------------------------------------ */
1560 u_short
fr_nextipid(fin
)
1563 static u_short ipid
= 0;
1565 ipf_stack_t
*ifs
= fin
->fin_ifs
;
1567 MUTEX_ENTER(&ifs
->ifs_ipf_rw
);
1568 if (fin
->fin_pktnum
!= 0) {
1569 id
= fin
->fin_pktnum
& 0xffff;
1573 MUTEX_EXIT(&ifs
->ifs_ipf_rw
);
1579 #ifndef IPFILTER_CKSUM
1582 INLINE
void fr_checkv4sum(fin
)
1585 #ifdef IPFILTER_CKSUM
1586 if (fr_checkl4sum(fin
) == -1)
1587 fin
->fin_flx
|= FI_BAD
;
1593 # ifndef IPFILTER_CKSUM
1596 INLINE
void fr_checkv6sum(fin
)
1599 # ifdef IPFILTER_CKSUM
1600 if (fr_checkl4sum(fin
) == -1)
1601 fin
->fin_flx
|= FI_BAD
;
1604 #endif /* USE_INET6 */
1611 void fr_slowtimer
__P((void *arg
))
1614 ipf_stack_t
*ifs
= arg
;
1616 READ_ENTER(&ifs
->ifs_ipf_global
);
1617 if (ifs
->ifs_fr_running
!= 1) {
1618 ifs
->ifs_fr_timer_id
= NULL
;
1619 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
1622 ipf_expiretokens(ifs
);
1624 fr_timeoutstate(ifs
);
1627 ifs
->ifs_fr_ticks
++;
1628 if (ifs
->ifs_fr_running
== 1)
1629 ifs
->ifs_fr_timer_id
= timeout(fr_slowtimer
, arg
,
1630 drv_usectohz(500000));
1632 ifs
->ifs_fr_timer_id
= NULL
;
1633 RWLOCK_EXIT(&ifs
->ifs_ipf_global
);
1637 /* ------------------------------------------------------------------------ */
1638 /* Function: fr_pullup */
1639 /* Returns: NULL == pullup failed, else pointer to protocol header */
1640 /* Parameters: m(I) - pointer to buffer where data packet starts */
1641 /* fin(I) - pointer to packet information */
1642 /* len(I) - number of bytes to pullup */
1644 /* Attempt to move at least len bytes (from the start of the buffer) into a */
1645 /* single buffer for ease of access. Operating system native functions are */
1646 /* used to manage buffers - if necessary. If the entire packet ends up in */
1647 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has */
1648 /* not been called. Both fin_ip and fin_dp are updated before exiting _IF_ */
1649 /* and ONLY if the pullup succeeds. */
1651 /* We assume that 'min' is a pointer to a buffer that is part of the chain */
1652 /* of buffers that starts at *fin->fin_mp. */
1653 /* ------------------------------------------------------------------------ */
1654 void *fr_pullup(min
, fin
, len
)
1659 qpktinfo_t
*qpi
= fin
->fin_qpi
;
1660 int out
= fin
->fin_out
, dpoff
, ipoff
;
1661 mb_t
*m
= min
, *m1
, *m2
;
1663 uint32_t start
, stuff
, end
, value
, flags
;
1664 ipf_stack_t
*ifs
= fin
->fin_ifs
;
1669 ip
= (char *)fin
->fin_ip
;
1670 if ((fin
->fin_flx
& FI_COALESCE
) != 0)
1673 ipoff
= fin
->fin_ipoff
;
1674 if (fin
->fin_dp
!= NULL
)
1675 dpoff
= (char *)fin
->fin_dp
- (char *)ip
;
1679 if (M_LEN(m
) < len
+ ipoff
) {
1682 * pfil_precheck ensures the IP header is on a 32bit
1683 * aligned address so simply fail if that isn't currently
1684 * the case (should never happen).
1689 if ((ipoff
& 3) != 0) {
1690 inc
= 4 - (ipoff
& 3);
1691 if (m
->b_rptr
- inc
>= m
->b_datap
->db_base
)
1699 * XXX This is here as a work around for a bug with DEBUG
1700 * XXX Solaris kernels. The problem is b_prev is used by IP
1701 * XXX code as a way to stash the phyint_index for a packet,
1702 * XXX this doesn't get reset by IP but freeb does an ASSERT()
1703 * XXX for both of these to be NULL. See 6442390.
1715 * Need to preserve checksum information by copying them
1716 * to newmp which heads the pulluped message.
1718 hcksum_retrieve(m
, NULL
, NULL
, &start
, &stuff
, &end
,
1721 if (pullupmsg(m
, len
+ ipoff
+ inc
) == 0) {
1722 ATOMIC_INCL(ifs
->ifs_frstats
[out
].fr_pull
[1]);
1723 FREE_MB_T(*fin
->fin_mp
);
1724 *fin
->fin_mp
= NULL
;
1728 qpi
->qpi_data
= NULL
;
1732 (void) hcksum_assoc(m
, NULL
, NULL
, start
, stuff
, end
,
1738 ip
= MTOD(m
, char *) + ipoff
;
1742 ATOMIC_INCL(ifs
->ifs_frstats
[out
].fr_pull
[0]);
1743 fin
->fin_ip
= (ip_t
*)ip
;
1744 if (fin
->fin_dp
!= NULL
)
1745 fin
->fin_dp
= (char *)fin
->fin_ip
+ dpoff
;
1747 if (len
== fin
->fin_plen
)
1748 fin
->fin_flx
|= FI_COALESCE
;
1754 * Function: fr_verifysrc
1755 * Returns: int (really boolean)
1756 * Parameters: fin - packet information
1758 * Check whether the packet has a valid source address for the interface on
1759 * which the packet arrived, implementing the "fr_chksrc" feature.
1760 * Returns true iff the packet's source address is valid.
1762 int fr_verifysrc(fin
)
1765 net_handle_t net_data_p
;
1766 phy_if_t phy_ifdata_routeto
;
1767 struct sockaddr sin
;
1768 ipf_stack_t
*ifs
= fin
->fin_ifs
;
1770 if (fin
->fin_v
== 4) {
1771 net_data_p
= ifs
->ifs_ipf_ipv4
;
1772 } else if (fin
->fin_v
== 6) {
1773 net_data_p
= ifs
->ifs_ipf_ipv6
;
1778 /* Get the index corresponding to the if name */
1779 sin
.sa_family
= (fin
->fin_v
== 4) ? AF_INET
: AF_INET6
;
1780 bcopy(&fin
->fin_saddr
, &sin
.sa_data
, sizeof (struct in_addr
));
1781 phy_ifdata_routeto
= net_routeto(net_data_p
, &sin
, NULL
);
1783 return (((phy_if_t
)fin
->fin_ifp
== phy_ifdata_routeto
) ? 1 : 0);
1787 * Return true only if forwarding is enabled on the interface.
1790 fr_forwarding_enabled(phy_if_t phyif
, net_handle_t ndp
)
1794 for (lif
= net_lifgetnext(ndp
, phyif
, 0); lif
> 0;
1795 lif
= net_lifgetnext(ndp
, phyif
, lif
)) {
1799 res
= net_getlifflags(ndp
, phyif
, lif
, &flags
);
1802 if (flags
& IFF_ROUTER
)
1810 * Function: fr_fastroute
1811 * Returns: 0: success;
1814 * mb: the message block where ip head starts
1815 * mpp: the pointer to the pointer of the original
1817 * fin: packet information
1818 * fdp: destination interface information
1819 * if it is NULL, no interface information provided.
1821 * This function is for fastroute/to/dup-to rules. It calls
1822 * pfil_make_lay2_packet to search route, make lay-2 header,
1823 * and identify output queue for the IP packet.
1824 * The destination address depends on the following conditions:
1825 * 1: for fastroute rule, fdp is passed in as NULL, so the
1826 * destination address is the IP Packet's destination address
1827 * 2: for to/dup-to rule, if an ip address is specified after
1828 * the interface name, this address is used as the destination
1829 * address. Otherwise IP Packet's destination address is used
1831 int fr_fastroute(mb
, mpp
, fin
, fdp
)
1836 net_handle_t net_data_p
;
1839 frentry_t
*fr
= fin
->fin_fr
;
1843 struct sockaddr_in
*sin
;
1844 struct sockaddr_in6
*sin6
;
1845 struct sockaddr
*sinp
;
1846 ipf_stack_t
*ifs
= fin
->fin_ifs
;
1848 u_short __iplen
, __ipoff
;
1851 if (fin
->fin_v
== 4) {
1852 net_data_p
= ifs
->ifs_ipf_ipv4
;
1853 } else if (fin
->fin_v
== 6) {
1854 net_data_p
= ifs
->ifs_ipf_ipv6
;
1859 /* Check the src here, fin_ifp is the src interface. */
1860 if (!fr_forwarding_enabled((phy_if_t
)fin
->fin_ifp
, net_data_p
))
1863 inj
= net_inject_alloc(NETINFO_VERSION
);
1871 * If this is a duplicate mblk then we want ip to point at that
1872 * data, not the original, if and only if it is already pointing at
1873 * the current mblk data.
1875 * Otherwise, if it's not a duplicate, and we're not already pointing
1876 * at the current mblk data, then we want to ensure that the data
1880 if ((ip
== (ip_t
*)qpi
->qpi_m
->b_rptr
) && (qpi
->qpi_m
!= mb
)) {
1881 ip
= (ip_t
*)mb
->b_rptr
;
1882 } else if ((qpi
->qpi_m
== mb
) && (ip
!= (ip_t
*)qpi
->qpi_m
->b_rptr
)) {
1883 qpi
->qpi_m
->b_rptr
= (uchar_t
*)ip
;
1888 * If there is another M_PROTO, we don't want it
1896 sinp
= (struct sockaddr
*)&inj
->ni_addr
;
1897 sin
= (struct sockaddr_in
*)sinp
;
1898 sin6
= (struct sockaddr_in6
*)sinp
;
1899 bzero((char *)&inj
->ni_addr
, sizeof (inj
->ni_addr
));
1900 inj
->ni_addr
.ss_family
= (fin
->fin_v
== 4) ? AF_INET
: AF_INET6
;
1901 inj
->ni_packet
= mb
;
1904 * In case we're here due to "to <if>" being used with
1905 * "keep state", check that we're going in the correct
1909 if ((fr
!= NULL
) && (fdp
->fd_ifp
!= NULL
) &&
1910 (fin
->fin_rev
!= 0) && (fdp
== &fr
->fr_tif
))
1912 inj
->ni_physical
= (phy_if_t
)fdp
->fd_ifp
;
1913 if (fin
->fin_v
== 4) {
1914 sin
->sin_addr
= fdp
->fd_ip
;
1916 sin6
->sin6_addr
= fdp
->fd_ip6
.in6
;
1919 if (fin
->fin_v
== 4) {
1920 sin
->sin_addr
= ip
->ip_dst
;
1922 sin6
->sin6_addr
= ((ip6_t
*)ip
)->ip6_dst
;
1924 inj
->ni_physical
= net_routeto(net_data_p
, sinp
, NULL
);
1927 /* we're checking the destination here */
1928 if (!fr_forwarding_enabled(inj
->ni_physical
, net_data_p
))
1932 * Clear the hardware checksum flags from packets that we are doing
1933 * input processing on as leaving them set will cause the outgoing
1934 * NIC (if it supports hardware checksum) to calculate them anew,
1935 * using the old (correct) checksums as the pseudo value to start
1938 if (fin
->fin_out
== 0) {
1939 DB_CKSUMFLAGS(mb
) = 0;
1944 if (fin
->fin_out
== 0) {
1948 saveifp
= fin
->fin_ifp
;
1949 fin
->fin_ifp
= (void *)inj
->ni_physical
;
1950 fin
->fin_flx
&= ~FI_STATE
;
1952 (void) fr_acctpkt(fin
, &pass
);
1954 if (!fr
|| !(fr
->fr_flags
& FR_RETMASK
))
1955 (void) fr_checkstate(fin
, &pass
);
1956 if (fr_checknatout(fin
, NULL
) == -1)
1959 fin
->fin_ifp
= saveifp
;
1962 if (fin
->fin_v
== 4) {
1963 __iplen
= (u_short
)ip
->ip_len
,
1964 __ipoff
= (u_short
)ip
->ip_off
;
1966 ip
->ip_len
= htons(__iplen
);
1967 ip
->ip_off
= htons(__ipoff
);
1972 if (net_inject(net_data_p
, NI_DIRECT_OUT
, inj
) < 0) {
1973 net_inject_free(inj
);
1978 ifs
->ifs_fr_frouteok
[0]++;
1979 net_inject_free(inj
);
1982 net_inject_free(inj
);
1984 ifs
->ifs_fr_frouteok
[1]++;
1989 /* ------------------------------------------------------------------------ */
1990 /* Function: ipf_hook4_out */
1991 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
1992 /* Parameters: token(I) - pointer to event */
1993 /* info(I) - pointer to hook information for firewalling */
1995 /* Calling ipf_hook. */
1996 /* ------------------------------------------------------------------------ */
1998 int ipf_hook4_out(hook_event_token_t token
, hook_data_t info
, void *arg
)
2000 return ipf_hook(info
, 1, 0, arg
);
2003 int ipf_hook6_out(hook_event_token_t token
, hook_data_t info
, void *arg
)
2005 return ipf_hook6(info
, 1, 0, arg
);
2008 /* ------------------------------------------------------------------------ */
2009 /* Function: ipf_hook4_in */
2010 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2011 /* Parameters: token(I) - pointer to event */
2012 /* info(I) - pointer to hook information for firewalling */
2014 /* Calling ipf_hook. */
2015 /* ------------------------------------------------------------------------ */
2017 int ipf_hook4_in(hook_event_token_t token
, hook_data_t info
, void *arg
)
2019 return ipf_hook(info
, 0, 0, arg
);
2022 int ipf_hook6_in(hook_event_token_t token
, hook_data_t info
, void *arg
)
2024 return ipf_hook6(info
, 0, 0, arg
);
2028 /* ------------------------------------------------------------------------ */
2029 /* Function: ipf_hook4_loop_out */
2030 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2031 /* Parameters: token(I) - pointer to event */
2032 /* info(I) - pointer to hook information for firewalling */
2034 /* Calling ipf_hook. */
2035 /* ------------------------------------------------------------------------ */
2037 int ipf_hook4_loop_out(hook_event_token_t token
, hook_data_t info
, void *arg
)
2039 return ipf_hook(info
, 1, FI_NOCKSUM
, arg
);
2042 int ipf_hook6_loop_out(hook_event_token_t token
, hook_data_t info
, void *arg
)
2044 return ipf_hook6(info
, 1, FI_NOCKSUM
, arg
);
2047 /* ------------------------------------------------------------------------ */
2048 /* Function: ipf_hook4_loop_in */
2049 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2050 /* Parameters: token(I) - pointer to event */
2051 /* info(I) - pointer to hook information for firewalling */
2053 /* Calling ipf_hook. */
2054 /* ------------------------------------------------------------------------ */
2056 int ipf_hook4_loop_in(hook_event_token_t token
, hook_data_t info
, void *arg
)
2058 return ipf_hook(info
, 0, FI_NOCKSUM
, arg
);
2061 int ipf_hook6_loop_in(hook_event_token_t token
, hook_data_t info
, void *arg
)
2063 return ipf_hook6(info
, 0, FI_NOCKSUM
, arg
);
2066 /* ------------------------------------------------------------------------ */
2067 /* Function: ipf_hook */
2068 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2069 /* Parameters: info(I) - pointer to hook information for firewalling */
2070 /* out(I) - whether packet is going in or out */
2071 /* loopback(I) - whether packet is a loopback packet or not */
2073 /* Stepping stone function between the IP mainline and IPFilter. Extracts */
2074 /* parameters out of the info structure and forms them up to be useful for */
2075 /* calling ipfilter. */
2076 /* ------------------------------------------------------------------------ */
2077 int ipf_hook(hook_data_t info
, int out
, int loopback
, void *arg
)
2079 hook_pkt_event_t
*fw
;
2088 fw
= (hook_pkt_event_t
*)info
;
2091 phy
= (out
== 0) ? fw
->hpe_ifp
: fw
->hpe_ofp
;
2094 swap
= ntohs(ip
->ip_len
);
2096 swap
= ntohs(ip
->ip_off
);
2098 hlen
= IPH_HDR_LENGTH(ip
);
2100 qpi
.qpi_m
= fw
->hpe_mb
;
2101 qpi
.qpi_data
= fw
->hpe_hdr
;
2102 qpi
.qpi_off
= (char *)qpi
.qpi_data
- (char *)fw
->hpe_mb
->b_rptr
;
2103 qpi
.qpi_ill
= (void *)phy
;
2104 qpi
.qpi_flags
= fw
->hpe_flags
& (HPE_MULTICAST
|HPE_BROADCAST
);
2106 qpi
.qpi_flags
|= FI_MBCAST
;
2107 qpi
.qpi_flags
|= loopback
;
2109 rval
= fr_check(fw
->hpe_hdr
, hlen
, qpi
.qpi_ill
, out
,
2110 &qpi
, fw
->hpe_mp
, ifs
);
2112 /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2113 if (rval
== 0 && *(fw
->hpe_mp
) == NULL
)
2116 /* Notify IP the packet mblk_t and IP header pointers. */
2117 fw
->hpe_mb
= qpi
.qpi_m
;
2118 fw
->hpe_hdr
= qpi
.qpi_data
;
2121 swap
= ntohs(ip
->ip_len
);
2123 swap
= ntohs(ip
->ip_off
);
2129 int ipf_hook6(hook_data_t info
, int out
, int loopback
, void *arg
)
2131 hook_pkt_event_t
*fw
;
2136 fw
= (hook_pkt_event_t
*)info
;
2139 phy
= (out
== 0) ? fw
->hpe_ifp
: fw
->hpe_ofp
;
2141 hlen
= sizeof (ip6_t
);
2143 qpi
.qpi_m
= fw
->hpe_mb
;
2144 qpi
.qpi_data
= fw
->hpe_hdr
;
2145 qpi
.qpi_off
= (char *)qpi
.qpi_data
- (char *)fw
->hpe_mb
->b_rptr
;
2146 qpi
.qpi_ill
= (void *)phy
;
2147 qpi
.qpi_flags
= fw
->hpe_flags
& (HPE_MULTICAST
|HPE_BROADCAST
);
2149 qpi
.qpi_flags
|= FI_MBCAST
;
2150 qpi
.qpi_flags
|= loopback
;
2152 rval
= fr_check(fw
->hpe_hdr
, hlen
, qpi
.qpi_ill
, out
,
2153 &qpi
, fw
->hpe_mp
, arg
);
2155 /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2156 if (rval
== 0 && *(fw
->hpe_mp
) == NULL
)
2159 /* Notify IP the packet mblk_t and IP header pointers. */
2160 fw
->hpe_mb
= qpi
.qpi_m
;
2161 fw
->hpe_hdr
= qpi
.qpi_data
;
2166 /* ------------------------------------------------------------------------ */
2167 /* Function: ipf_nic_event_v4 */
2168 /* Returns: int - 0 == no problems encountered */
2169 /* Parameters: event(I) - pointer to event */
2170 /* info(I) - pointer to information about a NIC event */
2172 /* Function to receive asynchronous NIC events from IP */
2173 /* ------------------------------------------------------------------------ */
2175 int ipf_nic_event_v4(hook_event_token_t event
, hook_data_t info
, void *arg
)
2177 struct sockaddr_in
*sin
;
2178 hook_nic_event_t
*hn
;
2179 ipf_stack_t
*ifs
= arg
;
2180 void *new_ifp
= NULL
;
2182 if (ifs
->ifs_fr_running
<= 0)
2185 hn
= (hook_nic_event_t
*)info
;
2187 switch (hn
->hne_event
)
2190 frsync(IPFSYNC_NEWIFP
, 4, (void *)hn
->hne_nic
, hn
->hne_data
,
2192 fr_natifpsync(IPFSYNC_NEWIFP
, 4, (void *)hn
->hne_nic
,
2194 fr_statesync(IPFSYNC_NEWIFP
, 4, (void *)hn
->hne_nic
,
2199 frsync(IPFSYNC_OLDIFP
, 4, (void *)hn
->hne_nic
, NULL
, ifs
);
2200 fr_natifpsync(IPFSYNC_OLDIFP
, 4, (void *)hn
->hne_nic
, NULL
,
2202 fr_statesync(IPFSYNC_OLDIFP
, 4, (void *)hn
->hne_nic
, NULL
, ifs
);
2205 case NE_ADDRESS_CHANGE
:
2207 * We only respond to events for logical interface 0 because
2208 * IPFilter only uses the first address given to a network
2209 * interface. We check for hne_lif==1 because the netinfo
2210 * code adds 1 to the lif number so that it can return
2211 * 0 to indicate "no more lifs" when walking them.
2213 if (hn
->hne_lif
== 1) {
2214 frsync(IPFSYNC_RESYNC
, 4, (void *)hn
->hne_nic
, NULL
,
2217 fr_nataddrsync(4, (void *)hn
->hne_nic
, &sin
->sin_addr
,
2223 case NE_IFINDEX_CHANGE
:
2224 WRITE_ENTER(&ifs
->ifs_ipf_mutex
);
2226 if (hn
->hne_data
!= NULL
) {
2228 * The netinfo passes interface index as int (hne_data should be
2229 * handled as a pointer to int), which is always 32bit. We need to
2230 * convert it to void pointer here, since interfaces are
2231 * represented as pointers to void in IPF. The pointers are 64 bits
2232 * long on 64bit platforms. Doing something like
2234 * will throw warning:
2235 * "cast to pointer from integer of different size"
2236 * during 64bit compilation.
2238 * The line below uses (size_t) to typecast int to
2239 * size_t, which might be 64bit/32bit (depending
2240 * on architecture). Once we have proper 64bit/32bit
2241 * type (size_t), we can safely convert it to void pointer.
2243 new_ifp
= (void *)(size_t)*((int *)hn
->hne_data
);
2244 fr_ifindexsync((void *)hn
->hne_nic
, new_ifp
, ifs
);
2245 fr_natifindexsync((void *)hn
->hne_nic
, new_ifp
, ifs
);
2246 fr_stateifindexsync((void *)hn
->hne_nic
, new_ifp
, ifs
);
2248 RWLOCK_EXIT(&ifs
->ifs_ipf_mutex
);
2260 /* ------------------------------------------------------------------------ */
2261 /* Function: ipf_nic_event_v6 */
2262 /* Returns: int - 0 == no problems encountered */
2263 /* Parameters: event(I) - pointer to event */
2264 /* info(I) - pointer to information about a NIC event */
2266 /* Function to receive asynchronous NIC events from IP */
2267 /* ------------------------------------------------------------------------ */
2269 int ipf_nic_event_v6(hook_event_token_t event
, hook_data_t info
, void *arg
)
2271 struct sockaddr_in6
*sin6
;
2272 hook_nic_event_t
*hn
;
2273 ipf_stack_t
*ifs
= arg
;
2274 void *new_ifp
= NULL
;
2276 if (ifs
->ifs_fr_running
<= 0)
2279 hn
= (hook_nic_event_t
*)info
;
2281 switch (hn
->hne_event
)
2284 frsync(IPFSYNC_NEWIFP
, 6, (void *)hn
->hne_nic
,
2286 fr_natifpsync(IPFSYNC_NEWIFP
, 6, (void *)hn
->hne_nic
,
2288 fr_statesync(IPFSYNC_NEWIFP
, 6, (void *)hn
->hne_nic
,
2293 frsync(IPFSYNC_OLDIFP
, 6, (void *)hn
->hne_nic
, NULL
, ifs
);
2294 fr_natifpsync(IPFSYNC_OLDIFP
, 6, (void *)hn
->hne_nic
, NULL
,
2296 fr_statesync(IPFSYNC_OLDIFP
, 6, (void *)hn
->hne_nic
, NULL
, ifs
);
2299 case NE_ADDRESS_CHANGE
:
2300 if (hn
->hne_lif
== 1) {
2301 sin6
= hn
->hne_data
;
2302 fr_nataddrsync(6, (void *)hn
->hne_nic
, &sin6
->sin6_addr
,
2308 case NE_IFINDEX_CHANGE
:
2309 WRITE_ENTER(&ifs
->ifs_ipf_mutex
);
2310 if (hn
->hne_data
!= NULL
) {
2312 * The netinfo passes interface index as int (hne_data should be
2313 * handled as a pointer to int), which is always 32bit. We need to
2314 * convert it to void pointer here, since interfaces are
2315 * represented as pointers to void in IPF. The pointers are 64 bits
2316 * long on 64bit platforms. Doing something like
2318 * will throw warning:
2319 * "cast to pointer from integer of different size"
2320 * during 64bit compilation.
2322 * The line below uses (size_t) to typecast int to
2323 * size_t, which might be 64bit/32bit (depending
2324 * on architecture). Once we have proper 64bit/32bit
2325 * type (size_t), we can safely convert it to void pointer.
2327 new_ifp
= (void *)(size_t)*((int *)hn
->hne_data
);
2328 fr_ifindexsync((void *)hn
->hne_nic
, new_ifp
, ifs
);
2329 fr_natifindexsync((void *)hn
->hne_nic
, new_ifp
, ifs
);
2330 fr_stateifindexsync((void *)hn
->hne_nic
, new_ifp
, ifs
);
2332 RWLOCK_EXIT(&ifs
->ifs_ipf_mutex
);
2344 * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2345 * are needed in Solaris kernel only. We don't need them in
2346 * ipftest to pretend the ICMP/RST packet was sent as a response.
2348 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2349 /* ------------------------------------------------------------------------ */
2350 /* Function: fr_make_rst */
2351 /* Returns: int - 0 on success, -1 on failure */
2352 /* Parameters: fin(I) - pointer to packet information */
2354 /* We must alter the original mblks passed to IPF from IP stack via */
2355 /* FW_HOOKS. FW_HOOKS interface is powerful, but it has some limitations. */
2356 /* IPF can basically do only these things with mblk representing the packet: */
2357 /* leave it as it is (pass the packet) */
2359 /* discard it (block the packet) */
2361 /* alter it (i.e. NAT) */
2363 /* As you can see IPF can not simply discard the mblk and supply a new one */
2364 /* instead to IP stack via FW_HOOKS. */
2366 /* The return-rst action for packets coming via NIC is handled as follows: */
2367 /* mblk with packet is discarded */
2369 /* new mblk with RST response is constructed and injected to network */
2371 /* IPF can't inject packets to loopback interface, this is just another */
2372 /* limitation we have to deal with here. The only option to send RST */
2373 /* response to offending TCP packet coming via loopback is to alter it. */
2375 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on */
2376 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to */
2377 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers. */
2378 /* ------------------------------------------------------------------------ */
2379 int fr_make_rst(fin
)
2385 tcphdr_t
*tcp
= NULL
;
2386 struct in_addr tmp_src
;
2388 struct in6_addr tmp_src6
;
2391 ASSERT(fin
->fin_p
== IPPROTO_TCP
);
2394 * We do not need to adjust chksum, since it is not being checked by
2395 * Solaris IP stack for loopback clients.
2397 if ((fin
->fin_v
== 4) && (fin
->fin_p
== IPPROTO_TCP
) &&
2398 ((tcp
= (tcphdr_t
*) fin
->fin_dp
) != NULL
)) {
2400 if (tcp
->th_flags
& (TH_SYN
| TH_FIN
)) {
2401 /* Swap IPv4 addresses. */
2402 tmp_src
= fin
->fin_ip
->ip_src
;
2403 fin
->fin_ip
->ip_src
= fin
->fin_ip
->ip_dst
;
2404 fin
->fin_ip
->ip_dst
= tmp_src
;
2412 else if ((fin
->fin_v
== 6) && (fin
->fin_p
== IPPROTO_TCP
) &&
2413 ((tcp
= (tcphdr_t
*) fin
->fin_dp
) != NULL
)) {
2415 * We are relying on fact the next header is TCP, which is true
2416 * for regular TCP packets coming in over loopback.
2418 if (tcp
->th_flags
& (TH_SYN
| TH_FIN
)) {
2419 /* Swap IPv6 addresses. */
2420 tmp_src6
= fin
->fin_ip6
->ip6_src
;
2421 fin
->fin_ip6
->ip6_src
= fin
->fin_ip6
->ip6_dst
;
2422 fin
->fin_ip6
->ip6_dst
= tmp_src6
;
2433 * Adjust TCP header:
2436 * set correct ACK number
2438 tmp_port
= tcp
->th_sport
;
2439 tcp
->th_sport
= tcp
->th_dport
;
2440 tcp
->th_dport
= tmp_port
;
2441 old_ack
= tcp
->th_ack
;
2442 tcp
->th_ack
= htonl(ntohl(tcp
->th_seq
) + 1);
2443 tcp
->th_seq
= old_ack
;
2444 tcp
->th_flags
= TH_RST
| TH_ACK
;
2450 /* ------------------------------------------------------------------------ */
2451 /* Function: fr_make_icmp_v4 */
2452 /* Returns: int - 0 on success, -1 on failure */
2453 /* Parameters: fin(I) - pointer to packet information */
2455 /* Please read comment at fr_make_icmp() wrapper function to get an idea */
2456 /* what is going to happen here and why. Once you read the comment there, */
2457 /* continue here with next paragraph. */
2459 /* To turn IPv4 packet into ICMPv4 response packet, these things must */
2461 /* (1) Original mblk is copied (duplicated). */
2463 /* (2) ICMP header is created. */
2465 /* (3) Link ICMP header with copy of original mblk, we have ICMPv4 */
2466 /* data ready then. */
2468 /* (4) Swap IP addresses in original mblk and adjust IP header data. */
2470 /* (5) The mblk containing original packet is trimmed to contain IP */
2471 /* header only and ICMP chksum is computed. */
2473 /* (6) The ICMP header we have from (3) is linked to original mblk, */
2474 /* which now contains new IP header. If original packet was spread */
2475 /* over several mblks, only the first mblk is kept. */
2476 /* ------------------------------------------------------------------------ */
2477 static int fr_make_icmp_v4(fin
)
2480 struct in_addr tmp_src
;
2485 size_t icmp_pld_len
; /* octets to append to ICMP header */
2486 size_t orig_iphdr_len
; /* length of IP header only */
2492 if (fin
->fin_v
!= 4)
2496 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2497 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2499 tcp
= (tcphdr_t
*) fin
->fin_dp
;
2501 if ((fin
->fin_p
== IPPROTO_TCP
) &&
2502 ((tcp
== NULL
) || ((tcp
->th_flags
& (TH_SYN
| TH_FIN
)) == 0)))
2508 * Make copy of original mblk.
2510 * We want to copy as much data as necessary, not less, not more. The
2511 * ICMPv4 payload length for unreachable messages is:
2512 * original IP header + 8 bytes of L4 (if there are any).
2514 * We determine if there are at least 8 bytes of L4 data following IP
2517 icmp_pld_len
= (fin
->fin_dlen
> ICMPERR_ICMPHLEN
) ?
2518 ICMPERR_ICMPHLEN
: fin
->fin_dlen
;
2520 * Since we don't want to copy more data than necessary, we must trim
2521 * the original mblk here. The right way (STREAMish) would be to use
2522 * adjmsg() to trim it. However we would have to calculate the length
2523 * argument for adjmsg() from pointers we already have here.
2525 * Since we have pointers and offsets, it's faster and easier for
2526 * us to just adjust pointers by hand instead of using adjmsg().
2528 fin
->fin_m
->b_wptr
= (unsigned char *) fin
->fin_dp
;
2529 fin
->fin_m
->b_wptr
+= icmp_pld_len
;
2530 icmp_pld_len
= fin
->fin_m
->b_wptr
- (unsigned char *) fin
->fin_ip
;
2533 * Also we don't want to copy any L2 stuff, which might precede IP
2534 * header, so we have to set b_rptr to point to the start of IP
2537 fin
->fin_m
->b_rptr
+= fin
->fin_ipoff
;
2538 if ((mblk_ip
= copyb(fin
->fin_m
)) == NULL
)
2540 fin
->fin_m
->b_rptr
-= fin
->fin_ipoff
;
2545 * Create an ICMP header, which will be appended to original mblk later.
2546 * ICMP header is just another mblk.
2548 mblk_icmp
= (mblk_t
*) allocb(ICMPERR_ICMPHLEN
, BPRI_HI
);
2549 if (mblk_icmp
== NULL
) {
2554 MTYPE(mblk_icmp
) = M_DATA
;
2555 icmp
= (struct icmp
*) mblk_icmp
->b_wptr
;
2556 icmp
->icmp_type
= ICMP_UNREACH
;
2557 icmp
->icmp_code
= fin
->fin_icode
& 0xFF;
2558 icmp
->icmp_void
= 0;
2559 icmp
->icmp_cksum
= 0;
2560 mblk_icmp
->b_wptr
+= ICMPERR_ICMPHLEN
;
2565 * Complete ICMP packet - link ICMP header with L4 data from original
2568 linkb(mblk_icmp
, mblk_ip
);
2573 * Swap IP addresses and change IP header fields accordingly in
2574 * original IP packet.
2576 * There is a rule option return-icmp as a dest for physical
2577 * interfaces. This option becomes useless for loopback, since IPF box
2578 * uses same address as a loopback destination. We ignore the option
2579 * here, the ICMP packet will always look like as it would have been
2580 * sent from the original destination host.
2582 tmp_src
= fin
->fin_ip
->ip_src
;
2583 fin
->fin_ip
->ip_src
= fin
->fin_ip
->ip_dst
;
2584 fin
->fin_ip
->ip_dst
= tmp_src
;
2585 fin
->fin_ip
->ip_p
= IPPROTO_ICMP
;
2586 fin
->fin_ip
->ip_sum
= 0;
2591 * We trim the original mblk to hold IP header only.
2593 fin
->fin_m
->b_wptr
= fin
->fin_dp
;
2594 orig_iphdr_len
= fin
->fin_m
->b_wptr
-
2595 (fin
->fin_m
->b_rptr
+ fin
->fin_ipoff
);
2596 fin
->fin_ip
->ip_len
= htons(icmp_pld_len
+ ICMPERR_ICMPHLEN
+
2600 * ICMP chksum calculation. The data we are calculating chksum for are
2601 * spread over two mblks, therefore we have to use two for loops.
2603 * First for loop computes chksum part for ICMP header.
2605 buf
= (uint16_t *) icmp
;
2606 len
= ICMPERR_ICMPHLEN
;
2607 for (sum
= 0; len
> 1; len
-= 2)
2611 * Here we add chksum part for ICMP payload.
2614 buf
= (uint16_t *) mblk_ip
->b_rptr
;
2615 for (; len
> 1; len
-= 2)
2621 sum
= (sum
>> 16) + (sum
& 0xffff);
2623 icmp
->icmp_cksum
= ~sum
;
2628 * Release all packet mblks, except the first one.
2630 if (fin
->fin_m
->b_cont
!= NULL
) {
2631 FREE_MB_T(fin
->fin_m
->b_cont
);
2635 * Append ICMP payload to first mblk, which already contains new IP
2638 linkb(fin
->fin_m
, mblk_icmp
);
2644 /* ------------------------------------------------------------------------ */
2645 /* Function: fr_make_icmp_v6 */
2646 /* Returns: int - 0 on success, -1 on failure */
2647 /* Parameters: fin(I) - pointer to packet information */
2649 /* Please read comment at fr_make_icmp() wrapper function to get an idea */
2650 /* what is going to happen here and why. Once you read the comment there, */
2651 /* continue here with next paragraph. */
2653 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response. */
2654 /* The algorithm is fairly simple: */
2655 /* 1) We need to get copy of complete mblk. */
2657 /* 2) New ICMPv6 header is created. */
2659 /* 3) The copy of original mblk with packet is linked to ICMPv6 */
2662 /* 4) The checksum must be adjusted. */
2664 /* 5) IP addresses in original mblk are swapped and IP header data */
2665 /* are adjusted (protocol number). */
2667 /* 6) Original mblk is trimmed to hold IPv6 header only, then it is */
2668 /* linked with the ICMPv6 data we got from (3). */
2669 /* ------------------------------------------------------------------------ */
2670 static int fr_make_icmp_v6(fin
)
2673 struct icmp6_hdr
*icmp6
;
2675 struct in6_addr tmp_src6
;
2676 size_t icmp_pld_len
;
2677 mblk_t
*mblk_ip
, *mblk_icmp
;
2679 if (fin
->fin_v
!= 6)
2683 * If we are dealing with TCP, then packet must be SYN/FIN to be routed by
2684 * IP stack. If it is not SYN/FIN, then we must drop it silently.
2686 tcp
= (tcphdr_t
*) fin
->fin_dp
;
2688 if ((fin
->fin_p
== IPPROTO_TCP
) &&
2689 ((tcp
== NULL
) || ((tcp
->th_flags
& (TH_SYN
| TH_FIN
)) == 0)))
2695 * We need to copy complete packet in case of IPv6, no trimming is
2696 * needed (except the L2 headers).
2698 icmp_pld_len
= M_LEN(fin
->fin_m
);
2699 fin
->fin_m
->b_rptr
+= fin
->fin_ipoff
;
2700 if ((mblk_ip
= copyb(fin
->fin_m
)) == NULL
)
2702 fin
->fin_m
->b_rptr
-= fin
->fin_ipoff
;
2707 * Allocate and create ICMP header.
2709 mblk_icmp
= (mblk_t
*) allocb(sizeof (struct icmp6_hdr
),
2712 if (mblk_icmp
== NULL
)
2715 MTYPE(mblk_icmp
) = M_DATA
;
2716 icmp6
= (struct icmp6_hdr
*) mblk_icmp
->b_wptr
;
2717 icmp6
->icmp6_type
= ICMP6_DST_UNREACH
;
2718 icmp6
->icmp6_code
= fin
->fin_icode
& 0xFF;
2719 icmp6
->icmp6_data32
[0] = 0;
2720 mblk_icmp
->b_wptr
+= sizeof (struct icmp6_hdr
);
2725 * Link the copy of IP packet to ICMP header.
2727 linkb(mblk_icmp
, mblk_ip
);
2732 * Calculate chksum - this is a much easier task than in case of
2733 * IPv4 - ICMPv6 chksum only covers IP addresses, and payload length.
2734 * We are making compensation just for change of packet length.
2736 icmp6
->icmp6_cksum
= icmp_pld_len
+ sizeof (struct icmp6_hdr
);
2741 * Swap IP addresses.
2743 tmp_src6
= fin
->fin_ip6
->ip6_src
;
2744 fin
->fin_ip6
->ip6_src
= fin
->fin_ip6
->ip6_dst
;
2745 fin
->fin_ip6
->ip6_dst
= tmp_src6
;
2748 * and adjust IP header data.
2750 fin
->fin_ip6
->ip6_nxt
= IPPROTO_ICMPV6
;
2751 fin
->fin_ip6
->ip6_plen
= htons(icmp_pld_len
+ sizeof (struct icmp6_hdr
));
2756 * We must release all linked mblks from original packet and keep only
2757 * the first mblk with IP header to link ICMP data.
2759 fin
->fin_m
->b_wptr
= (unsigned char *) fin
->fin_ip6
+ sizeof (ip6_t
);
2761 if (fin
->fin_m
->b_cont
!= NULL
) {
2762 FREE_MB_T(fin
->fin_m
->b_cont
);
2766 * Append ICMP payload to IP header.
2768 linkb(fin
->fin_m
, mblk_icmp
);
2772 #endif /* USE_INET6 */
2774 /* ------------------------------------------------------------------------ */
2775 /* Function: fr_make_icmp */
2776 /* Returns: int - 0 on success, -1 on failure */
2777 /* Parameters: fin(I) - pointer to packet information */
2779 /* We must alter the original mblks passed to IPF from IP stack via */
2780 /* FW_HOOKS. The reasons why we must alter packet are discussed within */
2781 /* comment at fr_make_rst() function. */
2783 /* The fr_make_icmp() function acts as a wrapper, which passes the code */
2784 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on */
2785 /* protocol version. However there are some details, which are common to */
2786 /* both IP versions. The details are going to be explained here. */
2788 /* The packet looks as follows: */
2789 /* xxx | IP hdr | IP payload ... | */
2792 /* | | | fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
2794 /* | | `- fin_m->fin_dp (in case of IPv4 points to L4 header) */
2796 /* | `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case */
2797 /* | of loopback) */
2799 /* `- fin_m->b_rptr - points to L2 header in case of physical NIC */
2801 /* All relevant IP headers are pulled up into the first mblk. It happened */
2802 /* well in advance before the matching rule was found (the rule, which took */
2803 /* us here, to fr_make_icmp() function). */
2805 /* Both functions will turn packet passed in fin->fin_m mblk into a new */
2806 /* packet. New packet will be represented as chain of mblks. */
2807 /* orig mblk |- b_cont ---. */
2808 /* ^ `-> ICMP hdr |- b_cont--. */
2809 /* | ^ `-> duped orig mblk */
2811 /* `- The original mblk | | */
2812 /* will be trimmed to | | */
2813 /* to contain IP header | | */
2816 /* `- This is newly | */
2817 /* allocated mblk to | */
2818 /* hold ICMPv6 data. | */
2822 /* This is the copy of original mblk, it will contain -' */
2823 /* original IP packet in case of ICMPv6. In case of */
2824 /* ICMPv4 it will contain up to 8 bytes of IP payload */
2825 /* (TCP/UDP/L4) data from original packet. */
2826 /* ------------------------------------------------------------------------ */
2827 int fr_make_icmp(fin
)
2832 if (fin
->fin_v
== 4)
2833 rv
= fr_make_icmp_v4(fin
);
2835 else if (fin
->fin_v
== 6)
2836 rv
= fr_make_icmp_v6(fin
);
2844 /* ------------------------------------------------------------------------ */
2845 /* Function: fr_buf_sum */
2846 /* Returns: unsigned int - sum of buffer buf */
2847 /* Parameters: buf - pointer to buf we want to sum up */
2848 /* len - length of buffer buf */
2850 /* Sums buffer buf. The result is used for chksum calculation. The buf */
2851 /* argument must be aligned. */
2852 /* ------------------------------------------------------------------------ */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
	/*
	 * buf is read through a 16-bit pointer, so the caller must pass a
	 * buffer that is suitably aligned for uint16_t access.
	 */
	const uint16_t *word = (const uint16_t *)buf;
	uint32_t sum = 0;

	/*
	 * Add up all complete 16-bit words exactly as they lie in memory.
	 * The result is NOT folded to 16 bits here; callers fold the carries.
	 */
	for (; len > 1; len -= 2)
		sum += *word++;

	/*
	 * A trailing odd byte is counted as the first byte of a 16-bit word
	 * whose second byte is zero.  htons(byte << 8) yields that in-memory
	 * layout regardless of host byte order.
	 */
	if (len == 1)
		sum += htons((*(const unsigned char *)word) << 8);

	return (sum);
}
2871 /* ------------------------------------------------------------------------ */
2872 /* Function: fr_calc_chksum */
2874 /* Parameters: fin - pointer to fr_info_t instance with packet data */
2875 /* pkt - pointer to duplicated packet */
2877 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP versions. */
2879 /* ------------------------------------------------------------------------ */
2880 void fr_calc_chksum(fin
, pkt
)
2888 struct in6_addr in6
;
2894 struct in6_addr in6
;
2901 uint32_t sum
, ip_sum
;
2903 uint16_t *l4_csum_p
;
2908 struct icmp6_hdr
*icmp6
;
2915 * We need to pullup the packet to the single continuous buffer to avoid
2916 * potential misaligment of b_rptr member in mblk chain.
2918 if (pullupmsg(pkt
, -1) == 0) {
2919 cmn_err(CE_WARN
, "Failed to pullup loopback pkt -> chksum"
2920 " will not be computed by IPF");
2925 * It is guaranteed IP header starts right at b_rptr, because we are
2926 * working with a copy of the original packet.
2928 * Compute pseudo header chksum for TCP and UDP.
2930 if ((fin
->fin_p
== IPPROTO_UDP
) ||
2931 (fin
->fin_p
== IPPROTO_TCP
)) {
2932 bzero(&phdr
, sizeof (phdr
));
2934 if (fin
->fin_v
== 6) {
2935 phdr
.src_addr
.in6
= fin
->fin_srcip6
;
2936 phdr
.dst_addr
.in6
= fin
->fin_dstip6
;
2938 phdr
.src_addr
.in4
= fin
->fin_src
;
2939 phdr
.dst_addr
.in4
= fin
->fin_dst
;
2942 phdr
.src_addr
.in4
= fin
->fin_src
;
2943 phdr
.dst_addr
.in4
= fin
->fin_dst
;
2945 phdr
.zero
= (char) 0;
2946 phdr
.proto
= fin
->fin_p
;
2947 phdr
.len
= htons((uint16_t)fin
->fin_dlen
);
2948 sum
= fr_buf_sum(&phdr
, (unsigned int)sizeof (phdr
));
2954 * Set pointer to the L4 chksum field in the packet, set buf pointer to
2955 * the L4 header start.
2957 switch (fin
->fin_p
) {
2959 udp
= (udphdr_t
*)(pkt
->b_rptr
+ fin
->fin_hlen
);
2960 l4_csum_p
= &udp
->uh_sum
;
2964 tcp
= (tcphdr_t
*)(pkt
->b_rptr
+ fin
->fin_hlen
);
2965 l4_csum_p
= &tcp
->th_sum
;
2969 icmp
= (icmphdr_t
*)(pkt
->b_rptr
+ fin
->fin_hlen
);
2970 l4_csum_p
= &icmp
->icmp_cksum
;
2974 case IPPROTO_ICMPV6
:
2975 icmp6
= (struct icmp6_hdr
*)(pkt
->b_rptr
+ fin
->fin_hlen
);
2976 l4_csum_p
= &icmp6
->icmp6_cksum
;
2985 * Compute L4 chksum if needed.
2987 if (l4_csum_p
!= NULL
) {
2988 *l4_csum_p
= (uint16_t)0;
2989 pld_len
= fin
->fin_dlen
;
2990 len
= pkt
->b_wptr
- (unsigned char *)buf
;
2991 ASSERT(len
== pld_len
);
2993 * Add payload sum to pseudoheader sum.
2995 sum
+= fr_buf_sum(buf
, len
);
2997 sum
= (sum
& 0xFFFF) + (sum
>> 16);
2999 *l4_csum_p
= ~((uint16_t)sum
);
3000 DTRACE_PROBE1(l4_sum
, uint16_t, *l4_csum_p
);
3004 * The IP header chksum is needed just for IPv4.
3006 if (fin
->fin_v
== 4) {
3008 * Compute IPv4 header chksum.
3010 ip
= (ip_t
*)pkt
->b_rptr
;
3011 ip
->ip_sum
= (uint16_t)0;
3012 ip_sum
= fr_buf_sum(ip
, (unsigned int)fin
->fin_hlen
);
3013 while (ip_sum
>> 16)
3014 ip_sum
= (ip_sum
& 0xFFFF) + (ip_sum
>> 16);
3016 ip
->ip_sum
= ~((uint16_t)ip_sum
);
3017 DTRACE_PROBE1(l3_sum
, uint16_t, ip
->ip_sum
);
3023 #endif /* _KERNEL && SOLARIS2 >= 10 */