5733 ipf should only forward when forwarding is enabled
[unleashed.git] / usr / src / uts / common / inet / ipf / ip_fil_solaris.c
blobf958ca22615a7a6d09303ec064ac8689c3c03493
1 /*
2 * Copyright (C) 1993-2001, 2003 by Darren Reed.
4 * See the IPFILTER.LICENCE file for details on licencing.
6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
8 * Copyright (c) 2015, Joyent, Inc. All rights reserved.
9 */
11 #if !defined(lint)
12 static const char sccsid[] = "@(#)ip_fil_solaris.c 1.7 07/22/06 (C) 1993-2000 Darren Reed";
13 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
14 #endif
16 #include <sys/types.h>
17 #include <sys/errno.h>
18 #include <sys/param.h>
19 #include <sys/cpuvar.h>
20 #include <sys/open.h>
21 #include <sys/ioctl.h>
22 #include <sys/filio.h>
23 #include <sys/systm.h>
24 #include <sys/strsubr.h>
25 #include <sys/cred.h>
26 #include <sys/ddi.h>
27 #include <sys/sunddi.h>
28 #include <sys/ksynch.h>
29 #include <sys/kmem.h>
30 #include <sys/mkdev.h>
31 #include <sys/protosw.h>
32 #include <sys/socket.h>
33 #include <sys/dditypes.h>
34 #include <sys/cmn_err.h>
35 #include <sys/zone.h>
36 #include <net/if.h>
37 #include <net/af.h>
38 #include <net/route.h>
39 #include <netinet/in.h>
40 #include <netinet/in_systm.h>
41 #include <netinet/ip.h>
42 #include <netinet/ip_var.h>
43 #include <netinet/tcp.h>
44 #include <netinet/udp.h>
45 #include <netinet/tcpip.h>
46 #include <netinet/ip_icmp.h>
47 #include "netinet/ip_compat.h"
48 #ifdef USE_INET6
49 # include <netinet/icmp6.h>
50 #endif
51 #include "netinet/ip_fil.h"
52 #include "netinet/ip_nat.h"
53 #include "netinet/ip_frag.h"
54 #include "netinet/ip_state.h"
55 #include "netinet/ip_auth.h"
56 #include "netinet/ip_proxy.h"
57 #include "netinet/ipf_stack.h"
58 #ifdef IPFILTER_LOOKUP
59 # include "netinet/ip_lookup.h"
60 #endif
61 #include <inet/ip_ire.h>
63 #include <sys/md5.h>
64 #include <sys/neti.h>
66 static int frzerostats __P((caddr_t, ipf_stack_t *));
67 static int fr_setipfloopback __P((int, ipf_stack_t *));
68 static int fr_enableipf __P((ipf_stack_t *, int));
69 static int fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
70 static int ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
71 static int ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
72 static int ipf_hook __P((hook_data_t, int, int, void *));
73 static int ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
74 static int ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
75 static int ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
76 void *));
77 static int ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
78 static int ipf_hook4 __P((hook_data_t, int, int, void *));
79 static int ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
80 static int ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
81 static int ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
82 void *));
83 static int ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
84 void *));
85 static int ipf_hook6 __P((hook_data_t, int, int, void *));
86 extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
87 extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
89 #if SOLARIS2 < 10
90 #if SOLARIS2 >= 7
91 u_int *ip_ttl_ptr = NULL;
92 u_int *ip_mtudisc = NULL;
93 # if SOLARIS2 >= 8
94 int *ip_forwarding = NULL;
95 u_int *ip6_forwarding = NULL;
96 # else
97 u_int *ip_forwarding = NULL;
98 # endif
99 #else
100 u_long *ip_ttl_ptr = NULL;
101 u_long *ip_mtudisc = NULL;
102 u_long *ip_forwarding = NULL;
103 #endif
104 #endif
106 vmem_t *ipf_minor; /* minor number arena */
107 void *ipf_state; /* DDI state */
/*
 * GZ-controlled and per-zone stacks:
 *
 * For each non-global zone, we create two ipf stacks: the per-zone stack and
 * the GZ-controlled stack.  The per-zone stack can be controlled and observed
 * from inside the zone or from the global zone.  The GZ-controlled stack can
 * only be controlled and observed from the global zone (though the rules
 * still only affect that non-global zone).
 *
 * The two hooks are always arranged so that the GZ-controlled stack is always
 * "outermost" with respect to the zone.  The traffic flow then looks like
 * this:
 *
 * Inbound:
 *
 *     nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone
 *
 * Outbound:
 *
 *     nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone
 */
/*
 * Hook names.  Each event has a per-zone name and a "_gz" name; iplattach()
 * picks one of the pair depending on ifs_gz_controlled and uses the other
 * as the ordering hint.
 */

/* IPv4 hook names */
char *hook4_nicevents =		"ipfilter_hook4_nicevents";
char *hook4_nicevents_gz =	"ipfilter_hook4_nicevents_gz";

char *hook4_in =		"ipfilter_hook4_in";
char *hook4_in_gz =		"ipfilter_hook4_in_gz";

char *hook4_out =		"ipfilter_hook4_out";
char *hook4_out_gz =		"ipfilter_hook4_out_gz";

char *hook4_loop_in =		"ipfilter_hook4_loop_in";
char *hook4_loop_in_gz =	"ipfilter_hook4_loop_in_gz";

char *hook4_loop_out =		"ipfilter_hook4_loop_out";
char *hook4_loop_out_gz =	"ipfilter_hook4_loop_out_gz";

/* IPv6 hook names */
char *hook6_nicevents =		"ipfilter_hook6_nicevents";
char *hook6_nicevents_gz =	"ipfilter_hook6_nicevents_gz";

char *hook6_in =		"ipfilter_hook6_in";
char *hook6_in_gz =		"ipfilter_hook6_in_gz";

char *hook6_out =		"ipfilter_hook6_out";
char *hook6_out_gz =		"ipfilter_hook6_out_gz";

char *hook6_loop_in =		"ipfilter_hook6_loop_in";
char *hook6_loop_in_gz =	"ipfilter_hook6_loop_in_gz";

char *hook6_loop_out =		"ipfilter_hook6_loop_out";
char *hook6_loop_out_gz =	"ipfilter_hook6_loop_out_gz";
155 /* ------------------------------------------------------------------------ */
156 /* Function: ipldetach */
157 /* Returns: int - 0 == success, else error. */
158 /* Parameters: Nil */
159 /* */
160 /* This function is responsible for undoing anything that might have been */
161 /* done in a call to iplattach(). It must be able to clean up from a call */
162 /* to iplattach() that did not succeed. Why might that happen? Someone */
163 /* configures a table to be so large that we cannot allocate enough memory */
164 /* for it. */
165 /* ------------------------------------------------------------------------ */
166 int ipldetach(ifs)
167 ipf_stack_t *ifs;
170 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
172 #if SOLARIS2 < 10
174 if (ifs->ifs_fr_control_forwarding & 2) {
175 if (ip_forwarding != NULL)
176 *ip_forwarding = 0;
177 #if SOLARIS2 >= 8
178 if (ip6_forwarding != NULL)
179 *ip6_forwarding = 0;
180 #endif
182 #endif
185 * This lock needs to be dropped around the net_hook_unregister calls
186 * because we can deadlock here with:
187 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
188 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
190 RWLOCK_EXIT(&ifs->ifs_ipf_global);
192 #define UNDO_HOOK(_f, _b, _e, _h) \
193 do { \
194 if (ifs->_f != NULL) { \
195 if (ifs->_b) { \
196 int tmp = net_hook_unregister(ifs->_f, \
197 _e, ifs->_h); \
198 ifs->_b = (tmp != 0 && tmp != ENXIO); \
199 if (!ifs->_b && ifs->_h != NULL) { \
200 hook_free(ifs->_h); \
201 ifs->_h = NULL; \
203 } else if (ifs->_h != NULL) { \
204 hook_free(ifs->_h); \
205 ifs->_h = NULL; \
208 _NOTE(CONSTCOND) \
209 } while (0)
212 * Remove IPv6 Hooks
214 if (ifs->ifs_ipf_ipv6 != NULL) {
215 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
216 NH_PHYSICAL_IN, ifs_ipfhook6_in);
217 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
218 NH_PHYSICAL_OUT, ifs_ipfhook6_out);
219 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
220 NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
221 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
222 NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
223 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
224 NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
226 if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
227 goto detach_failed;
228 ifs->ifs_ipf_ipv6 = NULL;
232 * Remove IPv4 Hooks
234 if (ifs->ifs_ipf_ipv4 != NULL) {
235 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
236 NH_PHYSICAL_IN, ifs_ipfhook4_in);
237 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
238 NH_PHYSICAL_OUT, ifs_ipfhook4_out);
239 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
240 NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
241 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
242 NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
243 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
244 NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
246 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
247 goto detach_failed;
248 ifs->ifs_ipf_ipv4 = NULL;
251 #undef UNDO_HOOK
253 #ifdef IPFDEBUG
254 cmn_err(CE_CONT, "ipldetach()\n");
255 #endif
257 WRITE_ENTER(&ifs->ifs_ipf_global);
258 fr_deinitialise(ifs);
260 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
261 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
263 if (ifs->ifs_ipf_locks_done == 1) {
264 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
265 MUTEX_DESTROY(&ifs->ifs_ipf_rw);
266 RW_DESTROY(&ifs->ifs_ipf_tokens);
267 RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
268 ifs->ifs_ipf_locks_done = 0;
271 if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
272 ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
273 ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
274 ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
275 ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
276 return -1;
278 return 0;
280 detach_failed:
281 WRITE_ENTER(&ifs->ifs_ipf_global);
282 return -1;
285 int iplattach(ifs)
286 ipf_stack_t *ifs;
288 #if SOLARIS2 < 10
289 int i;
290 #endif
291 netid_t id = ifs->ifs_netid;
293 #ifdef IPFDEBUG
294 cmn_err(CE_CONT, "iplattach()\n");
295 #endif
297 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
298 ifs->ifs_fr_flags = IPF_LOGGING;
299 #ifdef _KERNEL
300 ifs->ifs_fr_update_ipid = 0;
301 #else
302 ifs->ifs_fr_update_ipid = 1;
303 #endif
304 ifs->ifs_fr_minttl = 4;
305 ifs->ifs_fr_icmpminfragmtu = 68;
306 #if defined(IPFILTER_DEFAULT_BLOCK)
307 ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
308 #else
309 ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
310 #endif
312 bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
313 MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
314 MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
315 RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
316 RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
317 ifs->ifs_ipf_locks_done = 1;
319 if (fr_initialise(ifs) < 0)
320 return -1;
323 * For incoming packets, we want the GZ-controlled hooks to run before
324 * the per-zone hooks, regardless of what order they're are installed.
325 * See the "GZ-controlled and per-zone stacks" comment block at the top
326 * of this file.
328 #define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a) \
329 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \
330 (x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER; \
331 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
333 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
334 hook4_nicevents, hook4_nicevents_gz, ifs);
335 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in,
336 hook4_in, hook4_in_gz, ifs);
337 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
338 hook4_loop_in, hook4_loop_in_gz, ifs);
341 * For outgoing packets, we want the GZ-controlled hooks to run after
342 * the per-zone hooks, regardless of what order they're are installed.
343 * See the "GZ-controlled and per-zone stacks" comment block at the top
344 * of this file.
346 #define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a) \
347 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \
348 (x)->h_hint = ifs->ifs_gz_controlled ? HH_AFTER : HH_BEFORE; \
349 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
351 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out,
352 hook4_out, hook4_out_gz, ifs);
353 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
354 hook4_loop_out, hook4_loop_out_gz, ifs);
357 * If we hold this lock over all of the net_hook_register calls, we
358 * can cause a deadlock to occur with the following lock ordering:
359 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
360 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
362 RWLOCK_EXIT(&ifs->ifs_ipf_global);
365 * Add IPv4 hooks
367 ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
368 if (ifs->ifs_ipf_ipv4 == NULL)
369 goto hookup_failed;
371 ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
372 NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
373 if (!ifs->ifs_hook4_nic_events)
374 goto hookup_failed;
376 ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
377 NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
378 if (!ifs->ifs_hook4_physical_in)
379 goto hookup_failed;
381 ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
382 NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
383 if (!ifs->ifs_hook4_physical_out)
384 goto hookup_failed;
386 if (ifs->ifs_ipf_loopback) {
387 ifs->ifs_hook4_loopback_in = (net_hook_register(
388 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
389 ifs->ifs_ipfhook4_loop_in) == 0);
390 if (!ifs->ifs_hook4_loopback_in)
391 goto hookup_failed;
393 ifs->ifs_hook4_loopback_out = (net_hook_register(
394 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
395 ifs->ifs_ipfhook4_loop_out) == 0);
396 if (!ifs->ifs_hook4_loopback_out)
397 goto hookup_failed;
401 * Add IPv6 hooks
403 ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
404 if (ifs->ifs_ipf_ipv6 == NULL)
405 goto hookup_failed;
407 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
408 hook6_nicevents, hook6_nicevents_gz, ifs);
409 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in,
410 hook6_in, hook6_in_gz, ifs);
411 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
412 hook6_loop_in, hook6_loop_in_gz, ifs);
413 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out,
414 hook6_out, hook6_out_gz, ifs);
415 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
416 hook6_loop_out, hook6_loop_out_gz, ifs);
418 ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
419 NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
420 if (!ifs->ifs_hook6_nic_events)
421 goto hookup_failed;
423 ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
424 NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
425 if (!ifs->ifs_hook6_physical_in)
426 goto hookup_failed;
428 ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
429 NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
430 if (!ifs->ifs_hook6_physical_out)
431 goto hookup_failed;
433 if (ifs->ifs_ipf_loopback) {
434 ifs->ifs_hook6_loopback_in = (net_hook_register(
435 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
436 ifs->ifs_ipfhook6_loop_in) == 0);
437 if (!ifs->ifs_hook6_loopback_in)
438 goto hookup_failed;
440 ifs->ifs_hook6_loopback_out = (net_hook_register(
441 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
442 ifs->ifs_ipfhook6_loop_out) == 0);
443 if (!ifs->ifs_hook6_loopback_out)
444 goto hookup_failed;
448 * Reacquire ipf_global, now it is safe.
450 WRITE_ENTER(&ifs->ifs_ipf_global);
452 /* Do not use private interface ip_params_arr[] in Solaris 10 */
453 #if SOLARIS2 < 10
455 #if SOLARIS2 >= 8
456 ip_forwarding = &ip_g_forward;
457 #endif
459 * XXX - There is no terminator for this array, so it is not possible
460 * to tell if what we are looking for is missing and go off the end
461 * of the array.
464 #if SOLARIS2 <= 8
465 for (i = 0; ; i++) {
466 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
467 ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
468 } else if (!strcmp(ip_param_arr[i].ip_param_name,
469 "ip_path_mtu_discovery")) {
470 ip_mtudisc = &ip_param_arr[i].ip_param_value;
472 #if SOLARIS2 < 8
473 else if (!strcmp(ip_param_arr[i].ip_param_name,
474 "ip_forwarding")) {
475 ip_forwarding = &ip_param_arr[i].ip_param_value;
477 #else
478 else if (!strcmp(ip_param_arr[i].ip_param_name,
479 "ip6_forwarding")) {
480 ip6_forwarding = &ip_param_arr[i].ip_param_value;
482 #endif
484 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
485 #if SOLARIS2 >= 8
486 ip6_forwarding != NULL &&
487 #endif
488 ip_forwarding != NULL)
489 break;
491 #endif
493 if (ifs->ifs_fr_control_forwarding & 1) {
494 if (ip_forwarding != NULL)
495 *ip_forwarding = 1;
496 #if SOLARIS2 >= 8
497 if (ip6_forwarding != NULL)
498 *ip6_forwarding = 1;
499 #endif
502 #endif
504 return 0;
505 hookup_failed:
506 WRITE_ENTER(&ifs->ifs_ipf_global);
507 return -1;
510 static int fr_setipfloopback(set, ifs)
511 int set;
512 ipf_stack_t *ifs;
514 if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
515 return EFAULT;
517 if (set && !ifs->ifs_ipf_loopback) {
518 ifs->ifs_ipf_loopback = 1;
520 ifs->ifs_hook4_loopback_in = (net_hook_register(
521 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
522 ifs->ifs_ipfhook4_loop_in) == 0);
523 if (!ifs->ifs_hook4_loopback_in)
524 return EINVAL;
526 ifs->ifs_hook4_loopback_out = (net_hook_register(
527 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
528 ifs->ifs_ipfhook4_loop_out) == 0);
529 if (!ifs->ifs_hook4_loopback_out)
530 return EINVAL;
532 ifs->ifs_hook6_loopback_in = (net_hook_register(
533 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
534 ifs->ifs_ipfhook6_loop_in) == 0);
535 if (!ifs->ifs_hook6_loopback_in)
536 return EINVAL;
538 ifs->ifs_hook6_loopback_out = (net_hook_register(
539 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
540 ifs->ifs_ipfhook6_loop_out) == 0);
541 if (!ifs->ifs_hook6_loopback_out)
542 return EINVAL;
544 } else if (!set && ifs->ifs_ipf_loopback) {
545 ifs->ifs_ipf_loopback = 0;
547 ifs->ifs_hook4_loopback_in =
548 (net_hook_unregister(ifs->ifs_ipf_ipv4,
549 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
550 if (ifs->ifs_hook4_loopback_in)
551 return EBUSY;
553 ifs->ifs_hook4_loopback_out =
554 (net_hook_unregister(ifs->ifs_ipf_ipv4,
555 NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
556 if (ifs->ifs_hook4_loopback_out)
557 return EBUSY;
559 ifs->ifs_hook6_loopback_in =
560 (net_hook_unregister(ifs->ifs_ipf_ipv6,
561 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
562 if (ifs->ifs_hook6_loopback_in)
563 return EBUSY;
565 ifs->ifs_hook6_loopback_out =
566 (net_hook_unregister(ifs->ifs_ipf_ipv6,
567 NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
568 if (ifs->ifs_hook6_loopback_out)
569 return EBUSY;
571 return 0;
576 * Filter ioctl interface.
578 /*ARGSUSED*/
579 int iplioctl(dev, cmd, data, mode, cp, rp)
580 dev_t dev;
581 int cmd;
582 #if SOLARIS2 >= 7
583 intptr_t data;
584 #else
585 int *data;
586 #endif
587 int mode;
588 cred_t *cp;
589 int *rp;
591 int error = 0, tmp;
592 friostat_t fio;
593 minor_t unit;
594 u_int enable;
595 ipf_stack_t *ifs;
596 zoneid_t zid;
597 ipf_devstate_t *isp;
599 #ifdef IPFDEBUG
600 cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
601 dev, cmd, data, mode, cp, rp);
602 #endif
603 unit = getminor(dev);
605 isp = ddi_get_soft_state(ipf_state, unit);
606 if (isp == NULL)
607 return ENXIO;
608 unit = isp->ipfs_minor;
610 zid = crgetzoneid(cp);
611 if (cmd == SIOCIPFZONESET) {
612 if (zid == GLOBAL_ZONEID)
613 return fr_setzoneid(isp, (caddr_t) data);
614 return EACCES;
618 * ipf_find_stack returns with a read lock on ifs_ipf_global
620 ifs = ipf_find_stack(zid, isp);
621 if (ifs == NULL)
622 return ENXIO;
624 if (ifs->ifs_fr_running <= 0) {
625 if (unit != IPL_LOGIPF) {
626 RWLOCK_EXIT(&ifs->ifs_ipf_global);
627 return EIO;
629 if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
630 cmd != SIOCIPFSET && cmd != SIOCFRENB &&
631 cmd != SIOCGETFS && cmd != SIOCGETFF) {
632 RWLOCK_EXIT(&ifs->ifs_ipf_global);
633 return EIO;
637 if (ifs->ifs_fr_enable_active != 0) {
638 RWLOCK_EXIT(&ifs->ifs_ipf_global);
639 return EBUSY;
642 error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
643 curproc, ifs);
644 if (error != -1) {
645 RWLOCK_EXIT(&ifs->ifs_ipf_global);
646 return error;
648 error = 0;
650 switch (cmd)
652 case SIOCFRENB :
653 if (!(mode & FWRITE))
654 error = EPERM;
655 else {
656 error = COPYIN((caddr_t)data, (caddr_t)&enable,
657 sizeof(enable));
658 if (error != 0) {
659 error = EFAULT;
660 break;
663 RWLOCK_EXIT(&ifs->ifs_ipf_global);
664 WRITE_ENTER(&ifs->ifs_ipf_global);
667 * We must recheck fr_enable_active here, since we've
668 * dropped ifs_ipf_global from R in order to get it
669 * exclusively.
671 if (ifs->ifs_fr_enable_active == 0) {
672 ifs->ifs_fr_enable_active = 1;
673 error = fr_enableipf(ifs, enable);
674 ifs->ifs_fr_enable_active = 0;
677 break;
678 case SIOCIPFSET :
679 if (!(mode & FWRITE)) {
680 error = EPERM;
681 break;
683 /* FALLTHRU */
684 case SIOCIPFGETNEXT :
685 case SIOCIPFGET :
686 error = fr_ipftune(cmd, (void *)data, ifs);
687 break;
688 case SIOCSETFF :
689 if (!(mode & FWRITE))
690 error = EPERM;
691 else {
692 error = COPYIN((caddr_t)data,
693 (caddr_t)&ifs->ifs_fr_flags,
694 sizeof(ifs->ifs_fr_flags));
695 if (error != 0)
696 error = EFAULT;
698 break;
699 case SIOCIPFLP :
700 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
701 sizeof(tmp));
702 if (error != 0)
703 error = EFAULT;
704 else
705 error = fr_setipfloopback(tmp, ifs);
706 break;
707 case SIOCGETFF :
708 error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
709 sizeof(ifs->ifs_fr_flags));
710 if (error != 0)
711 error = EFAULT;
712 break;
713 case SIOCFUNCL :
714 error = fr_resolvefunc((void *)data);
715 break;
716 case SIOCINAFR :
717 case SIOCRMAFR :
718 case SIOCADAFR :
719 case SIOCZRLST :
720 if (!(mode & FWRITE))
721 error = EPERM;
722 else
723 error = frrequest(unit, cmd, (caddr_t)data,
724 ifs->ifs_fr_active, 1, ifs);
725 break;
726 case SIOCINIFR :
727 case SIOCRMIFR :
728 case SIOCADIFR :
729 if (!(mode & FWRITE))
730 error = EPERM;
731 else
732 error = frrequest(unit, cmd, (caddr_t)data,
733 1 - ifs->ifs_fr_active, 1, ifs);
734 break;
735 case SIOCSWAPA :
736 if (!(mode & FWRITE))
737 error = EPERM;
738 else {
739 WRITE_ENTER(&ifs->ifs_ipf_mutex);
740 bzero((char *)ifs->ifs_frcache,
741 sizeof (ifs->ifs_frcache));
742 error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
743 (caddr_t)data,
744 sizeof(ifs->ifs_fr_active));
745 if (error != 0)
746 error = EFAULT;
747 else
748 ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
749 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
751 break;
752 case SIOCGETFS :
753 fr_getstat(&fio, ifs);
754 error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
755 break;
756 case SIOCFRZST :
757 if (!(mode & FWRITE))
758 error = EPERM;
759 else
760 error = fr_zerostats((caddr_t)data, ifs);
761 break;
762 case SIOCIPFFL :
763 if (!(mode & FWRITE))
764 error = EPERM;
765 else {
766 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
767 sizeof(tmp));
768 if (!error) {
769 tmp = frflush(unit, 4, tmp, ifs);
770 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
771 sizeof(tmp));
772 if (error != 0)
773 error = EFAULT;
774 } else
775 error = EFAULT;
777 break;
778 #ifdef USE_INET6
779 case SIOCIPFL6 :
780 if (!(mode & FWRITE))
781 error = EPERM;
782 else {
783 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
784 sizeof(tmp));
785 if (!error) {
786 tmp = frflush(unit, 6, tmp, ifs);
787 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
788 sizeof(tmp));
789 if (error != 0)
790 error = EFAULT;
791 } else
792 error = EFAULT;
794 break;
795 #endif
796 case SIOCSTLCK :
797 error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
798 if (error == 0) {
799 ifs->ifs_fr_state_lock = tmp;
800 ifs->ifs_fr_nat_lock = tmp;
801 ifs->ifs_fr_frag_lock = tmp;
802 ifs->ifs_fr_auth_lock = tmp;
803 } else
804 error = EFAULT;
805 break;
806 #ifdef IPFILTER_LOG
807 case SIOCIPFFB :
808 if (!(mode & FWRITE))
809 error = EPERM;
810 else {
811 tmp = ipflog_clear(unit, ifs);
812 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
813 sizeof(tmp));
814 if (error)
815 error = EFAULT;
817 break;
818 #endif /* IPFILTER_LOG */
819 case SIOCFRSYN :
820 if (!(mode & FWRITE))
821 error = EPERM;
822 else {
823 RWLOCK_EXIT(&ifs->ifs_ipf_global);
824 WRITE_ENTER(&ifs->ifs_ipf_global);
826 frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
827 fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
828 fr_nataddrsync(0, NULL, NULL, ifs);
829 fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
830 error = 0;
832 break;
833 case SIOCGFRST :
834 error = fr_outobj((void *)data, fr_fragstats(ifs),
835 IPFOBJ_FRAGSTAT);
836 break;
837 case FIONREAD :
838 #ifdef IPFILTER_LOG
839 tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
841 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
842 if (error != 0)
843 error = EFAULT;
844 #endif
845 break;
846 case SIOCIPFITER :
847 error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
848 curproc, ifs);
849 break;
851 case SIOCGENITER :
852 error = ipf_genericiter((caddr_t)data, crgetuid(cp),
853 curproc, ifs);
854 break;
856 case SIOCIPFDELTOK :
857 error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
858 if (error != 0) {
859 error = EFAULT;
860 } else {
861 error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
863 break;
865 default :
866 #ifdef IPFDEBUG
867 cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
868 cmd, (void *)data);
869 #endif
870 error = EINVAL;
871 break;
873 RWLOCK_EXIT(&ifs->ifs_ipf_global);
874 return error;
878 static int fr_enableipf(ifs, enable)
879 ipf_stack_t *ifs;
880 int enable;
882 int error;
884 if (!enable) {
885 error = ipldetach(ifs);
886 if (error == 0)
887 ifs->ifs_fr_running = -1;
888 return error;
891 if (ifs->ifs_fr_running > 0)
892 return 0;
894 error = iplattach(ifs);
895 if (error == 0) {
896 if (ifs->ifs_fr_timer_id == NULL) {
897 int hz = drv_usectohz(500000);
899 ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
900 (void *)ifs,
901 hz);
903 ifs->ifs_fr_running = 1;
904 } else {
905 (void) ipldetach(ifs);
907 return error;
911 phy_if_t get_unit(name, v, ifs)
912 char *name;
913 int v;
914 ipf_stack_t *ifs;
916 net_handle_t nif;
918 if (v == 4)
919 nif = ifs->ifs_ipf_ipv4;
920 else if (v == 6)
921 nif = ifs->ifs_ipf_ipv6;
922 else
923 return 0;
925 return (net_phylookup(nif, name));
/*
 * routines below for saving IP headers to buffer
 */
931 /*ARGSUSED*/
932 int iplopen(devp, flags, otype, cred)
933 dev_t *devp;
934 int flags, otype;
935 cred_t *cred;
937 ipf_devstate_t *isp;
938 minor_t min = getminor(*devp);
939 minor_t minor;
941 #ifdef IPFDEBUG
942 cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
943 #endif
944 if (!(otype & OTYP_CHR))
945 return ENXIO;
947 if (IPL_LOGMAX < min)
948 return ENXIO;
950 minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1,
951 VM_BESTFIT | VM_SLEEP);
953 if (ddi_soft_state_zalloc(ipf_state, minor) != 0) {
954 vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1);
955 return ENXIO;
958 *devp = makedevice(getmajor(*devp), minor);
959 isp = ddi_get_soft_state(ipf_state, minor);
960 VERIFY(isp != NULL);
962 isp->ipfs_minor = min;
963 isp->ipfs_zoneid = IPFS_ZONE_UNSET;
965 return 0;
969 /*ARGSUSED*/
970 int iplclose(dev, flags, otype, cred)
971 dev_t dev;
972 int flags, otype;
973 cred_t *cred;
975 minor_t min = getminor(dev);
977 #ifdef IPFDEBUG
978 cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
979 #endif
981 if (IPL_LOGMAX < min)
982 return ENXIO;
984 ddi_soft_state_free(ipf_state, min);
985 vmem_free(ipf_minor, (void *)(uintptr_t)min, 1);
987 return 0;
990 #ifdef IPFILTER_LOG
992 * iplread/ipllog
993 * both of these must operate with at least splnet() lest they be
994 * called during packet processing and cause an inconsistancy to appear in
995 * the filter lists.
997 /*ARGSUSED*/
998 int iplread(dev, uio, cp)
999 dev_t dev;
1000 register struct uio *uio;
1001 cred_t *cp;
1003 ipf_stack_t *ifs;
1004 int ret;
1005 minor_t unit;
1006 ipf_devstate_t *isp;
1008 unit = getminor(dev);
1009 isp = ddi_get_soft_state(ipf_state, unit);
1010 if (isp == NULL)
1011 return ENXIO;
1012 unit = isp->ipfs_minor;
1016 * ipf_find_stack returns with a read lock on ifs_ipf_global
1018 ifs = ipf_find_stack(crgetzoneid(cp), isp);
1019 if (ifs == NULL)
1020 return ENXIO;
1022 # ifdef IPFDEBUG
1023 cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
1024 # endif
1026 if (ifs->ifs_fr_running < 1) {
1027 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1028 return EIO;
1031 # ifdef IPFILTER_SYNC
1032 if (unit == IPL_LOGSYNC) {
1033 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1034 return ipfsync_read(uio);
1036 # endif
1038 ret = ipflog_read(unit, uio, ifs);
1039 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1040 return ret;
1042 #endif /* IPFILTER_LOG */
1046 * iplread/ipllog
1047 * both of these must operate with at least splnet() lest they be
1048 * called during packet processing and cause an inconsistancy to appear in
1049 * the filter lists.
1051 int iplwrite(dev, uio, cp)
1052 dev_t dev;
1053 register struct uio *uio;
1054 cred_t *cp;
1056 ipf_stack_t *ifs;
1057 minor_t unit;
1058 ipf_devstate_t *isp;
1060 unit = getminor(dev);
1061 isp = ddi_get_soft_state(ipf_state, unit);
1062 if (isp == NULL)
1063 return ENXIO;
1064 unit = isp->ipfs_minor;
1067 * ipf_find_stack returns with a read lock on ifs_ipf_global
1069 ifs = ipf_find_stack(crgetzoneid(cp), isp);
1070 if (ifs == NULL)
1071 return ENXIO;
1073 #ifdef IPFDEBUG
1074 cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
1075 #endif
1077 if (ifs->ifs_fr_running < 1) {
1078 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1079 return EIO;
1082 #ifdef IPFILTER_SYNC
1083 if (getminor(dev) == IPL_LOGSYNC) {
1084 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1085 return ipfsync_write(uio);
1087 #endif /* IPFILTER_SYNC */
1088 dev = dev; /* LINT */
1089 uio = uio; /* LINT */
1090 cp = cp; /* LINT */
1091 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1092 return ENXIO;
1097 * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
1098 * requires a large amount of setting up and isn't any more efficient.
1100 int fr_send_reset(fin)
1101 fr_info_t *fin;
1103 tcphdr_t *tcp, *tcp2;
1104 int tlen, hlen;
1105 mblk_t *m;
1106 #ifdef USE_INET6
1107 ip6_t *ip6;
1108 #endif
1109 ip_t *ip;
1111 tcp = fin->fin_dp;
1112 if (tcp->th_flags & TH_RST)
1113 return -1;
1115 #ifndef IPFILTER_CKSUM
1116 if (fr_checkl4sum(fin) == -1)
1117 return -1;
1118 #endif
1120 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
1121 #ifdef USE_INET6
1122 if (fin->fin_v == 6)
1123 hlen = sizeof(ip6_t);
1124 else
1125 #endif
1126 hlen = sizeof(ip_t);
1127 hlen += sizeof(*tcp2);
1128 if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
1129 return -1;
1131 m->b_rptr += 64;
1132 MTYPE(m) = M_DATA;
1133 m->b_wptr = m->b_rptr + hlen;
1134 ip = (ip_t *)m->b_rptr;
1135 bzero((char *)ip, hlen);
1136 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
1137 tcp2->th_dport = tcp->th_sport;
1138 tcp2->th_sport = tcp->th_dport;
1139 if (tcp->th_flags & TH_ACK) {
1140 tcp2->th_seq = tcp->th_ack;
1141 tcp2->th_flags = TH_RST;
1142 } else {
1143 tcp2->th_ack = ntohl(tcp->th_seq);
1144 tcp2->th_ack += tlen;
1145 tcp2->th_ack = htonl(tcp2->th_ack);
1146 tcp2->th_flags = TH_RST|TH_ACK;
1148 tcp2->th_off = sizeof(struct tcphdr) >> 2;
1150 ip->ip_v = fin->fin_v;
1151 #ifdef USE_INET6
1152 if (fin->fin_v == 6) {
1153 ip6 = (ip6_t *)m->b_rptr;
1154 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1155 ip6->ip6_src = fin->fin_dst6.in6;
1156 ip6->ip6_dst = fin->fin_src6.in6;
1157 ip6->ip6_plen = htons(sizeof(*tcp));
1158 ip6->ip6_nxt = IPPROTO_TCP;
1159 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1160 } else
1161 #endif
1163 ip->ip_src.s_addr = fin->fin_daddr;
1164 ip->ip_dst.s_addr = fin->fin_saddr;
1165 ip->ip_id = fr_nextipid(fin);
1166 ip->ip_hl = sizeof(*ip) >> 2;
1167 ip->ip_p = IPPROTO_TCP;
1168 ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1169 ip->ip_tos = fin->fin_ip->ip_tos;
1170 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1172 return fr_send_ip(fin, m, &m);
/*
 * Function:	fr_send_ip
 * Returns:	 0: success
 *		-1: failed
 * Parameters:
 *	fin: packet information
 *	m:   the message block where the IP header starts
 *	mpp: address of the pointer to that message block
 *
 * Send a new packet through the IP stack.
 *
 * For IPv4 packets, ip_len must be in host byte order, and ip_v,
 * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
 * function).
 *
 * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
 * in by this function.
 *
 * All other portions of the packet must be in on-the-wire format.
 */
1194 /*ARGSUSED*/
1195 static int fr_send_ip(fin, m, mpp)
1196 fr_info_t *fin;
1197 mblk_t *m, **mpp;
1199 qpktinfo_t qpi, *qpip;
1200 fr_info_t fnew;
1201 ip_t *ip;
1202 int i, hlen;
1203 ipf_stack_t *ifs = fin->fin_ifs;
1205 ip = (ip_t *)m->b_rptr;
1206 bzero((char *)&fnew, sizeof(fnew));
1208 #ifdef USE_INET6
1209 if (fin->fin_v == 6) {
1210 ip6_t *ip6;
1212 ip6 = (ip6_t *)ip;
1213 ip6->ip6_vfc = 0x60;
1214 ip6->ip6_hlim = 127;
1215 fnew.fin_v = 6;
1216 hlen = sizeof(*ip6);
1217 fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
1218 } else
1219 #endif
1221 fnew.fin_v = 4;
1222 #if SOLARIS2 >= 10
1223 ip->ip_ttl = 255;
1224 if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
1225 ip->ip_off = htons(IP_DF);
1226 #else
1227 if (ip_ttl_ptr != NULL)
1228 ip->ip_ttl = (u_char)(*ip_ttl_ptr);
1229 else
1230 ip->ip_ttl = 63;
1231 if (ip_mtudisc != NULL)
1232 ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
1233 else
1234 ip->ip_off = htons(IP_DF);
1235 #endif
1237 * The dance with byte order and ip_len/ip_off is because in
1238 * fr_fastroute, it expects them to be in host byte order but
1239 * ipf_cksum expects them to be in network byte order.
1241 ip->ip_len = htons(ip->ip_len);
1242 ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
1243 ip->ip_len = ntohs(ip->ip_len);
1244 ip->ip_off = ntohs(ip->ip_off);
1245 hlen = sizeof(*ip);
1246 fnew.fin_plen = ip->ip_len;
1249 qpip = fin->fin_qpi;
1250 qpi.qpi_off = 0;
1251 qpi.qpi_ill = qpip->qpi_ill;
1252 qpi.qpi_m = m;
1253 qpi.qpi_data = ip;
1254 fnew.fin_qpi = &qpi;
1255 fnew.fin_ifp = fin->fin_ifp;
1256 fnew.fin_flx = FI_NOCKSUM;
1257 fnew.fin_m = m;
1258 fnew.fin_qfm = m;
1259 fnew.fin_ip = ip;
1260 fnew.fin_mp = mpp;
1261 fnew.fin_hlen = hlen;
1262 fnew.fin_dp = (char *)ip + hlen;
1263 fnew.fin_ifs = fin->fin_ifs;
1264 (void) fr_makefrip(hlen, ip, &fnew);
1266 i = fr_fastroute(m, mpp, &fnew, NULL);
1267 return i;
1271 int fr_send_icmp_err(type, fin, dst)
1272 int type;
1273 fr_info_t *fin;
1274 int dst;
1276 struct in_addr dst4;
1277 struct icmp *icmp;
1278 qpktinfo_t *qpi;
1279 int hlen, code;
1280 phy_if_t phy;
1281 u_short sz;
1282 #ifdef USE_INET6
1283 mblk_t *mb;
1284 #endif
1285 mblk_t *m;
1286 #ifdef USE_INET6
1287 ip6_t *ip6;
1288 #endif
1289 ip_t *ip;
1290 ipf_stack_t *ifs = fin->fin_ifs;
1292 if ((type < 0) || (type > ICMP_MAXTYPE))
1293 return -1;
1295 code = fin->fin_icode;
1296 #ifdef USE_INET6
1297 if ((code < 0) || (code >= ICMP_MAX_UNREACH))
1298 return -1;
1299 #endif
1301 #ifndef IPFILTER_CKSUM
1302 if (fr_checkl4sum(fin) == -1)
1303 return -1;
1304 #endif
1306 qpi = fin->fin_qpi;
1308 #ifdef USE_INET6
1309 mb = fin->fin_qfm;
1311 if (fin->fin_v == 6) {
1312 sz = sizeof(ip6_t);
1313 sz += MIN(mb->b_wptr - mb->b_rptr, 512);
1314 hlen = sizeof(ip6_t);
1315 type = icmptoicmp6types[type];
1316 if (type == ICMP6_DST_UNREACH)
1317 code = icmptoicmp6unreach[code];
1318 } else
1319 #endif
1321 if ((fin->fin_p == IPPROTO_ICMP) &&
1322 !(fin->fin_flx & FI_SHORT))
1323 switch (ntohs(fin->fin_data[0]) >> 8)
1325 case ICMP_ECHO :
1326 case ICMP_TSTAMP :
1327 case ICMP_IREQ :
1328 case ICMP_MASKREQ :
1329 break;
1330 default :
1331 return 0;
1334 sz = sizeof(ip_t) * 2;
1335 sz += 8; /* 64 bits of data */
1336 hlen = sizeof(ip_t);
1339 sz += offsetof(struct icmp, icmp_ip);
1340 if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
1341 return -1;
1342 MTYPE(m) = M_DATA;
1343 m->b_rptr += 64;
1344 m->b_wptr = m->b_rptr + sz;
1345 bzero((char *)m->b_rptr, (size_t)sz);
1346 ip = (ip_t *)m->b_rptr;
1347 ip->ip_v = fin->fin_v;
1348 icmp = (struct icmp *)(m->b_rptr + hlen);
1349 icmp->icmp_type = type & 0xff;
1350 icmp->icmp_code = code & 0xff;
1351 phy = (phy_if_t)qpi->qpi_ill;
1352 if (type == ICMP_UNREACH && (phy != 0) &&
1353 fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
1354 icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );
1356 #ifdef USE_INET6
1357 if (fin->fin_v == 6) {
1358 struct in6_addr dst6;
1359 int csz;
1361 if (dst == 0) {
1362 ipf_stack_t *ifs = fin->fin_ifs;
1364 if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
1365 (void *)&dst6, NULL, ifs) == -1) {
1366 FREE_MB_T(m);
1367 return -1;
1369 } else
1370 dst6 = fin->fin_dst6.in6;
1372 csz = sz;
1373 sz -= sizeof(ip6_t);
1374 ip6 = (ip6_t *)m->b_rptr;
1375 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1376 ip6->ip6_plen = htons((u_short)sz);
1377 ip6->ip6_nxt = IPPROTO_ICMPV6;
1378 ip6->ip6_src = dst6;
1379 ip6->ip6_dst = fin->fin_src6.in6;
1380 sz -= offsetof(struct icmp, icmp_ip);
1381 bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
1382 icmp->icmp_cksum = csz - sizeof(ip6_t);
1383 } else
1384 #endif
1386 ip->ip_hl = sizeof(*ip) >> 2;
1387 ip->ip_p = IPPROTO_ICMP;
1388 ip->ip_id = fin->fin_ip->ip_id;
1389 ip->ip_tos = fin->fin_ip->ip_tos;
1390 ip->ip_len = (u_short)sz;
1391 if (dst == 0) {
1392 ipf_stack_t *ifs = fin->fin_ifs;
1394 if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
1395 (void *)&dst4, NULL, ifs) == -1) {
1396 FREE_MB_T(m);
1397 return -1;
1399 } else {
1400 dst4 = fin->fin_dst;
1402 ip->ip_src = dst4;
1403 ip->ip_dst = fin->fin_src;
1404 bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
1405 sizeof(*fin->fin_ip));
1406 bcopy((char *)fin->fin_ip + fin->fin_hlen,
1407 (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
1408 icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
1409 icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
1410 icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
1411 sz - sizeof(ip_t));
1415 * Need to exit out of these so we don't recursively call rw_enter
1416 * from fr_qout.
1418 return fr_send_ip(fin, m, &m);
1421 #include <sys/time.h>
1422 #include <sys/varargs.h>
1424 #ifndef _KERNEL
1425 #include <stdio.h>
1426 #endif
1429 * Return the first IP Address associated with an interface
1430 * For IPv6, we walk through the list of logical interfaces and return
1431 * the address of the first one that isn't a link-local interface.
1432 * We can't assume that it is :1 because another link-local address
1433 * may have been assigned there.
1435 /*ARGSUSED*/
1436 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1437 int v, atype;
1438 void *ifptr;
1439 struct in_addr *inp, *inpmask;
1440 ipf_stack_t *ifs;
1442 struct sockaddr_in6 v6addr[2];
1443 struct sockaddr_in v4addr[2];
1444 net_ifaddr_t type[2];
1445 net_handle_t net_data;
1446 phy_if_t phyif;
1447 void *array;
1449 switch (v)
1451 case 4:
1452 net_data = ifs->ifs_ipf_ipv4;
1453 array = v4addr;
1454 break;
1455 case 6:
1456 net_data = ifs->ifs_ipf_ipv6;
1457 array = v6addr;
1458 break;
1459 default:
1460 net_data = NULL;
1461 break;
1464 if (net_data == NULL)
1465 return -1;
1467 phyif = (phy_if_t)ifptr;
1469 switch (atype)
1471 case FRI_PEERADDR :
1472 type[0] = NA_PEER;
1473 break;
1475 case FRI_BROADCAST :
1476 type[0] = NA_BROADCAST;
1477 break;
1479 default :
1480 type[0] = NA_ADDRESS;
1481 break;
1484 type[1] = NA_NETMASK;
1486 if (v == 6) {
1487 lif_if_t idx = 0;
1489 do {
1490 idx = net_lifgetnext(net_data, phyif, idx);
1491 if (net_getlifaddr(net_data, phyif, idx, 2, type,
1492 array) < 0)
1493 return -1;
1494 if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1495 !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1496 break;
1497 } while (idx != 0);
1499 if (idx == 0)
1500 return -1;
1502 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1503 inp, inpmask);
1506 if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1507 return -1;
1509 return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1513 u_32_t fr_newisn(fin)
1514 fr_info_t *fin;
1516 static int iss_seq_off = 0;
1517 u_char hash[16];
1518 u_32_t newiss;
1519 MD5_CTX ctx;
1520 ipf_stack_t *ifs = fin->fin_ifs;
1523 * Compute the base value of the ISS. It is a hash
1524 * of (saddr, sport, daddr, dport, secret).
1526 MD5Init(&ctx);
1528 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1529 sizeof(fin->fin_fi.fi_src));
1530 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1531 sizeof(fin->fin_fi.fi_dst));
1532 MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1534 MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1536 MD5Final(hash, &ctx);
1538 bcopy(hash, &newiss, sizeof(newiss));
1541 * Now increment our "timer", and add it in to
1542 * the computed value.
1544 * XXX Use `addin'?
1545 * XXX TCP_ISSINCR too large to use?
1547 iss_seq_off += 0x00010000;
1548 newiss += iss_seq_off;
1549 return newiss;
1553 /* ------------------------------------------------------------------------ */
1554 /* Function: fr_nextipid */
1555 /* Returns: int - 0 == success, -1 == error (packet should be droppped) */
1556 /* Parameters: fin(I) - pointer to packet information */
1557 /* */
1558 /* Returns the next IPv4 ID to use for this packet. */
1559 /* ------------------------------------------------------------------------ */
1560 u_short fr_nextipid(fin)
1561 fr_info_t *fin;
1563 static u_short ipid = 0;
1564 u_short id;
1565 ipf_stack_t *ifs = fin->fin_ifs;
1567 MUTEX_ENTER(&ifs->ifs_ipf_rw);
1568 if (fin->fin_pktnum != 0) {
1569 id = fin->fin_pktnum & 0xffff;
1570 } else {
1571 id = ipid++;
1573 MUTEX_EXIT(&ifs->ifs_ipf_rw);
1575 return id;
1579 #ifndef IPFILTER_CKSUM
1580 /* ARGSUSED */
1581 #endif
1582 INLINE void fr_checkv4sum(fin)
1583 fr_info_t *fin;
1585 #ifdef IPFILTER_CKSUM
1586 if (fr_checkl4sum(fin) == -1)
1587 fin->fin_flx |= FI_BAD;
1588 #endif
#ifdef USE_INET6
/*
 * Function:	fr_checkv6sum
 * Parameters:	fin - packet information
 *
 * IPv6 counterpart of fr_checkv4sum(): when built with IPFILTER_CKSUM,
 * flag the packet with FI_BAD if fr_checkl4sum() reports a failure (-1).
 * Without IPFILTER_CKSUM this function does nothing.
 */
# ifndef IPFILTER_CKSUM
/* ARGSUSED */
# endif
INLINE void fr_checkv6sum(fin)
fr_info_t *fin;
{
# ifdef IPFILTER_CKSUM
	if (fr_checkl4sum(fin) != -1)
		return;
	fin->fin_flx |= FI_BAD;
# endif
}
#endif /* USE_INET6 */
1607 #if (SOLARIS2 < 7)
1608 void fr_slowtimer()
1609 #else
1610 /*ARGSUSED*/
1611 void fr_slowtimer __P((void *arg))
1612 #endif
1614 ipf_stack_t *ifs = arg;
1616 READ_ENTER(&ifs->ifs_ipf_global);
1617 if (ifs->ifs_fr_running != 1) {
1618 ifs->ifs_fr_timer_id = NULL;
1619 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1620 return;
1622 ipf_expiretokens(ifs);
1623 fr_fragexpire(ifs);
1624 fr_timeoutstate(ifs);
1625 fr_natexpire(ifs);
1626 fr_authexpire(ifs);
1627 ifs->ifs_fr_ticks++;
1628 if (ifs->ifs_fr_running == 1)
1629 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1630 drv_usectohz(500000));
1631 else
1632 ifs->ifs_fr_timer_id = NULL;
1633 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1637 /* ------------------------------------------------------------------------ */
1638 /* Function: fr_pullup */
1639 /* Returns: NULL == pullup failed, else pointer to protocol header */
1640 /* Parameters: m(I) - pointer to buffer where data packet starts */
1641 /* fin(I) - pointer to packet information */
1642 /* len(I) - number of bytes to pullup */
1643 /* */
1644 /* Attempt to move at least len bytes (from the start of the buffer) into a */
1645 /* single buffer for ease of access. Operating system native functions are */
1646 /* used to manage buffers - if necessary. If the entire packet ends up in */
1647 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has */
1648 /* not been called. Both fin_ip and fin_dp are updated before exiting _IF_ */
1649 /* and ONLY if the pullup succeeds. */
1650 /* */
1651 /* We assume that 'min' is a pointer to a buffer that is part of the chain */
1652 /* of buffers that starts at *fin->fin_mp. */
1653 /* ------------------------------------------------------------------------ */
1654 void *fr_pullup(min, fin, len)
1655 mb_t *min;
1656 fr_info_t *fin;
1657 int len;
1659 qpktinfo_t *qpi = fin->fin_qpi;
1660 int out = fin->fin_out, dpoff, ipoff;
1661 mb_t *m = min, *m1, *m2;
1662 char *ip;
1663 uint32_t start, stuff, end, value, flags;
1664 ipf_stack_t *ifs = fin->fin_ifs;
1666 if (m == NULL)
1667 return NULL;
1669 ip = (char *)fin->fin_ip;
1670 if ((fin->fin_flx & FI_COALESCE) != 0)
1671 return ip;
1673 ipoff = fin->fin_ipoff;
1674 if (fin->fin_dp != NULL)
1675 dpoff = (char *)fin->fin_dp - (char *)ip;
1676 else
1677 dpoff = 0;
1679 if (M_LEN(m) < len + ipoff) {
1682 * pfil_precheck ensures the IP header is on a 32bit
1683 * aligned address so simply fail if that isn't currently
1684 * the case (should never happen).
1686 int inc = 0;
1688 if (ipoff > 0) {
1689 if ((ipoff & 3) != 0) {
1690 inc = 4 - (ipoff & 3);
1691 if (m->b_rptr - inc >= m->b_datap->db_base)
1692 m->b_rptr -= inc;
1693 else
1694 inc = 0;
1699 * XXX This is here as a work around for a bug with DEBUG
1700 * XXX Solaris kernels. The problem is b_prev is used by IP
1701 * XXX code as a way to stash the phyint_index for a packet,
1702 * XXX this doesn't get reset by IP but freeb does an ASSERT()
1703 * XXX for both of these to be NULL. See 6442390.
1705 m1 = m;
1706 m2 = m->b_prev;
1708 do {
1709 m1->b_next = NULL;
1710 m1->b_prev = NULL;
1711 m1 = m1->b_cont;
1712 } while (m1);
1715 * Need to preserve checksum information by copying them
1716 * to newmp which heads the pulluped message.
1718 hcksum_retrieve(m, NULL, NULL, &start, &stuff, &end,
1719 &value, &flags);
1721 if (pullupmsg(m, len + ipoff + inc) == 0) {
1722 ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
1723 FREE_MB_T(*fin->fin_mp);
1724 *fin->fin_mp = NULL;
1725 fin->fin_m = NULL;
1726 fin->fin_ip = NULL;
1727 fin->fin_dp = NULL;
1728 qpi->qpi_data = NULL;
1729 return NULL;
1732 (void) hcksum_assoc(m, NULL, NULL, start, stuff, end,
1733 value, flags, 0);
1735 m->b_prev = m2;
1736 m->b_rptr += inc;
1737 fin->fin_m = m;
1738 ip = MTOD(m, char *) + ipoff;
1739 qpi->qpi_data = ip;
1742 ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
1743 fin->fin_ip = (ip_t *)ip;
1744 if (fin->fin_dp != NULL)
1745 fin->fin_dp = (char *)fin->fin_ip + dpoff;
1747 if (len == fin->fin_plen)
1748 fin->fin_flx |= FI_COALESCE;
1749 return ip;
1754 * Function: fr_verifysrc
1755 * Returns: int (really boolean)
1756 * Parameters: fin - packet information
1758 * Check whether the packet has a valid source address for the interface on
1759 * which the packet arrived, implementing the "fr_chksrc" feature.
1760 * Returns true iff the packet's source address is valid.
1762 int fr_verifysrc(fin)
1763 fr_info_t *fin;
1765 net_handle_t net_data_p;
1766 phy_if_t phy_ifdata_routeto;
1767 struct sockaddr sin;
1768 ipf_stack_t *ifs = fin->fin_ifs;
1770 if (fin->fin_v == 4) {
1771 net_data_p = ifs->ifs_ipf_ipv4;
1772 } else if (fin->fin_v == 6) {
1773 net_data_p = ifs->ifs_ipf_ipv6;
1774 } else {
1775 return (0);
1778 /* Get the index corresponding to the if name */
1779 sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1780 bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
1781 phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
1783 return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0);
1787 * Return true only if forwarding is enabled on the interface.
1789 static int
1790 fr_forwarding_enabled(phy_if_t phyif, net_handle_t ndp)
1792 lif_if_t lif;
1794 for (lif = net_lifgetnext(ndp, phyif, 0); lif > 0;
1795 lif = net_lifgetnext(ndp, phyif, lif)) {
1796 int res;
1797 uint64_t flags;
1799 res = net_getlifflags(ndp, phyif, lif, &flags);
1800 if (res != 0)
1801 return (0);
1802 if (flags & IFF_ROUTER)
1803 return (1);
1806 return (0);
1810 * Function: fr_fastroute
1811 * Returns: 0: success;
1812 * -1: failed
1813 * Parameters:
1814 * mb: the message block where ip head starts
1815 * mpp: the pointer to the pointer of the orignal
1816 * packet message
1817 * fin: packet information
1818 * fdp: destination interface information
1819 * if it is NULL, no interface information provided.
1821 * This function is for fastroute/to/dup-to rules. It calls
1822 * pfil_make_lay2_packet to search route, make lay-2 header
1823 * ,and identify output queue for the IP packet.
1824 * The destination address depends on the following conditions:
1825 * 1: for fastroute rule, fdp is passed in as NULL, so the
1826 * destination address is the IP Packet's destination address
1827 * 2: for to/dup-to rule, if an ip address is specified after
1828 * the interface name, this address is the as destination
1829 * address. Otherwise IP Packet's destination address is used
1831 int fr_fastroute(mb, mpp, fin, fdp)
1832 mblk_t *mb, **mpp;
1833 fr_info_t *fin;
1834 frdest_t *fdp;
1836 net_handle_t net_data_p;
1837 net_inject_t *inj;
1838 mblk_t *mp = NULL;
1839 frentry_t *fr = fin->fin_fr;
1840 qpktinfo_t *qpi;
1841 ip_t *ip;
1843 struct sockaddr_in *sin;
1844 struct sockaddr_in6 *sin6;
1845 struct sockaddr *sinp;
1846 ipf_stack_t *ifs = fin->fin_ifs;
1847 #ifndef sparc
1848 u_short __iplen, __ipoff;
1849 #endif
1851 if (fin->fin_v == 4) {
1852 net_data_p = ifs->ifs_ipf_ipv4;
1853 } else if (fin->fin_v == 6) {
1854 net_data_p = ifs->ifs_ipf_ipv6;
1855 } else {
1856 return (-1);
1859 /* Check the src here, fin_ifp is the src interface. */
1860 if (!fr_forwarding_enabled((phy_if_t)fin->fin_ifp, net_data_p))
1861 return (-1);
1863 inj = net_inject_alloc(NETINFO_VERSION);
1864 if (inj == NULL)
1865 return -1;
1867 ip = fin->fin_ip;
1868 qpi = fin->fin_qpi;
1871 * If this is a duplicate mblk then we want ip to point at that
1872 * data, not the original, if and only if it is already pointing at
1873 * the current mblk data.
1875 * Otherwise, if it's not a duplicate, and we're not already pointing
1876 * at the current mblk data, then we want to ensure that the data
1877 * points at ip.
1880 if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
1881 ip = (ip_t *)mb->b_rptr;
1882 } else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
1883 qpi->qpi_m->b_rptr = (uchar_t *)ip;
1884 qpi->qpi_off = 0;
1888 * If there is another M_PROTO, we don't want it
1890 if (*mpp != mb) {
1891 mp = unlinkb(*mpp);
1892 freeb(*mpp);
1893 *mpp = mp;
1896 sinp = (struct sockaddr *)&inj->ni_addr;
1897 sin = (struct sockaddr_in *)sinp;
1898 sin6 = (struct sockaddr_in6 *)sinp;
1899 bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
1900 inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1901 inj->ni_packet = mb;
1904 * In case we're here due to "to <if>" being used with
1905 * "keep state", check that we're going in the correct
1906 * direction.
1908 if (fdp != NULL) {
1909 if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
1910 (fin->fin_rev != 0) && (fdp == &fr->fr_tif))
1911 goto bad_fastroute;
1912 inj->ni_physical = (phy_if_t)fdp->fd_ifp;
1913 if (fin->fin_v == 4) {
1914 sin->sin_addr = fdp->fd_ip;
1915 } else {
1916 sin6->sin6_addr = fdp->fd_ip6.in6;
1918 } else {
1919 if (fin->fin_v == 4) {
1920 sin->sin_addr = ip->ip_dst;
1921 } else {
1922 sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
1924 inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
1927 /* we're checking the destinatation here */
1928 if (!fr_forwarding_enabled(inj->ni_physical, net_data_p))
1929 goto bad_fastroute;
1932 * Clear the hardware checksum flags from packets that we are doing
1933 * input processing on as leaving them set will cause the outgoing
1934 * NIC (if it supports hardware checksum) to calculate them anew,
1935 * using the old (correct) checksums as the pseudo value to start
1936 * from.
1938 if (fin->fin_out == 0) {
1939 DB_CKSUMFLAGS(mb) = 0;
1942 *mpp = mb;
1944 if (fin->fin_out == 0) {
1945 void *saveifp;
1946 u_32_t pass;
1948 saveifp = fin->fin_ifp;
1949 fin->fin_ifp = (void *)inj->ni_physical;
1950 fin->fin_flx &= ~FI_STATE;
1951 fin->fin_out = 1;
1952 (void) fr_acctpkt(fin, &pass);
1953 fin->fin_fr = NULL;
1954 if (!fr || !(fr->fr_flags & FR_RETMASK))
1955 (void) fr_checkstate(fin, &pass);
1956 if (fr_checknatout(fin, NULL) == -1)
1957 goto bad_fastroute;
1958 fin->fin_out = 0;
1959 fin->fin_ifp = saveifp;
1961 #ifndef sparc
1962 if (fin->fin_v == 4) {
1963 __iplen = (u_short)ip->ip_len,
1964 __ipoff = (u_short)ip->ip_off;
1966 ip->ip_len = htons(__iplen);
1967 ip->ip_off = htons(__ipoff);
1969 #endif
1971 if (net_data_p) {
1972 if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
1973 net_inject_free(inj);
1974 return (-1);
1978 ifs->ifs_fr_frouteok[0]++;
1979 net_inject_free(inj);
1980 return 0;
1981 bad_fastroute:
1982 net_inject_free(inj);
1983 freemsg(mb);
1984 ifs->ifs_fr_frouteok[1]++;
1985 return -1;
1989 /* ------------------------------------------------------------------------ */
1990 /* Function: ipf_hook4_out */
1991 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
1992 /* Parameters: event(I) - pointer to event */
1993 /* info(I) - pointer to hook information for firewalling */
1994 /* */
1995 /* Calling ipf_hook. */
1996 /* ------------------------------------------------------------------------ */
1997 /*ARGSUSED*/
1998 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
2000 return ipf_hook(info, 1, 0, arg);
2002 /*ARGSUSED*/
2003 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
2005 return ipf_hook6(info, 1, 0, arg);
2008 /* ------------------------------------------------------------------------ */
2009 /* Function: ipf_hook4_in */
2010 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2011 /* Parameters: event(I) - pointer to event */
2012 /* info(I) - pointer to hook information for firewalling */
2013 /* */
2014 /* Calling ipf_hook. */
2015 /* ------------------------------------------------------------------------ */
2016 /*ARGSUSED*/
2017 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
2019 return ipf_hook(info, 0, 0, arg);
2021 /*ARGSUSED*/
2022 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
2024 return ipf_hook6(info, 0, 0, arg);
2028 /* ------------------------------------------------------------------------ */
2029 /* Function: ipf_hook4_loop_out */
2030 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2031 /* Parameters: event(I) - pointer to event */
2032 /* info(I) - pointer to hook information for firewalling */
2033 /* */
2034 /* Calling ipf_hook. */
2035 /* ------------------------------------------------------------------------ */
2036 /*ARGSUSED*/
2037 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2039 return ipf_hook(info, 1, FI_NOCKSUM, arg);
2041 /*ARGSUSED*/
2042 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2044 return ipf_hook6(info, 1, FI_NOCKSUM, arg);
2047 /* ------------------------------------------------------------------------ */
2048 /* Function: ipf_hook4_loop_in */
2049 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2050 /* Parameters: event(I) - pointer to event */
2051 /* info(I) - pointer to hook information for firewalling */
2052 /* */
2053 /* Calling ipf_hook. */
2054 /* ------------------------------------------------------------------------ */
2055 /*ARGSUSED*/
2056 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2058 return ipf_hook(info, 0, FI_NOCKSUM, arg);
2060 /*ARGSUSED*/
2061 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2063 return ipf_hook6(info, 0, FI_NOCKSUM, arg);
2066 /* ------------------------------------------------------------------------ */
2067 /* Function: ipf_hook */
2068 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2069 /* Parameters: info(I) - pointer to hook information for firewalling */
2070 /* out(I) - whether packet is going in or out */
2071 /* loopback(I) - whether packet is a loopback packet or not */
2072 /* */
2073 /* Stepping stone function between the IP mainline and IPFilter. Extracts */
2074 /* parameters out of the info structure and forms them up to be useful for */
2075 /* calling ipfilter. */
2076 /* ------------------------------------------------------------------------ */
2077 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
2079 hook_pkt_event_t *fw;
2080 ipf_stack_t *ifs;
2081 qpktinfo_t qpi;
2082 int rval, hlen;
2083 u_short swap;
2084 phy_if_t phy;
2085 ip_t *ip;
2087 ifs = arg;
2088 fw = (hook_pkt_event_t *)info;
2090 ASSERT(fw != NULL);
2091 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2093 ip = fw->hpe_hdr;
2094 swap = ntohs(ip->ip_len);
2095 ip->ip_len = swap;
2096 swap = ntohs(ip->ip_off);
2097 ip->ip_off = swap;
2098 hlen = IPH_HDR_LENGTH(ip);
2100 qpi.qpi_m = fw->hpe_mb;
2101 qpi.qpi_data = fw->hpe_hdr;
2102 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2103 qpi.qpi_ill = (void *)phy;
2104 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2105 if (qpi.qpi_flags)
2106 qpi.qpi_flags |= FI_MBCAST;
2107 qpi.qpi_flags |= loopback;
2109 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2110 &qpi, fw->hpe_mp, ifs);
2112 /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2113 if (rval == 0 && *(fw->hpe_mp) == NULL)
2114 rval = 1;
2116 /* Notify IP the packet mblk_t and IP header pointers. */
2117 fw->hpe_mb = qpi.qpi_m;
2118 fw->hpe_hdr = qpi.qpi_data;
2119 if (rval == 0) {
2120 ip = qpi.qpi_data;
2121 swap = ntohs(ip->ip_len);
2122 ip->ip_len = swap;
2123 swap = ntohs(ip->ip_off);
2124 ip->ip_off = swap;
2126 return rval;
2129 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
2131 hook_pkt_event_t *fw;
2132 int rval, hlen;
2133 qpktinfo_t qpi;
2134 phy_if_t phy;
2136 fw = (hook_pkt_event_t *)info;
2138 ASSERT(fw != NULL);
2139 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2141 hlen = sizeof (ip6_t);
2143 qpi.qpi_m = fw->hpe_mb;
2144 qpi.qpi_data = fw->hpe_hdr;
2145 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2146 qpi.qpi_ill = (void *)phy;
2147 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2148 if (qpi.qpi_flags)
2149 qpi.qpi_flags |= FI_MBCAST;
2150 qpi.qpi_flags |= loopback;
2152 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2153 &qpi, fw->hpe_mp, arg);
2155 /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2156 if (rval == 0 && *(fw->hpe_mp) == NULL)
2157 rval = 1;
2159 /* Notify IP the packet mblk_t and IP header pointers. */
2160 fw->hpe_mb = qpi.qpi_m;
2161 fw->hpe_hdr = qpi.qpi_data;
2162 return rval;
2166 /* ------------------------------------------------------------------------ */
2167 /* Function: ipf_nic_event_v4 */
2168 /* Returns: int - 0 == no problems encountered */
2169 /* Parameters: event(I) - pointer to event */
2170 /* info(I) - pointer to information about a NIC event */
2171 /* */
2172 /* Function to receive asynchronous NIC events from IP */
2173 /* ------------------------------------------------------------------------ */
2174 /*ARGSUSED*/
2175 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2177 struct sockaddr_in *sin;
2178 hook_nic_event_t *hn;
2179 ipf_stack_t *ifs = arg;
2180 void *new_ifp = NULL;
2182 if (ifs->ifs_fr_running <= 0)
2183 return (0);
2185 hn = (hook_nic_event_t *)info;
2187 switch (hn->hne_event)
2189 case NE_PLUMB :
2190 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2191 ifs);
2192 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2193 hn->hne_data, ifs);
2194 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2195 hn->hne_data, ifs);
2196 break;
2198 case NE_UNPLUMB :
2199 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2200 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2201 ifs);
2202 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2203 break;
2205 case NE_ADDRESS_CHANGE :
2207 * We only respond to events for logical interface 0 because
2208 * IPFilter only uses the first address given to a network
2209 * interface. We check for hne_lif==1 because the netinfo
2210 * code maps adds 1 to the lif number so that it can return
2211 * 0 to indicate "no more lifs" when walking them.
2213 if (hn->hne_lif == 1) {
2214 frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2215 ifs);
2216 sin = hn->hne_data;
2217 fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2218 ifs);
2220 break;
2222 #if SOLARIS2 >= 10
2223 case NE_IFINDEX_CHANGE :
2224 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2226 if (hn->hne_data != NULL) {
2228 * The netinfo passes interface index as int (hne_data should be
2229 * handled as a pointer to int), which is always 32bit. We need to
2230 * convert it to void pointer here, since interfaces are
2231 * represented as pointers to void in IPF. The pointers are 64 bits
2232 * long on 64bit platforms. Doing something like
2233 * (void *)((int) x)
2234 * will throw warning:
2235 * "cast to pointer from integer of different size"
2236 * during 64bit compilation.
2238 * The line below uses (size_t) to typecast int to
2239 * size_t, which might be 64bit/32bit (depending
2240 * on architecture). Once we have proper 64bit/32bit
2241 * type (size_t), we can safely convert it to void pointer.
2243 new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2244 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2245 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2246 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2248 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2249 break;
2250 #endif
2252 default :
2253 break;
2256 return 0;
2260 /* ------------------------------------------------------------------------ */
2261 /* Function: ipf_nic_event_v6 */
2262 /* Returns: int - 0 == no problems encountered */
2263 /* Parameters: event(I) - pointer to event */
2264 /* info(I) - pointer to information about a NIC event */
2265 /* */
2266 /* Function to receive asynchronous NIC events from IP */
2267 /* ------------------------------------------------------------------------ */
2268 /*ARGSUSED*/
2269 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2271 struct sockaddr_in6 *sin6;
2272 hook_nic_event_t *hn;
2273 ipf_stack_t *ifs = arg;
2274 void *new_ifp = NULL;
2276 if (ifs->ifs_fr_running <= 0)
2277 return (0);
2279 hn = (hook_nic_event_t *)info;
2281 switch (hn->hne_event)
2283 case NE_PLUMB :
2284 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2285 hn->hne_data, ifs);
2286 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2287 hn->hne_data, ifs);
2288 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2289 hn->hne_data, ifs);
2290 break;
2292 case NE_UNPLUMB :
2293 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2294 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2295 ifs);
2296 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2297 break;
2299 case NE_ADDRESS_CHANGE :
2300 if (hn->hne_lif == 1) {
2301 sin6 = hn->hne_data;
2302 fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2303 ifs);
2305 break;
2307 #if SOLARIS2 >= 10
2308 case NE_IFINDEX_CHANGE :
2309 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2310 if (hn->hne_data != NULL) {
2312 * The netinfo passes interface index as int (hne_data should be
2313 * handled as a pointer to int), which is always 32bit. We need to
2314 * convert it to void pointer here, since interfaces are
2315 * represented as pointers to void in IPF. The pointers are 64 bits
2316 * long on 64bit platforms. Doing something like
2317 * (void *)((int) x)
2318 * will throw warning:
2319 * "cast to pointer from integer of different size"
2320 * during 64bit compilation.
2322 * The line below uses (size_t) to typecast int to
2323 * size_t, which might be 64bit/32bit (depending
2324 * on architecture). Once we have proper 64bit/32bit
2325 * type (size_t), we can safely convert it to void pointer.
2327 new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2328 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2329 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2330 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2332 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2333 break;
2334 #endif
2336 default :
2337 break;
2340 return 0;
2344 * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2345 * are needed in Solaris kernel only. We don't need them in
2346 * ipftest to pretend the ICMP/RST packet was sent as a response.
2348 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2349 /* ------------------------------------------------------------------------ */
2350 /* Function: fr_make_rst */
2351 /* Returns: int - 0 on success, -1 on failure */
2352 /* Parameters: fin(I) - pointer to packet information */
2353 /* */
2354 /* We must alter the original mblks passed to IPF from IP stack via */
2355 /* FW_HOOKS. FW_HOOKS interface is powerful, but it has some limitations. */
2356 /* IPF can basically do only these things with mblk representing the packet: */
2357 /* leave it as it is (pass the packet) */
2358 /* */
2359 /* discard it (block the packet) */
2360 /* */
2361 /* alter it (i.e. NAT) */
2362 /* */
2363 /* As you can see IPF can not simply discard the mblk and supply a new one */
2364 /* instead to IP stack via FW_HOOKS. */
2365 /* */
2366 /* The return-rst action for packets coming via NIC is handled as follows: */
2367 /* mblk with packet is discarded */
2368 /* */
2369 /* new mblk with RST response is constructed and injected to network */
2370 /* */
2371 /* IPF can't inject packets to loopback interface, this is just another */
2372 /* limitation we have to deal with here. The only option to send RST */
2373 /* response to offending TCP packet coming via loopback is to alter it. */
2374 /* */
2375 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on */
2376 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to */
2377 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers. */
2378 /* ------------------------------------------------------------------------ */
2379 int fr_make_rst(fin)
2380 fr_info_t *fin;
2382 uint16_t tmp_port;
2383 int rv = -1;
2384 uint32_t old_ack;
2385 tcphdr_t *tcp = NULL;
2386 struct in_addr tmp_src;
2387 #ifdef USE_INET6
2388 struct in6_addr tmp_src6;
2389 #endif
2391 ASSERT(fin->fin_p == IPPROTO_TCP);
2394 * We do not need to adjust chksum, since it is not being checked by
2395 * Solaris IP stack for loopback clients.
2397 if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2398 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2400 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2401 /* Swap IPv4 addresses. */
2402 tmp_src = fin->fin_ip->ip_src;
2403 fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2404 fin->fin_ip->ip_dst = tmp_src;
2406 rv = 0;
2408 else
2409 tcp = NULL;
2411 #ifdef USE_INET6
2412 else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2413 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2415 * We are relying on fact the next header is TCP, which is true
2416 * for regular TCP packets coming in over loopback.
2418 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2419 /* Swap IPv6 addresses. */
2420 tmp_src6 = fin->fin_ip6->ip6_src;
2421 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2422 fin->fin_ip6->ip6_dst = tmp_src6;
2424 rv = 0;
2426 else
2427 tcp = NULL;
2429 #endif
2431 if (tcp != NULL) {
2433 * Adjust TCP header:
2434 * swap ports,
2435 * set flags,
2436 * set correct ACK number
2438 tmp_port = tcp->th_sport;
2439 tcp->th_sport = tcp->th_dport;
2440 tcp->th_dport = tmp_port;
2441 old_ack = tcp->th_ack;
2442 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2443 tcp->th_seq = old_ack;
2444 tcp->th_flags = TH_RST | TH_ACK;
2447 return (rv);
2450 /* ------------------------------------------------------------------------ */
2451 /* Function: fr_make_icmp_v4 */
2452 /* Returns: int - 0 on success, -1 on failure */
2453 /* Parameters: fin(I) - pointer to packet information */
2454 /* */
2455 /* Please read comment at fr_make_icmp() wrapper function to get an idea */
2456 /* what is going to happen here and why. Once you read the comment there, */
2457 /* continue here with next paragraph. */
2458 /* */
2459 /* To turn IPv4 packet into ICMPv4 response packet, these things must */
2460 /* happen here: */
2461 /* (1) Original mblk is copied (duplicated). */
2462 /* */
2463 /* (2) ICMP header is created. */
2464 /* */
2465 /* (3) Link ICMP header with copy of original mblk, we have ICMPv4 */
2466 /* data ready then. */
2467 /* */
2468 /* (4) Swap IP addresses in original mblk and adjust IP header data. */
2469 /* */
2470 /* (5) The mblk containing original packet is trimmed to contain IP */
2471 /* header only and ICMP chksum is computed. */
2472 /* */
2473 /* (6) The ICMP header we have from (3) is linked to original mblk, */
2474 /* which now contains new IP header. If original packet was spread */
2475 /* over several mblks, only the first mblk is kept. */
2476 /* ------------------------------------------------------------------------ */
2477 static int fr_make_icmp_v4(fin)
2478 fr_info_t *fin;
2480 struct in_addr tmp_src;
2481 tcphdr_t *tcp;
2482 struct icmp *icmp;
2483 mblk_t *mblk_icmp;
2484 mblk_t *mblk_ip;
2485 size_t icmp_pld_len; /* octets to append to ICMP header */
2486 size_t orig_iphdr_len; /* length of IP header only */
2487 uint32_t sum;
2488 uint16_t *buf;
2489 int len;
2492 if (fin->fin_v != 4)
2493 return (-1);
2496 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2497 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2499 tcp = (tcphdr_t *) fin->fin_dp;
2501 if ((fin->fin_p == IPPROTO_TCP) &&
2502 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2503 return (-1);
2506 * Step (1)
2508 * Make copy of original mblk.
2510 * We want to copy as much data as necessary, not less, not more. The
2511 * ICMPv4 payload length for unreachable messages is:
2512 * original IP header + 8 bytes of L4 (if there are any).
2514 * We determine if there are at least 8 bytes of L4 data following IP
2515 * header first.
2517 icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
2518 ICMPERR_ICMPHLEN : fin->fin_dlen;
2520 * Since we don't want to copy more data than necessary, we must trim
2521 * the original mblk here. The right way (STREAMish) would be to use
2522 * adjmsg() to trim it. However we would have to calculate the length
2523 * argument for adjmsg() from pointers we already have here.
2525 * Since we have pointers and offsets, it's faster and easier for
2526 * us to just adjust pointers by hand instead of using adjmsg().
2528 fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
2529 fin->fin_m->b_wptr += icmp_pld_len;
2530 icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
2533 * Also we don't want to copy any L2 stuff, which might precede IP
2534 * header, so we have have to set b_rptr to point to the start of IP
2535 * header.
2537 fin->fin_m->b_rptr += fin->fin_ipoff;
2538 if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2539 return (-1);
2540 fin->fin_m->b_rptr -= fin->fin_ipoff;
2543 * Step (2)
2545 * Create an ICMP header, which will be appened to original mblk later.
2546 * ICMP header is just another mblk.
2548 mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
2549 if (mblk_icmp == NULL) {
2550 FREE_MB_T(mblk_ip);
2551 return (-1);
2554 MTYPE(mblk_icmp) = M_DATA;
2555 icmp = (struct icmp *) mblk_icmp->b_wptr;
2556 icmp->icmp_type = ICMP_UNREACH;
2557 icmp->icmp_code = fin->fin_icode & 0xFF;
2558 icmp->icmp_void = 0;
2559 icmp->icmp_cksum = 0;
2560 mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
2563 * Step (3)
2565 * Complete ICMP packet - link ICMP header with L4 data from original
2566 * IP packet.
2568 linkb(mblk_icmp, mblk_ip);
2571 * Step (4)
2573 * Swap IP addresses and change IP header fields accordingly in
2574 * original IP packet.
2576 * There is a rule option return-icmp as a dest for physical
2577 * interfaces. This option becomes useless for loopback, since IPF box
2578 * uses same address as a loopback destination. We ignore the option
2579 * here, the ICMP packet will always look like as it would have been
2580 * sent from the original destination host.
2582 tmp_src = fin->fin_ip->ip_src;
2583 fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2584 fin->fin_ip->ip_dst = tmp_src;
2585 fin->fin_ip->ip_p = IPPROTO_ICMP;
2586 fin->fin_ip->ip_sum = 0;
2589 * Step (5)
2591 * We trim the orignal mblk to hold IP header only.
2593 fin->fin_m->b_wptr = fin->fin_dp;
2594 orig_iphdr_len = fin->fin_m->b_wptr -
2595 (fin->fin_m->b_rptr + fin->fin_ipoff);
2596 fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
2597 orig_iphdr_len);
2600 * ICMP chksum calculation. The data we are calculating chksum for are
2601 * spread over two mblks, therefore we have to use two for loops.
2603 * First for loop computes chksum part for ICMP header.
2605 buf = (uint16_t *) icmp;
2606 len = ICMPERR_ICMPHLEN;
2607 for (sum = 0; len > 1; len -= 2)
2608 sum += *buf++;
2611 * Here we add chksum part for ICMP payload.
2613 len = icmp_pld_len;
2614 buf = (uint16_t *) mblk_ip->b_rptr;
2615 for (; len > 1; len -= 2)
2616 sum += *buf++;
2619 * Chksum is done.
2621 sum = (sum >> 16) + (sum & 0xffff);
2622 sum += (sum >> 16);
2623 icmp->icmp_cksum = ~sum;
2626 * Step (6)
2628 * Release all packet mblks, except the first one.
2630 if (fin->fin_m->b_cont != NULL) {
2631 FREE_MB_T(fin->fin_m->b_cont);
2635 * Append ICMP payload to first mblk, which already contains new IP
2636 * header.
2638 linkb(fin->fin_m, mblk_icmp);
2640 return (0);
2643 #ifdef USE_INET6
2644 /* ------------------------------------------------------------------------ */
2645 /* Function: fr_make_icmp_v6 */
2646 /* Returns: int - 0 on success, -1 on failure */
2647 /* Parameters: fin(I) - pointer to packet information */
2648 /* */
2649 /* Please read comment at fr_make_icmp() wrapper function to get an idea */
2650 /* what and why is going to happen here. Once you read the comment there, */
2651 /* continue here with next paragraph. */
2652 /* */
2653 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response. */
2654 /* The algorithm is fairly simple: */
2655 /* 1) We need to get copy of complete mblk. */
2656 /* */
2657 /* 2) New ICMPv6 header is created. */
2658 /* */
2659 /* 3) The copy of original mblk with packet is linked to ICMPv6 */
2660 /* header. */
2661 /* */
2662 /* 4) The checksum must be adjusted. */
2663 /* */
2664 /* 5) IP addresses in original mblk are swapped and IP header data */
2665 /* are adjusted (protocol number). */
2666 /* */
2667 /* 6) Original mblk is trimmed to hold IPv6 header only, then it is */
2668 /* linked with the ICMPv6 data we got from (3). */
2669 /* ------------------------------------------------------------------------ */
2670 static int fr_make_icmp_v6(fin)
2671 fr_info_t *fin;
2673 struct icmp6_hdr *icmp6;
2674 tcphdr_t *tcp;
2675 struct in6_addr tmp_src6;
2676 size_t icmp_pld_len;
2677 mblk_t *mblk_ip, *mblk_icmp;
2679 if (fin->fin_v != 6)
2680 return (-1);
2683 * If we are dealing with TCP, then packet must SYN/FIN to be routed by
2684 * IP stack. If it is not SYN/FIN, then we must drop it silently.
2686 tcp = (tcphdr_t *) fin->fin_dp;
2688 if ((fin->fin_p == IPPROTO_TCP) &&
2689 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2690 return (-1);
2693 * Step (1)
2695 * We need to copy complete packet in case of IPv6, no trimming is
2696 * needed (except the L2 headers).
2698 icmp_pld_len = M_LEN(fin->fin_m);
2699 fin->fin_m->b_rptr += fin->fin_ipoff;
2700 if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2701 return (-1);
2702 fin->fin_m->b_rptr -= fin->fin_ipoff;
2705 * Step (2)
2707 * Allocate and create ICMP header.
2709 mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
2710 BPRI_HI);
2712 if (mblk_icmp == NULL)
2713 return (-1);
2715 MTYPE(mblk_icmp) = M_DATA;
2716 icmp6 = (struct icmp6_hdr *) mblk_icmp->b_wptr;
2717 icmp6->icmp6_type = ICMP6_DST_UNREACH;
2718 icmp6->icmp6_code = fin->fin_icode & 0xFF;
2719 icmp6->icmp6_data32[0] = 0;
2720 mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
2723 * Step (3)
2725 * Link the copy of IP packet to ICMP header.
2727 linkb(mblk_icmp, mblk_ip);
2730 * Step (4)
2732 * Calculate chksum - this is much more easier task than in case of
2733 * IPv4 - ICMPv6 chksum only covers IP addresses, and payload length.
2734 * We are making compensation just for change of packet length.
2736 icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
2739 * Step (5)
2741 * Swap IP addresses.
2743 tmp_src6 = fin->fin_ip6->ip6_src;
2744 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2745 fin->fin_ip6->ip6_dst = tmp_src6;
2748 * and adjust IP header data.
2750 fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
2751 fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
2754 * Step (6)
2756 * We must release all linked mblks from original packet and keep only
2757 * the first mblk with IP header to link ICMP data.
2759 fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
2761 if (fin->fin_m->b_cont != NULL) {
2762 FREE_MB_T(fin->fin_m->b_cont);
2766 * Append ICMP payload to IP header.
2768 linkb(fin->fin_m, mblk_icmp);
2770 return (0);
2772 #endif /* USE_INET6 */
2774 /* ------------------------------------------------------------------------ */
2775 /* Function: fr_make_icmp */
2776 /* Returns: int - 0 on success, -1 on failure */
2777 /* Parameters: fin(I) - pointer to packet information */
2778 /* */
2779 /* We must alter the original mblks passed to IPF from IP stack via */
2780 /* FW_HOOKS. The reasons why we must alter packet are discussed within */
2781 /* comment at fr_make_rst() function. */
2782 /* */
2783 /* The fr_make_icmp() function acts as a wrapper, which passes the code */
2784 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on */
2785 /* protocol version. However there are some details, which are common to */
2786 /* both IP versions. The details are going to be explained here. */
2787 /* */
2788 /* The packet looks as follows: */
2789 /* xxx | IP hdr | IP payload ... | */
2790 /* ^ ^ ^ ^ */
2791 /* | | | | */
2792 /* | | | fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
2793 /* | | | */
2794 /* | | `- fin_m->fin_dp (in case of IPv4 points to L4 header) */
2795 /* | | */
2796 /* | `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case */
2797 /* | of loopback) */
2798 /* | */
2799 /* `- fin_m->b_rptr - points to L2 header in case of physical NIC */
2800 /* */
2801 /* All relevant IP headers are pulled up into the first mblk. It happened */
2802 /* well in advance before the matching rule was found (the rule, which took */
2803 /* us here, to fr_make_icmp() function). */
2804 /* */
2805 /* Both functions will turn packet passed in fin->fin_m mblk into a new */
2806 /* packet. New packet will be represented as chain of mblks. */
2807 /* orig mblk |- b_cont ---. */
2808 /* ^ `-> ICMP hdr |- b_cont--. */
2809 /* | ^ `-> duped orig mblk */
2810 /* | | ^ */
2811 /* `- The original mblk | | */
2812 /* will be trimmed to | | */
2813 /* to contain IP header | | */
2814 /* only | | */
2815 /* | | */
2816 /* `- This is newly | */
2817 /* allocated mblk to | */
2818 /* hold ICMPv6 data. | */
2819 /* | */
2820 /* | */
2821 /* | */
2822 /* This is the copy of original mblk, it will contain -' */
2823 /* original IP packet in case of ICMPv6. In case of */
2824 /* ICMPv4 it will contain up to 8 bytes of IP payload */
2825 /* (TCP/UDP/L4) data from original packet. */
2826 /* ------------------------------------------------------------------------ */
2827 int fr_make_icmp(fin)
2828 fr_info_t *fin;
2830 int rv;
2832 if (fin->fin_v == 4)
2833 rv = fr_make_icmp_v4(fin);
2834 #ifdef USE_INET6
2835 else if (fin->fin_v == 6)
2836 rv = fr_make_icmp_v6(fin);
2837 #endif
2838 else
2839 rv = -1;
2841 return (rv);
2844 /* ------------------------------------------------------------------------ */
2845 /* Function: fr_buf_sum */
2846 /* Returns: unsigned int - sum of buffer buf */
2847 /* Parameters: buf - pointer to buf we want to sum up */
2848 /* len - length of buffer buf */
2849 /* */
2850 /* Sums buffer buf. The result is used for chksum calculation. The buf */
2851 /* argument must be aligned. */
2852 /* ------------------------------------------------------------------------ */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
	const uint16_t *word = (const uint16_t *)buf;
	unsigned int nwords = len / 2;
	uint32_t total = 0;
	unsigned int i;

	/* Add up every complete 16-bit word exactly as it sits in memory. */
	for (i = 0; i < nwords; i++)
		total += word[i];

	/*
	 * A trailing odd octet is treated as the high-order half of one
	 * more 16-bit word whose low-order half is zero.
	 */
	if ((len & 1) != 0) {
		const unsigned char *tail;

		tail = (const unsigned char *)(word + nwords);
		total += htons((*tail) << 8);
	}

	/* The sum is returned unfolded; callers fold the carries. */
	return (total);
}
2871 /* ------------------------------------------------------------------------ */
2872 /* Function: fr_calc_chksum */
2873 /* Returns: void */
2874 /* Parameters: fin - pointer to fr_info_t instance with packet data */
2875 /* pkt - pointer to duplicated packet */
2876 /* */
2877 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP */
2878 /* versions. */
2879 /* ------------------------------------------------------------------------ */
2880 void fr_calc_chksum(fin, pkt)
2881 fr_info_t *fin;
2882 mb_t *pkt;
2884 struct pseudo_hdr {
2885 union {
2886 struct in_addr in4;
2887 #ifdef USE_INET6
2888 struct in6_addr in6;
2889 #endif
2890 } src_addr;
2891 union {
2892 struct in_addr in4;
2893 #ifdef USE_INET6
2894 struct in6_addr in6;
2895 #endif
2896 } dst_addr;
2897 char zero;
2898 char proto;
2899 uint16_t len;
2900 } phdr;
2901 uint32_t sum, ip_sum;
2902 void *buf;
2903 uint16_t *l4_csum_p;
2904 tcphdr_t *tcp;
2905 udphdr_t *udp;
2906 icmphdr_t *icmp;
2907 #ifdef USE_INET6
2908 struct icmp6_hdr *icmp6;
2909 #endif
2910 ip_t *ip;
2911 unsigned int len;
2912 int pld_len;
2915 * We need to pullup the packet to the single continuous buffer to avoid
2916 * potential misaligment of b_rptr member in mblk chain.
2918 if (pullupmsg(pkt, -1) == 0) {
2919 cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum"
2920 " will not be computed by IPF");
2921 return;
2925 * It is guaranteed IP header starts right at b_rptr, because we are
2926 * working with a copy of the original packet.
2928 * Compute pseudo header chksum for TCP and UDP.
2930 if ((fin->fin_p == IPPROTO_UDP) ||
2931 (fin->fin_p == IPPROTO_TCP)) {
2932 bzero(&phdr, sizeof (phdr));
2933 #ifdef USE_INET6
2934 if (fin->fin_v == 6) {
2935 phdr.src_addr.in6 = fin->fin_srcip6;
2936 phdr.dst_addr.in6 = fin->fin_dstip6;
2937 } else {
2938 phdr.src_addr.in4 = fin->fin_src;
2939 phdr.dst_addr.in4 = fin->fin_dst;
2941 #else
2942 phdr.src_addr.in4 = fin->fin_src;
2943 phdr.dst_addr.in4 = fin->fin_dst;
2944 #endif
2945 phdr.zero = (char) 0;
2946 phdr.proto = fin->fin_p;
2947 phdr.len = htons((uint16_t)fin->fin_dlen);
2948 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr));
2949 } else {
2950 sum = 0;
2954 * Set pointer to the L4 chksum field in the packet, set buf pointer to
2955 * the L4 header start.
2957 switch (fin->fin_p) {
2958 case IPPROTO_UDP:
2959 udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2960 l4_csum_p = &udp->uh_sum;
2961 buf = udp;
2962 break;
2963 case IPPROTO_TCP:
2964 tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2965 l4_csum_p = &tcp->th_sum;
2966 buf = tcp;
2967 break;
2968 case IPPROTO_ICMP:
2969 icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2970 l4_csum_p = &icmp->icmp_cksum;
2971 buf = icmp;
2972 break;
2973 #ifdef USE_INET6
2974 case IPPROTO_ICMPV6:
2975 icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen);
2976 l4_csum_p = &icmp6->icmp6_cksum;
2977 buf = icmp6;
2978 break;
2979 #endif
2980 default:
2981 l4_csum_p = NULL;
2985 * Compute L4 chksum if needed.
2987 if (l4_csum_p != NULL) {
2988 *l4_csum_p = (uint16_t)0;
2989 pld_len = fin->fin_dlen;
2990 len = pkt->b_wptr - (unsigned char *)buf;
2991 ASSERT(len == pld_len);
2993 * Add payload sum to pseudoheader sum.
2995 sum += fr_buf_sum(buf, len);
2996 while (sum >> 16)
2997 sum = (sum & 0xFFFF) + (sum >> 16);
2999 *l4_csum_p = ~((uint16_t)sum);
3000 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p);
3004 * The IP header chksum is needed just for IPv4.
3006 if (fin->fin_v == 4) {
3008 * Compute IPv4 header chksum.
3010 ip = (ip_t *)pkt->b_rptr;
3011 ip->ip_sum = (uint16_t)0;
3012 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen);
3013 while (ip_sum >> 16)
3014 ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16);
3016 ip->ip_sum = ~((uint16_t)ip_sum);
3017 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum);
3020 return;
3023 #endif /* _KERNEL && SOLARIS2 >= 10 */