/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
 */

/*
 * ipd: Internet packet disturber
 *
 * The purpose of ipd is to simulate congested and lossy networks when they
 * don't actually exist. The features of these congested and lossy networks are
 * events that end up leading to retransmits and thus kicking us out of the
 * TCP/IP fastpath. Since normally this would require us to have an actually
 * congested network, which can be problematic, we instead simulate this
 * behavior.
 *
 * 1. ipd's operations and restrictions
 *
 * ipd currently has facilities to cause IP traffic to be:
 *
 * - Corrupted with some probability.
 * - Delayed for a set number of microseconds.
 * - Dropped with some probability.
 *
 * Each of these features is enabled on a per-zone basis. The current
 * implementation restricts this specifically to exclusive stack zones.
 * Enabling ipd on a given zone causes pfhooks to be installed for that zone's
 * netstack. Because of the nature of ipd, it currently only supports exclusive
 * stack zones and, as a further restriction, it only allows the global zone
 * administrative access. ipd can be enabled for the global zone, but doing so
 * will cause all shared-stack zones to also be affected.
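 *
 * As an illustrative (not exhaustive) example of the administrative interface,
 * a global zone administrator might enable roughly 1% corruption for a zone
 * with an ipdadm(8) invocation along the lines of "ipdadm -z <zone> corrupt 1";
 * see the ipdadm(8) manual page for the exact subcommands and syntax.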
 *
 * 2. General architecture and locking
 *
 * ipd consists of a few components. There is a per-netstack data structure that
 * is created and destroyed with the creation and destruction of each exclusive
 * stack zone. Each of these netstacks is stored in a global list which is
 * accessed for control of ipd via ioctls. The following diagram touches on the
 * data structures that are used throughout ipd.
 *
 *      ADMINISTRATIVE                            DATA PATH
 *
 *      +--------+                       +------+         +------+
 *      | ipdadm |                       |  ip  |         | nics |
 *      +--------+                       +------+         +------+
 *           |                               |                |
 *           |                               +-------+--------+
 *           V                                       |
 *      +----------+                                 V
 *      | /dev/ipd |                   +-------------------------+
 *      +----------+                   | pfhooks packet callback |  == ipd_hook()
 *           |                         +-------------------------+
 *           |                                       |
 *           V                                       |
 *      +----------------+                           |
 *      | list_t ipd_nsl |------+                    |
 *      +----------------+      |                    V
 *                              |     +----------------------------+
 *                              +---->|       ipd_netstack_t       |
 *                                    +----------------------------+
 *
 * ipd has two different entry points: one administrative, the other the data
 * path. The administrative path is accessed by a userland component called
 * ipdadm(8). It communicates with the kernel component via ioctls to /dev/ipd.
 * If the administrative path enables a specific zone, then the data path will
 * become active for that zone. Any packet that leaves that zone's IP stack, or
 * is going to enter it, comes through the callback specified in the hook_t(9S)
 * structure. This will cause each packet to go through ipd_hook().
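 *
 * To make the administrative path concrete, a minimal userland sketch of what
 * ipdadm effectively does is shown below. The ipd_ioc_perturb_t members are the
 * ones used by this driver; the exact ioctl command name (IPDIOC_DROP here)
 * comes from the ipd ioctl header and is shown only for illustration:
 *
 *	int fd = open("/dev/ipd", O_RDWR);
 *	ipd_ioc_perturb_t ipi;
 *
 *	ipi.ipip_zoneid = zid;			<- zone to perturb
 *	ipi.ipip_arg = 10;			<- e.g. drop ~10% of packets
 *	if (ioctl(fd, IPDIOC_DROP, &ipi) != 0)
 *		(void) fprintf(stderr, "failed to enable drops\n");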
 *
 * While the locking inside of ipd should be straightforward, unfortunately, the
 * pfhooks subsystem necessarily complicates this a little bit. There are
 * currently three different sets of locks in ipd.
 *
 * - Global lock N on the netstack list.
 * - Global lock A on the active count.
 * - Per-netstack data structure lock Z.
 *
 * The rules for these locks are as follows:
 *
 * L.1a N must always be acquired first and released last.
 *
 * If you need to acquire the netstack list lock, either for reading or writing,
 * then N must be acquired first and before any other locks. It may not be
 * dropped before any other lock.
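 *
 * As a minimal sketch of what this rule implies for an administrative path that
 * needs both the list lock and a netstack's lock:
 *
 *	mutex_enter(&ipd_nsl_lock);		<- N first
 *	mutex_enter(&ins->ipdn_lock);		<- then Z
 *	...
 *	mutex_exit(&ins->ipdn_lock);		<- Z released first
 *	mutex_exit(&ipd_nsl_lock);		<- N released last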
 *
 * L.1b N must only be acquired from the administrative path and the zone
 *      creation, shutdown, and destruction callbacks.
 *
 * The data path, e.g. receiving the per-packet callbacks, should never grab
 * the list lock. If it ever does, then the architecture here needs to be
 * reconsidered.
 *
 * L.2 Z cannot be held across calls to the pfhooks subsystem if packet hooks
 *     are active.
 *
 * The way the pfhooks subsystem is designed is that a reference count is
 * present on the hook_t while it is active. As long as that reference count is
 * non-zero, a call to net_hook_unregister will block until it is lowered.
 * Because the callbacks want the same lock for the netstack that is held by the
 * administrative path calling into net_hook_unregister, we deadlock.
 *
 *   ioctl from ipdadm remove        hook_t cb (from nic)      hook_t cb (from IP)
 *   -----------------------         --------------------      -------------------
 *              |                             |                         |
 *              |                    bump hook_t refcount               |
 *   mutex_enter(ipd_nsl_lock);         enter ipd_hook()       bump hook_t refcount
 *        mutex acquired         mutex_enter(ins->ipdn_lock);           |
 *              |                      mutex acquired             enter ipd_hook()
 *   mutex_enter(ins->ipdn_lock);             |            mutex_enter(ins->ipdn_lock);
 *              |                             |                         |
 *              |                mutex_exit(ins->ipdn_lock);            |
 *              |                             |                         |
 *        mutex acquired               leave ipd_hook()                 |
 *              |                decrement hook_t refcount              |
 *              |                             |                         |
 *   ipd_teardown_hooks()                     |                         |
 *   net_hook_unregister()                    |                         |
 *    cv_wait() if refcount                   |                         |
 *        is non-zero                         |                         |
 *   ----------------------------------------------------------------------------
 *
 * At this point, we can see that the second hook callback still doesn't have
 * the mutex, but it has bumped the hook_t refcount. However, it will never
 * acquire the mutex that it needs to finish its operation and decrement the
 * refcount, so the cv_wait() in net_hook_unregister will never be satisfied and
 * we have deadlocked.
 *
 * Obviously, deadlocking is not acceptable, thus the following corollary to the
 * second locking rule:
 *
 * L.2 Corollary: If Z is being released across a call to the pfhooks subsystem,
 *                N must be held.
 *
 * There is currently only one path where we have to worry about this. That is
 * when we are removing a hook, but the zone is not being shut down, so hooks
 * are currently active. The only place where this currently happens is in
 * ipd_check_hooks().
 */
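
/*
 * For reference, a minimal sketch of the pattern that the corollary above
 * requires; this mirrors what ipd_check_hooks() does while the caller already
 * holds the list lock N:
 *
 *	mutex_exit(&ins->ipdn_lock);	drop Z so in-flight hooks can finish
 *	ipd_teardown_hooks(ins);	may block in net_hook_unregister()
 *	mutex_enter(&ins->ipdn_lock);	reacquire Z afterwards
 */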

#include <sys/types.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/cred.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/cmn_err.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/kstat.h>
#include <sys/neti.h>
#include <sys/list.h>
#include <sys/ksynch.h>
#include <sys/sysmacros.h>
#include <sys/policy.h>
#include <sys/atomic.h>
#include <sys/model.h>
#include <sys/strsun.h>

#include <sys/netstack.h>
#include <sys/hook.h>
#include <sys/hook_event.h>

#include <sys/ipd.h>

#define	IPDN_STATUS_DISABLED	0x1
#define	IPDN_STATUS_ENABLED	0x2
#define	IPDN_STATUS_CONDEMNED	0x4

/*
 * These flags are used to determine whether or not the hooks are registered.
 */
#define	IPDN_HOOK_NONE		0x0
#define	IPDN_HOOK_V4IN		0x1
#define	IPDN_HOOK_V4OUT		0x2
#define	IPDN_HOOK_V6IN		0x4
#define	IPDN_HOOK_V6OUT		0x8
#define	IPDN_HOOK_ALL		0xf

/*
 * Per-netstack kstats.
 */
typedef struct ipd_nskstat {
	kstat_named_t	ink_ndrops;
	kstat_named_t	ink_ncorrupts;
	kstat_named_t	ink_ndelays;
} ipd_nskstat_t;

/*
 * Different parts of this structure have different locking semantics. The list
 * node is not normally referenced; if it is, one has to hold the ipd_nsl_lock.
 * The following members are read only: ipdn_netid and ipdn_zoneid. The members
 * of the kstat structure are always accessible in the data path, but the
 * counters must be bumped with atomic operations. The ipdn_lock protects every
 * other aspect of this structure. Please see the big theory statement on the
 * requirements for lock ordering.
 */
typedef struct ipd_netstack {
	list_node_t	ipdn_link;	/* link on ipd_nsl */
	netid_t		ipdn_netid;	/* netstack id */
	zoneid_t	ipdn_zoneid;	/* zone id */
	kstat_t		*ipdn_kstat;	/* kstat_t ptr */
	ipd_nskstat_t	ipdn_ksdata;	/* kstat data */
	kmutex_t	ipdn_lock;	/* protects following members */
	int		ipdn_status;	/* status flags */
	net_handle_t	ipdn_v4hdl;	/* IPv4 net handle */
	net_handle_t	ipdn_v6hdl;	/* IPv6 net handle */
	int		ipdn_hooked;	/* are hooks registered */
	hook_t		*ipdn_v4in;	/* IPv4 traffic in hook */
	hook_t		*ipdn_v4out;	/* IPv4 traffic out hook */
	hook_t		*ipdn_v6in;	/* IPv6 traffic in hook */
	hook_t		*ipdn_v6out;	/* IPv6 traffic out hook */
	int		ipdn_enabled;	/* which perturbs are on */
	int		ipdn_corrupt;	/* corrupt percentage */
	int		ipdn_drop;	/* drop percentage */
	uint_t		ipdn_delay;	/* delay us */
	long		ipdn_rand;	/* random seed */
} ipd_netstack_t;

/*
 * ipd internal variables
 */
static dev_info_t	*ipd_devi;			/* device info */
static net_instance_t	*ipd_neti;			/* net_instance for hooks */
static unsigned int	ipd_max_delay = IPD_MAX_DELAY;	/* max delay in us */
static kmutex_t		ipd_nsl_lock;			/* lock for the netstack list */
static list_t		ipd_nsl;			/* list of netstacks */
static kmutex_t		ipd_nactive_lock;		/* lock for nactive */
static unsigned int	ipd_nactive;			/* number of active netstacks */
static int		ipd_nactive_fudge = 4;		/* amount to fudge by in list */

/*
 * Note that this random number implementation is based upon the old BSD 4.1
 * rand. It's good enough for us!
 */
static int
ipd_nextrand(ipd_netstack_t *ins)
{
	ins->ipdn_rand = ins->ipdn_rand * 1103515245L + 12345;
	return (ins->ipdn_rand & 0x7fffffff);
}

static void
ipd_ksbump(kstat_named_t *nkp)
{
	atomic_inc_64(&nkp->value.ui64);
}

/*
 * This is where all the magic actually happens. The way that this works is we
 * grab the ins lock to basically get a copy of all the data that we need to do
 * our job and then let it go to minimize contention. In terms of actual work
 * on the packet, we apply the perturbations in the following order:
 *
 * 1. drop
 * 2. delay
 * 3. corrupt
 */
/*ARGSUSED*/
static int
ipd_hook(hook_event_token_t event, hook_data_t data, void *arg)
{
	unsigned char *crp;
	int dwait, corrupt, drop, rand, off, status;
	mblk_t *mbp;
	ipd_netstack_t *ins = arg;
	hook_pkt_event_t *pkt = (hook_pkt_event_t *)data;

	mutex_enter(&ins->ipdn_lock);
	status = ins->ipdn_status;
	dwait = ins->ipdn_delay;
	corrupt = ins->ipdn_corrupt;
	drop = ins->ipdn_drop;
	rand = ipd_nextrand(ins);
	mutex_exit(&ins->ipdn_lock);

	/*
	 * This probably cannot happen, but we'll do an extra guard just in
	 * case.
	 */
	if (status & IPDN_STATUS_CONDEMNED)
		return (0);

	if (drop != 0 && rand % 100 < drop) {
		freemsg(*pkt->hpe_mp);
		*pkt->hpe_mp = NULL;
		pkt->hpe_mb = NULL;
		pkt->hpe_hdr = NULL;
		ipd_ksbump(&ins->ipdn_ksdata.ink_ndrops);

		return (1);
	}

	if (dwait != 0) {
		if (dwait < TICK_TO_USEC(1))
			drv_usecwait(dwait);
		else
			delay(drv_usectohz(dwait));
		ipd_ksbump(&ins->ipdn_ksdata.ink_ndelays);
	}

	if (corrupt != 0 && rand % 100 < corrupt) {
		/*
		 * Since we're corrupting the mblk, just corrupt everything in
		 * the chain. While we could corrupt the entire packet, that's
		 * a little strong. Instead we're going to just change one of
		 * the bytes in each mblock.
		 */
		mbp = *pkt->hpe_mp;
		while (mbp != NULL) {
			if (mbp->b_wptr == mbp->b_rptr) {
				mbp = mbp->b_cont;
				continue;
			}

			/*
			 * While pfhooks probably won't send us anything else,
			 * let's just be extra careful. The stack probably
			 * isn't as resilient to corruption of control
			 * messages.
			 */
			if (DB_TYPE(mbp) != M_DATA) {
				mbp = mbp->b_cont;
				continue;
			}

			off = rand % ((uintptr_t)mbp->b_wptr -
			    (uintptr_t)mbp->b_rptr);
			crp = mbp->b_rptr + off;
			off = off % 8;
			*crp = *crp ^ (1 << off);

			mbp = mbp->b_cont;
		}
		ipd_ksbump(&ins->ipdn_ksdata.ink_ncorrupts);
	}

	return (0);
}

/*
 * Sets up and registers all the proper hooks needed for the netstack to capture
 * packets. Callers are assumed to already be holding the ipd_netstack_t's lock.
 * If there is a failure in setting something up, it is the responsibility of
 * this function to clean it up. Once this function has been called, it should
 * not be called again until a corresponding call to tear down the hooks has
 * been made.
 */
static int
ipd_setup_hooks(ipd_netstack_t *ins)
{
	ASSERT(MUTEX_HELD(&ins->ipdn_lock));
	ins->ipdn_v4hdl = net_protocol_lookup(ins->ipdn_netid, NHF_INET);
	if (ins->ipdn_v4hdl == NULL)
		goto cleanup;

	ins->ipdn_v6hdl = net_protocol_lookup(ins->ipdn_netid, NHF_INET6);
	if (ins->ipdn_v6hdl == NULL)
		goto cleanup;

	ins->ipdn_v4in = hook_alloc(HOOK_VERSION);
	if (ins->ipdn_v4in == NULL)
		goto cleanup;

	ins->ipdn_v4in->h_flags = 0;
	ins->ipdn_v4in->h_hint = HH_NONE;
	ins->ipdn_v4in->h_hintvalue = 0;
	ins->ipdn_v4in->h_func = ipd_hook;
	ins->ipdn_v4in->h_arg = ins;
	ins->ipdn_v4in->h_name = "ipd IPv4 in";

	if (net_hook_register(ins->ipdn_v4hdl, NH_PHYSICAL_IN,
	    ins->ipdn_v4in) != 0)
		goto cleanup;
	ins->ipdn_hooked |= IPDN_HOOK_V4IN;

	ins->ipdn_v4out = hook_alloc(HOOK_VERSION);
	if (ins->ipdn_v4out == NULL)
		goto cleanup;
	ins->ipdn_v4out->h_flags = 0;
	ins->ipdn_v4out->h_hint = HH_NONE;
	ins->ipdn_v4out->h_hintvalue = 0;
	ins->ipdn_v4out->h_func = ipd_hook;
	ins->ipdn_v4out->h_arg = ins;
	ins->ipdn_v4out->h_name = "ipd IPv4 out";

	if (net_hook_register(ins->ipdn_v4hdl, NH_PHYSICAL_OUT,
	    ins->ipdn_v4out) != 0)
		goto cleanup;
	ins->ipdn_hooked |= IPDN_HOOK_V4OUT;

	ins->ipdn_v6in = hook_alloc(HOOK_VERSION);
	if (ins->ipdn_v6in == NULL)
		goto cleanup;
	ins->ipdn_v6in->h_flags = 0;
	ins->ipdn_v6in->h_hint = HH_NONE;
	ins->ipdn_v6in->h_hintvalue = 0;
	ins->ipdn_v6in->h_func = ipd_hook;
	ins->ipdn_v6in->h_arg = ins;
	ins->ipdn_v6in->h_name = "ipd IPv6 in";

	if (net_hook_register(ins->ipdn_v6hdl, NH_PHYSICAL_IN,
	    ins->ipdn_v6in) != 0)
		goto cleanup;
	ins->ipdn_hooked |= IPDN_HOOK_V6IN;

	ins->ipdn_v6out = hook_alloc(HOOK_VERSION);
	if (ins->ipdn_v6out == NULL)
		goto cleanup;
	ins->ipdn_v6out->h_flags = 0;
	ins->ipdn_v6out->h_hint = HH_NONE;
	ins->ipdn_v6out->h_hintvalue = 0;
	ins->ipdn_v6out->h_func = ipd_hook;
	ins->ipdn_v6out->h_arg = ins;
	ins->ipdn_v6out->h_name = "ipd IPv6 out";

	if (net_hook_register(ins->ipdn_v6hdl, NH_PHYSICAL_OUT,
	    ins->ipdn_v6out) != 0)
		goto cleanup;
	ins->ipdn_hooked |= IPDN_HOOK_V6OUT;
	mutex_enter(&ipd_nactive_lock);
	ipd_nactive++;
	mutex_exit(&ipd_nactive_lock);

	return (0);

cleanup:
	if (ins->ipdn_hooked & IPDN_HOOK_V6OUT)
		(void) net_hook_unregister(ins->ipdn_v6hdl, NH_PHYSICAL_OUT,
		    ins->ipdn_v6out);

	if (ins->ipdn_hooked & IPDN_HOOK_V6IN)
		(void) net_hook_unregister(ins->ipdn_v6hdl, NH_PHYSICAL_IN,
		    ins->ipdn_v6in);

	if (ins->ipdn_hooked & IPDN_HOOK_V4OUT)
		(void) net_hook_unregister(ins->ipdn_v4hdl, NH_PHYSICAL_OUT,
		    ins->ipdn_v4out);

	if (ins->ipdn_hooked & IPDN_HOOK_V4IN)
		(void) net_hook_unregister(ins->ipdn_v4hdl, NH_PHYSICAL_IN,
		    ins->ipdn_v4in);

	ins->ipdn_hooked = IPDN_HOOK_NONE;

	if (ins->ipdn_v6out != NULL)
		hook_free(ins->ipdn_v6out);

	if (ins->ipdn_v6in != NULL)
		hook_free(ins->ipdn_v6in);

	if (ins->ipdn_v4out != NULL)
		hook_free(ins->ipdn_v4out);

	if (ins->ipdn_v4in != NULL)
		hook_free(ins->ipdn_v4in);

	if (ins->ipdn_v6hdl != NULL)
		(void) net_protocol_release(ins->ipdn_v6hdl);

	if (ins->ipdn_v4hdl != NULL)
		(void) net_protocol_release(ins->ipdn_v4hdl);

	return (1);
}

static void
ipd_teardown_hooks(ipd_netstack_t *ins)
{
	ASSERT(ins->ipdn_hooked == IPDN_HOOK_ALL);
	VERIFY(net_hook_unregister(ins->ipdn_v6hdl, NH_PHYSICAL_OUT,
	    ins->ipdn_v6out) == 0);
	VERIFY(net_hook_unregister(ins->ipdn_v6hdl, NH_PHYSICAL_IN,
	    ins->ipdn_v6in) == 0);
	VERIFY(net_hook_unregister(ins->ipdn_v4hdl, NH_PHYSICAL_OUT,
	    ins->ipdn_v4out) == 0);
	VERIFY(net_hook_unregister(ins->ipdn_v4hdl, NH_PHYSICAL_IN,
	    ins->ipdn_v4in) == 0);

	ins->ipdn_hooked = IPDN_HOOK_NONE;

	hook_free(ins->ipdn_v6out);
	hook_free(ins->ipdn_v6in);
	hook_free(ins->ipdn_v4out);
	hook_free(ins->ipdn_v4in);

	VERIFY(net_protocol_release(ins->ipdn_v6hdl) == 0);
	VERIFY(net_protocol_release(ins->ipdn_v4hdl) == 0);

	mutex_enter(&ipd_nactive_lock);
	ipd_nactive--;
	mutex_exit(&ipd_nactive_lock);
}

static int
ipd_check_hooks(ipd_netstack_t *ins, int type, boolean_t enable)
{
	int olden, rval;

	olden = ins->ipdn_enabled;

	if (enable)
		ins->ipdn_enabled |= type;
	else
		ins->ipdn_enabled &= ~type;

	/*
	 * If no perturbation was previously enabled and one now is, the hooks
	 * need to be set up.
	 */
	if (olden == 0 && ins->ipdn_enabled != 0) {
		rval = ipd_setup_hooks(ins);
		if (rval != 0) {
			ins->ipdn_enabled &= ~type;
			ASSERT(ins->ipdn_enabled == 0);
			return (rval);
		}

		return (0);
	}

	/*
	 * If a perturbation was previously enabled and now none are, the hooks
	 * need to be torn down.
	 */
	if (olden != 0 && ins->ipdn_enabled == 0) {
		/*
		 * We have to drop the lock here, lest we cause a deadlock.
		 * Unfortunately, there may be hooks that are running and are
		 * actively in flight and we have to call the unregister
		 * function. Due to the hooks framework, if there is an inflight
		 * hook (most likely right now), and we are holding the
		 * netstack's lock, those hooks will never return. This is
		 * why we must drop the netstack's lock across the teardown.
		 *
		 * Because we only come into this path holding the list lock, we
		 * know that the only way that someone else can come in and get
		 * to this structure is via the hook callbacks which are going
		 * to only be doing reads. They'll also see that everything has
		 * been disabled and return. So while this is unfortunate, it
		 * should be relatively safe.
		 */
		mutex_exit(&ins->ipdn_lock);
		ipd_teardown_hooks(ins);
		mutex_enter(&ins->ipdn_lock);
		return (0);
	}

	/*
	 * Otherwise, nothing should have changed here.
	 */
	ASSERT((olden == 0) == (ins->ipdn_enabled == 0));
	return (0);
}

static int
ipd_toggle_corrupt(ipd_netstack_t *ins, int percent)
{
	int rval;

	ASSERT(MUTEX_HELD(&ins->ipdn_lock));

	if (percent < 0 || percent > 100)
		return (ERANGE);

	/*
	 * If we've been asked to set the value to the value that we already
	 * have, great, then we're done.
	 */
	if (percent == ins->ipdn_corrupt)
		return (0);

	ins->ipdn_corrupt = percent;
	rval = ipd_check_hooks(ins, IPD_CORRUPT, percent != 0);

	/*
	 * If ipd_check_hooks failed, that must mean that we failed to set up
	 * the hooks, so we are going to effectively zero out and fail the
	 * request to enable corruption.
	 */
	if (rval != 0)
		ins->ipdn_corrupt = 0;

	return (rval);
}

static int
ipd_toggle_delay(ipd_netstack_t *ins, uint32_t delay)
{
	int rval;

	ASSERT(MUTEX_HELD(&ins->ipdn_lock));

	if (delay > ipd_max_delay)
		return (ERANGE);

	/*
	 * If we've been asked to set the value to the value that we already
	 * have, great, then we're done.
	 */
	if (delay == ins->ipdn_delay)
		return (0);

	ins->ipdn_delay = delay;
	rval = ipd_check_hooks(ins, IPD_DELAY, delay != 0);

	/*
	 * If ipd_check_hooks failed, that must mean that we failed to set up
	 * the hooks, so we are going to effectively zero out and fail the
	 * request to enable the delay.
	 */
	if (rval != 0)
		ins->ipdn_delay = 0;

	return (rval);
}

static int
ipd_toggle_drop(ipd_netstack_t *ins, int percent)
{
	int rval;

	ASSERT(MUTEX_HELD(&ins->ipdn_lock));

	if (percent < 0 || percent > 100)
		return (ERANGE);

	/*
	 * If we've been asked to set the value to the value that we already
	 * have, great, then we're done.
	 */
	if (percent == ins->ipdn_drop)
		return (0);

	ins->ipdn_drop = percent;
	rval = ipd_check_hooks(ins, IPD_DROP, percent != 0);

	/*
	 * If ipd_check_hooks failed, that must mean that we failed to set up
	 * the hooks, so we are going to effectively zero out and fail the
	 * request to enable dropping.
	 */
	if (rval != 0)
		ins->ipdn_drop = 0;

	return (rval);
}

static int
ipd_ioctl_perturb(ipd_ioc_perturb_t *ipi, cred_t *cr, intptr_t cmd)
{
	zoneid_t zid;
	ipd_netstack_t *ins;
	int rval = 0;

	/*
	 * If the zone that we're coming from is not the GZ, then we ignore it
	 * completely and then instead just set the zoneid to be that of the
	 * caller. If the zoneid is that of the GZ, then we don't touch this
	 * value.
	 */
	zid = crgetzoneid(cr);
	if (zid != GLOBAL_ZONEID)
		ipi->ipip_zoneid = zid;

	if (zoneid_to_netstackid(ipi->ipip_zoneid) == GLOBAL_NETSTACKID &&
	    zid != GLOBAL_ZONEID)
		return (EPERM);

	/*
	 * We need to hold the ipd_nsl_lock throughout the entire operation,
	 * otherwise someone else could come in and remove us from the list and
	 * free us, e.g. the netstack destroy handler. By holding the lock, we
	 * stop it from being able to do anything wrong.
	 */
	mutex_enter(&ipd_nsl_lock);
	for (ins = list_head(&ipd_nsl); ins != NULL;
	    ins = list_next(&ipd_nsl, ins)) {
		if (ins->ipdn_zoneid == ipi->ipip_zoneid)
			break;
	}

	if (ins == NULL) {
		mutex_exit(&ipd_nsl_lock);
		return (EINVAL);
	}

	mutex_enter(&ins->ipdn_lock);

	if (ins->ipdn_status & IPDN_STATUS_CONDEMNED) {
		mutex_exit(&ins->ipdn_lock);
		mutex_exit(&ipd_nsl_lock);
		return (ESHUTDOWN);
	}

	switch (cmd) {
	case IPDIOC_CORRUPT:
		rval = ipd_toggle_corrupt(ins, ipi->ipip_arg);
		break;
	case IPDIOC_DELAY:
		rval = ipd_toggle_delay(ins, ipi->ipip_arg);
		break;
	case IPDIOC_DROP:
		rval = ipd_toggle_drop(ins, ipi->ipip_arg);
		break;
	}

	mutex_exit(&ins->ipdn_lock);
	mutex_exit(&ipd_nsl_lock);
	return (rval);
}

static int
ipd_ioctl_remove(ipd_ioc_perturb_t *ipi, cred_t *cr)
{
	zoneid_t zid;
	ipd_netstack_t *ins;

	/*
	 * See ipd_ioctl_perturb for the rationale here.
	 */
	zid = crgetzoneid(cr);
	if (zid != GLOBAL_ZONEID)
		ipi->ipip_zoneid = zid;

	if (zoneid_to_netstackid(ipi->ipip_zoneid) == GLOBAL_NETSTACKID &&
	    zid != GLOBAL_ZONEID)
		return (EPERM);

	mutex_enter(&ipd_nsl_lock);
	for (ins = list_head(&ipd_nsl); ins != NULL;
	    ins = list_next(&ipd_nsl, ins)) {
		if (ins->ipdn_zoneid == ipi->ipip_zoneid)
			break;
	}

	if (ins == NULL) {
		mutex_exit(&ipd_nsl_lock);
		return (EINVAL);
	}

	mutex_enter(&ins->ipdn_lock);

	/*
	 * If this is condemned, that means it's very shortly going to be torn
	 * down. In that case, there's no reason to actually do anything here,
	 * as it will all be done rather shortly in the destroy function.
	 * Furthermore, because condemned corresponds with it having hit
	 * shutdown, we know that no more packets can be received by this
	 * netstack. All this translates to a no-op.
	 */
	if (ins->ipdn_status & IPDN_STATUS_CONDEMNED) {
		mutex_exit(&ins->ipdn_lock);
		mutex_exit(&ipd_nsl_lock);
		return (0);
	}

	/*
	 * Go through and disable the requested pieces. We can safely ignore
	 * the return value of ipd_check_hooks because the removal case should
	 * never fail; we verify that in the hook teardown case.
	 */
	if (ipi->ipip_arg & IPD_CORRUPT) {
		ins->ipdn_corrupt = 0;
		(void) ipd_check_hooks(ins, IPD_CORRUPT, B_FALSE);
	}

	if (ipi->ipip_arg & IPD_DELAY) {
		ins->ipdn_delay = 0;
		(void) ipd_check_hooks(ins, IPD_DELAY, B_FALSE);
	}

	if (ipi->ipip_arg & IPD_DROP) {
		ins->ipdn_drop = 0;
		(void) ipd_check_hooks(ins, IPD_DROP, B_FALSE);
	}

	mutex_exit(&ins->ipdn_lock);
	mutex_exit(&ipd_nsl_lock);
	return (0);
}

/*
 * When this function is called, the value of the ipil_nzones argument controls
 * how this function works. When called with a value of zero, we treat that as
 * the caller asking us for a reasonable number of entries to allocate memory
 * for. If the zone is the global zone, then we tell them how many netstacks
 * are currently active plus a fudge factor; otherwise the answer is always
 * one.
 *
 * In the non-zero case, we give them that number of zone ids. While this isn't
 * quite ideal as it might mean that someone misses something, this generally
 * won't be an issue, as it involves a rather tight race condition in the
 * current ipdadm implementation.
 */
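
/*
 * A minimal sketch of the two-call pattern this implies for a userland
 * consumer (the ipd_ioc_list members are those used below; everything else is
 * an illustrative assumption):
 *
 *	1. Call the list ioctl with ipil_nzones set to zero; the driver writes
 *	   back a suggested allocation size in ipil_nzones.
 *	2. Allocate that many ipd_ioc_info_t entries, point ipil_info at them,
 *	   set ipil_nzones accordingly, and call the ioctl again; the driver
 *	   fills in the entries and updates ipil_nzones to the number of
 *	   enabled zones it found.
 */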
static int
ipd_ioctl_list(intptr_t arg, cred_t *cr)
{
	zoneid_t zid;
	ipd_ioc_info_t *configs;
	ipd_netstack_t *ins;
	uint_t azones, rzones, nzones, cur;
	int rval = 0;
	STRUCT_DECL(ipd_ioc_list, h);

	STRUCT_INIT(h, get_udatamodel());
	if (ddi_copyin((void *)arg, STRUCT_BUF(h),
	    STRUCT_SIZE(h), 0) != 0)
		return (EFAULT);

	zid = crgetzoneid(cr);

	rzones = STRUCT_FGET(h, ipil_nzones);
	if (rzones == 0) {
		if (zid == GLOBAL_ZONEID) {
			mutex_enter(&ipd_nactive_lock);
			rzones = ipd_nactive + ipd_nactive_fudge;
			mutex_exit(&ipd_nactive_lock);
		} else {
			rzones = 1;
		}
		STRUCT_FSET(h, ipil_nzones, rzones);
		if (ddi_copyout(STRUCT_BUF(h), (void *)arg,
		    STRUCT_SIZE(h), 0) != 0)
			return (EFAULT);

		return (0);
	}

	mutex_enter(&ipd_nsl_lock);
	if (zid == GLOBAL_ZONEID) {
		azones = ipd_nactive;
	} else {
		azones = 1;
	}

	configs = kmem_alloc(sizeof (ipd_ioc_info_t) * azones, KM_SLEEP);
	cur = 0;
	for (ins = list_head(&ipd_nsl); ins != NULL;
	    ins = list_next(&ipd_nsl, ins)) {
		if (ins->ipdn_enabled == 0)
			continue;

		ASSERT(cur < azones);

		if (zid == GLOBAL_ZONEID || zid == ins->ipdn_zoneid) {
			configs[cur].ipii_zoneid = ins->ipdn_zoneid;

			mutex_enter(&ins->ipdn_lock);
			configs[cur].ipii_corrupt = ins->ipdn_corrupt;
			configs[cur].ipii_delay = ins->ipdn_delay;
			configs[cur].ipii_drop = ins->ipdn_drop;
			mutex_exit(&ins->ipdn_lock);

			cur++;
		}

		if (zid != GLOBAL_ZONEID && zid == ins->ipdn_zoneid)
			break;
	}
	mutex_exit(&ipd_nsl_lock);

	ASSERT(zid != GLOBAL_ZONEID || cur == azones);

	if (cur == 0)
		STRUCT_FSET(h, ipil_nzones, 0);
	else
		STRUCT_FSET(h, ipil_nzones, cur);

	nzones = MIN(cur, rzones);
	if (nzones > 0) {
		if (ddi_copyout(configs, STRUCT_FGETP(h, ipil_info),
		    nzones * sizeof (ipd_ioc_info_t), 0) != 0)
			rval = EFAULT;
	}

	kmem_free(configs, sizeof (ipd_ioc_info_t) * azones);
	if (ddi_copyout(STRUCT_BUF(h), (void *)arg, STRUCT_SIZE(h), 0) != 0)
		return (EFAULT);

	return (rval);
}

static void *
ipd_nin_create(const netid_t id)
{
	ipd_netstack_t *ins;
	ipd_nskstat_t *ink;

	ins = kmem_zalloc(sizeof (ipd_netstack_t), KM_SLEEP);
	ins->ipdn_status = IPDN_STATUS_DISABLED;
	ins->ipdn_netid = id;
	ins->ipdn_zoneid = netstackid_to_zoneid(id);
	ins->ipdn_rand = gethrtime();
	mutex_init(&ins->ipdn_lock, NULL, MUTEX_DRIVER, NULL);

	ins->ipdn_kstat = net_kstat_create(id, "ipd", ins->ipdn_zoneid,
	    "ipd", "net", KSTAT_TYPE_NAMED,
	    sizeof (ipd_nskstat_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);

	if (ins->ipdn_kstat != NULL) {
		if (ins->ipdn_zoneid != GLOBAL_ZONEID)
			kstat_zone_add(ins->ipdn_kstat, GLOBAL_ZONEID);

		ink = &ins->ipdn_ksdata;
		ins->ipdn_kstat->ks_data = ink;
		kstat_named_init(&ink->ink_ncorrupts, "corrupts",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&ink->ink_ndrops, "drops", KSTAT_DATA_UINT64);
		kstat_named_init(&ink->ink_ndelays, "delays",
		    KSTAT_DATA_UINT64);
		kstat_install(ins->ipdn_kstat);
	}

	mutex_enter(&ipd_nsl_lock);
	list_insert_tail(&ipd_nsl, ins);
	mutex_exit(&ipd_nsl_lock);

	return (ins);
}

static void
ipd_nin_shutdown(const netid_t id, void *arg)
{
	ipd_netstack_t *ins = arg;

	VERIFY(id == ins->ipdn_netid);
	mutex_enter(&ins->ipdn_lock);
	ASSERT(ins->ipdn_status == IPDN_STATUS_DISABLED ||
	    ins->ipdn_status == IPDN_STATUS_ENABLED);
	ins->ipdn_status |= IPDN_STATUS_CONDEMNED;
	if (ins->ipdn_kstat != NULL)
		net_kstat_delete(id, ins->ipdn_kstat);
	mutex_exit(&ins->ipdn_lock);
}

/*ARGSUSED*/
static void
ipd_nin_destroy(const netid_t id, void *arg)
{
	ipd_netstack_t *ins = arg;

	/*
	 * At this point none of the hooks should be able to fire because the
	 * zone has been shut down and we are in the process of destroying it.
	 * Thus it should not be possible for someone else to come in and grab
	 * our ipd_netstack_t for this zone. Because of that, we know that we
	 * are the only ones who could be running here.
	 */
	mutex_enter(&ipd_nsl_lock);
	list_remove(&ipd_nsl, ins);
	mutex_exit(&ipd_nsl_lock);

	if (ins->ipdn_hooked)
		ipd_teardown_hooks(ins);
	mutex_destroy(&ins->ipdn_lock);
	kmem_free(ins, sizeof (ipd_netstack_t));
}

/*ARGSUSED*/
static int
ipd_open(dev_t *devp, int flag, int otype, cred_t *credp)
{
	if (flag & FEXCL || flag & FNDELAY)
		return (EINVAL);

	if (otype != OTYP_CHR)
		return (EINVAL);

	if (!(flag & FREAD && flag & FWRITE))
		return (EINVAL);

	if (secpolicy_ip_config(credp, B_FALSE) != 0)
		return (EPERM);

	return (0);
}

/*ARGSUSED*/
static int
ipd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
	int rval;
	ipd_ioc_perturb_t ipip;

	switch (cmd) {
	case IPDIOC_CORRUPT:
	case IPDIOC_DELAY:
	case IPDIOC_DROP:
		if (ddi_copyin((void *)arg, &ipip, sizeof (ipd_ioc_perturb_t),
		    md) != 0)
			return (EFAULT);
		rval = ipd_ioctl_perturb(&ipip, cr, cmd);
		return (rval);
	case IPDIOC_REMOVE:
		if (ddi_copyin((void *)arg, &ipip, sizeof (ipd_ioc_perturb_t),
		    md) != 0)
			return (EFAULT);
		rval = ipd_ioctl_remove(&ipip, cr);
		return (rval);
	case IPDIOC_LIST:
		/*
		 * Because the list ioctl doesn't have a fixed-size struct due
		 * to needing to pass around a pointer, we instead delegate the
		 * copyin logic to the list code.
		 */
		return (ipd_ioctl_list(arg, cr));
	default:
		break;
	}

	return (ENOTTY);
}

/*ARGSUSED*/
static int
ipd_close(dev_t dev, int flag, int otype, cred_t *credp)
{
	return (0);
}

static int
ipd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int instance;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	if (ipd_devi != NULL)
		return (DDI_FAILURE);

	instance = ddi_get_instance(dip);
	if (ddi_create_minor_node(dip, "ipd", S_IFCHR, instance,
	    DDI_PSEUDO, 0) == DDI_FAILURE)
		return (DDI_FAILURE);

	ipd_neti = net_instance_alloc(NETINFO_VERSION);
	if (ipd_neti == NULL) {
		ddi_remove_minor_node(dip, NULL);
		return (DDI_FAILURE);
	}

	/*
	 * Note that these global structures MUST be initialized before we call
	 * net_instance_register, as that will instantly cause us to drive into
	 * the ipd_nin_create callbacks.
	 */
	list_create(&ipd_nsl, sizeof (ipd_netstack_t),
	    offsetof(ipd_netstack_t, ipdn_link));
	mutex_init(&ipd_nsl_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ipd_nactive_lock, NULL, MUTEX_DRIVER, NULL);

	/* Note, net_instance_alloc sets the version. */
	ipd_neti->nin_name = "ipd";
	ipd_neti->nin_create = ipd_nin_create;
	ipd_neti->nin_destroy = ipd_nin_destroy;
	ipd_neti->nin_shutdown = ipd_nin_shutdown;
	if (net_instance_register(ipd_neti) == DDI_FAILURE) {
		net_instance_free(ipd_neti);
		ddi_remove_minor_node(dip, NULL);
		return (DDI_FAILURE);
	}

	ddi_report_dev(dip);
	ipd_devi = dip;

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
ipd_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)ipd_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)getminor((dev_t)arg);
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}

static int
ipd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	mutex_enter(&ipd_nactive_lock);
	if (ipd_nactive > 0) {
		mutex_exit(&ipd_nactive_lock);
		return (DDI_FAILURE);
	}
	mutex_exit(&ipd_nactive_lock);
	ASSERT(dip == ipd_devi);
	ddi_remove_minor_node(dip, NULL);
	ipd_devi = NULL;

	if (ipd_neti != NULL) {
		VERIFY(net_instance_unregister(ipd_neti) == 0);
		net_instance_free(ipd_neti);
	}
	ipd_neti = NULL;

	mutex_destroy(&ipd_nsl_lock);
	mutex_destroy(&ipd_nactive_lock);
	list_destroy(&ipd_nsl);

	return (DDI_SUCCESS);
}

static struct cb_ops ipd_cb_ops = {
	ipd_open,		/* open */
	ipd_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	ipd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* streamtab */
	D_NEW | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* rev */
	nodev,			/* aread */
	nodev			/* awrite */
};

static struct dev_ops ipd_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	ipd_getinfo,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	ipd_attach,		/* attach */
	ipd_detach,		/* detach */
	nodev,			/* reset */
	&ipd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"Internet packet disturber",
	&ipd_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	{ (void *)&modldrv, NULL }
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}