/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, Joyent, Inc. All rights reserved.
 */
/*
 * DTrace - Dynamic Tracing for Solaris
 *
 * This is the implementation of the Solaris Dynamic Tracing framework
 * (DTrace).  The user-visible interface to DTrace is described at length in
 * the "Solaris Dynamic Tracing Guide".  The interfaces between the libdtrace
 * library, the in-kernel DTrace framework, and the DTrace providers are
 * described in the block comments in the <sys/dtrace.h> header file.  The
 * internal architecture of DTrace is described in the block comments in the
 * <sys/dtrace_impl.h> header file.  The comments contained within the DTrace
 * implementation very much assume mastery of all of these sources; if one has
 * an unanswered question about the implementation, one should consult them
 * first.
 *
 * The functions here are ordered roughly as follows:
 *
 *   - Probe context functions
 *   - Probe hashing functions
 *   - Non-probe context utility functions
 *   - Matching functions
 *   - Provider-to-Framework API functions
 *   - Probe management functions
 *   - DIF object functions
 *   - Format functions
 *   - Predicate functions
 *   - ECB functions
 *   - Buffer functions
 *   - Enabling functions
 *   - DOF functions
 *   - Anonymous enabling functions
 *   - Consumer state functions
 *   - Helper functions
 *   - Hook functions
 *   - Driver cookbook functions
 *
 * Each group of functions begins with a block comment labelled the "DTrace
 * [Group] Functions", allowing one to find each block by searching forward
 * on capital-f functions.
 */
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/systm.h>
#include <sys/sunddi.h>
#include <sys/cpuvar.h>
#include <sys/strsubr.h>
#include <sys/sysmacros.h>
#include <sys/dtrace_impl.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
#include <sys/mutex_impl.h>
#include <sys/rwlock_impl.h>
#include <sys/ctf_api.h>
#include <sys/panic.h>
#include <sys/priv_impl.h>
#include <sys/policy.h>
#include <sys/cred_impl.h>
#include <sys/procfs_isa.h>
#include <sys/taskq.h>
#include <sys/mkdev.h>
#include <sys/socket.h>
#include <netinet/in.h>
/*
 * DTrace Tunable Variables
 *
 * The following variables may be tuned by adding a line to /etc/system that
 * includes both the name of the DTrace module ("dtrace") and the name of the
 * variable.  For example:
 *
 *   set dtrace:dtrace_destructive_disallow = 1
 *
 * In general, the only variables that one should be tuning this way are those
 * that affect system-wide DTrace behavior, and for which the default behavior
 * is undesirable.  Most of these variables are tunable on a per-consumer
 * basis using DTrace options, and need not be tuned on a system-wide basis.
 * When tuning these variables, avoid pathological values; while some attempt
 * is made to verify the integrity of these variables, they are not considered
 * part of the supported interface to DTrace, and they are therefore not
 * checked comprehensively.  Further, these variables should not be tuned
 * dynamically via "mdb -kw" or other means; they should only be tuned via
 * /etc/system.
 */
int		dtrace_destructive_disallow = 0;
dtrace_optval_t	dtrace_nonroot_maxsize = (16 * 1024 * 1024);
size_t		dtrace_difo_maxsize = (256 * 1024);
dtrace_optval_t	dtrace_dof_maxsize = (256 * 1024);
size_t		dtrace_global_maxsize = (16 * 1024);
size_t		dtrace_actions_max = (16 * 1024);
size_t		dtrace_retain_max = 1024;
dtrace_optval_t	dtrace_helper_actions_max = 32;
dtrace_optval_t	dtrace_helper_providers_max = 32;
dtrace_optval_t	dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t		dtrace_strsize_default = 256;
dtrace_optval_t	dtrace_cleanrate_default = 9900990;		/* 101 hz */
dtrace_optval_t	dtrace_cleanrate_min = 200000;			/* 5000 hz */
dtrace_optval_t	dtrace_cleanrate_max = (uint64_t)60 * NANOSEC;	/* 1/minute */
dtrace_optval_t	dtrace_aggrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_max = (hrtime_t)10 * NANOSEC;	/* 6/minute */
dtrace_optval_t	dtrace_switchrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_nspec_default = 1;
dtrace_optval_t	dtrace_specsize_default = 32 * 1024;
dtrace_optval_t	dtrace_stackframes_default = 20;
dtrace_optval_t	dtrace_ustackframes_default = 20;
dtrace_optval_t	dtrace_jstackframes_default = 50;
dtrace_optval_t	dtrace_jstackstrsize_default = 512;
int		dtrace_msgdsize_max = 128;
hrtime_t	dtrace_chill_max = 500 * (NANOSEC / MILLISEC);	/* 500 ms */
hrtime_t	dtrace_chill_interval = NANOSEC;		/* 1000 ms */
int		dtrace_devdepth_max = 32;
int		dtrace_err_verbose;
hrtime_t	dtrace_deadman_interval = NANOSEC;
hrtime_t	dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
hrtime_t	dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
hrtime_t	dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC;
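
/*
 * For illustration (hypothetical values): most of the tunables above have
 * per-consumer option equivalents, so a single consumer can get the effect
 * of a larger dtrace_strsize_default without touching /etc/system:
 *
 *	# dtrace -x strsize=1024 -n 'syscall::open*:entry
 *	    { trace(copyinstr(arg0)); }'
 */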
/*
 * DTrace External Variables
 *
 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
 * available to DTrace consumers via the backtick (`) syntax.  One of these,
 * dtrace_zero, is made deliberately so:  it is provided as a source of
 * well-known, zero-filled memory.  While this variable is not documented,
 * it is used by some translators as an implementation detail.
 */
const char	dtrace_zero[256] = { 0 };	/* zero-filled memory */
/*
 * DTrace Internal Variables
 */
static dev_info_t	*dtrace_devi;		/* device info */
static vmem_t		*dtrace_arena;		/* probe ID arena */
static vmem_t		*dtrace_minor;		/* minor number arena */
static taskq_t		*dtrace_taskq;		/* task queue */
static dtrace_probe_t	**dtrace_probes;	/* array of all probes */
static int		dtrace_nprobes;		/* number of probes */
static dtrace_provider_t *dtrace_provider;	/* provider list */
static dtrace_meta_t	*dtrace_meta_pid;	/* user-land meta provider */
static int		dtrace_opens;		/* number of opens */
static int		dtrace_helpers;		/* number of helpers */
static void		*dtrace_softstate;	/* softstate pointer */
static dtrace_hash_t	*dtrace_bymod;		/* probes hashed by module */
static dtrace_hash_t	*dtrace_byfunc;		/* probes hashed by function */
static dtrace_hash_t	*dtrace_byname;		/* probes hashed by name */
static dtrace_toxrange_t *dtrace_toxrange;	/* toxic range array */
static int		dtrace_toxranges;	/* number of toxic ranges */
static int		dtrace_toxranges_max;	/* size of toxic range array */
static dtrace_anon_t	dtrace_anon;		/* anonymous enabling */
static kmem_cache_t	*dtrace_state_cache;	/* cache for dynamic state */
static uint64_t		dtrace_vtime_references; /* number of vtimestamp refs */
static kthread_t	*dtrace_panicked;	/* panicking thread */
static dtrace_ecb_t	*dtrace_ecb_create_cache; /* cached created ECB */
static dtrace_genid_t	dtrace_probegen;	/* current probe generation */
static dtrace_helpers_t *dtrace_deferred_pid;	/* deferred helper list */
static dtrace_enabling_t *dtrace_retained;	/* list of retained enablings */
static dtrace_genid_t	dtrace_retained_gen;	/* current retained enab gen */
static dtrace_dynvar_t	dtrace_dynhash_sink;	/* end of dynamic hash chains */
static int		dtrace_dynvar_failclean; /* dynvars failed to clean */
/*
 * DTrace Locking
 *
 * DTrace is protected by three (relatively coarse-grained) locks:
 *
 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
 *     including enabling state, probes, ECBs, consumer state, helper state,
 *     etc.  Importantly, dtrace_lock is _not_ required when in probe context;
 *     probe context is lock-free -- synchronization is handled via the
 *     dtrace_sync() cross call mechanism.
 *
 * (2) dtrace_provider_lock is required when manipulating provider state, or
 *     when provider state must be held constant.
 *
 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
 *     when meta provider state must be held constant.
 *
 * The lock ordering between these three locks is dtrace_meta_lock before
 * dtrace_provider_lock before dtrace_lock.  (In particular, there are
 * several places where dtrace_provider_lock is held by the framework as it
 * calls into the providers -- which then call back into the framework,
 * grabbing dtrace_lock.)
 *
 * There are two other locks in the mix:  mod_lock and cpu_lock.  With respect
 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
 * role as a coarse-grained lock; it is acquired before both of these locks.
 * With respect to dtrace_meta_lock, its behavior is stranger:  cpu_lock must
 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
 * acquired _between_ dtrace_provider_lock and dtrace_lock.
 */
static kmutex_t		dtrace_lock;		/* probe state lock */
static kmutex_t		dtrace_provider_lock;	/* provider state lock */
static kmutex_t		dtrace_meta_lock;	/* meta-provider state lock */
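
/*
 * A minimal sketch of the full acquisition order described above, assuming
 * a (hypothetical) caller that needed every one of these locks at once;
 * real callers take only the subset they need, in this relative order:
 *
 *	mutex_enter(&dtrace_meta_lock);		-- outermost
 *	mutex_enter(&cpu_lock);
 *	mutex_enter(&dtrace_provider_lock);
 *	mutex_enter(&mod_lock);
 *	mutex_enter(&dtrace_lock);		-- innermost
 *
 * The corresponding mutex_exit() calls are then made in the reverse order.
 */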
/*
 * DTrace Provider Variables
 *
 * These are the variables relating to DTrace as a provider (that is, the
 * provider of the BEGIN, END, and ERROR probes).
 */
static dtrace_pattr_t	dtrace_provider_attr = {
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
};
static void
dtrace_nullop(void)
{}

static int
dtrace_enable_nullop(void)
{
	return (0);
}
static dtrace_pops_t	dtrace_provider_ops = {
	(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
	(void (*)(void *, struct modctl *))dtrace_nullop,
	(int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	NULL,
	NULL,
	NULL,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop
};
static dtrace_id_t	dtrace_probeid_begin;	/* special BEGIN probe */
static dtrace_id_t	dtrace_probeid_end;	/* special END probe */
dtrace_id_t		dtrace_probeid_error;	/* special ERROR probe */
/*
 * DTrace Helper Tracing Variables
 */
uint32_t dtrace_helptrace_next = 0;
uint32_t dtrace_helptrace_nlocals;
char	*dtrace_helptrace_buffer;
int	dtrace_helptrace_bufsize = 512 * 1024;

#ifdef DEBUG
int	dtrace_helptrace_enabled = 1;
#else
int	dtrace_helptrace_enabled = 0;
#endif
/*
 * DTrace Error Hashing
 *
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
 * table.  This is very useful for checking coverage of tests that are
 * expected to induce DIF or DOF processing errors, and may be useful for
 * debugging problems in the DIF code generator or in DOF generation.  The
 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
 */
#ifdef DEBUG
static dtrace_errhash_t	dtrace_errhash[DTRACE_ERRHASHSZ];
static const char *dtrace_errlast;
static kthread_t *dtrace_errthread;
static kmutex_t dtrace_errlock;
#endif
/*
 * DTrace Macros and Constants
 *
 * These are various macros that are useful in various spots in the
 * implementation, along with a few random constants that have no meaning
 * outside of the implementation.  There is no real structure to this cpp
 * mishmash -- but is there ever?
 */
#define	DTRACE_HASHSTR(hash, probe)	\
	dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))

#define	DTRACE_HASHNEXT(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)

#define	DTRACE_HASHPREV(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)

#define	DTRACE_HASHEQ(hash, lhs, rhs)	\
	(strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
	    *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)

#define	DTRACE_AGGHASHSIZE_SLEW		17

#define	DTRACE_V4MAPPED_OFFSET		(sizeof (uint32_t) * 3)
/*
 * The key for a thread-local variable consists of the lower 61 bits of the
 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
 * equal to a variable identifier.  This is necessary (but not sufficient) to
 * assure that global associative arrays never collide with thread-local
 * variables.  To guarantee that they cannot collide, we must also define the
 * order for keying dynamic variables.  That order is:
 *
 *   [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
 *
 * Because the variable-key and the tls-key are in orthogonal spaces, there is
 * no way for a global variable key signature to match a thread-local key
 * signature.
 */
#define	DTRACE_TLS_THRKEY(where) { \
	uint_t intr = 0; \
	uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
	for (; actv; actv >>= 1) \
		intr++; \
	ASSERT(intr < (1 << 3)); \
	(where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
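
/*
 * Worked example (hypothetical values): a thread with t_did 42 and no
 * interrupts active above LOCK_LEVEL computes intr == 0, so its key is
 * simply (42 + DIF_VARIABLE_MAX) with bits 61-63 clear; the same macro
 * run with the third interrupt level above LOCK_LEVEL active folds 3
 * into bits 61-63, keeping the two keys distinct.
 */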
#define	DT_BSWAP_8(x)	((x) & 0xff)
#define	DT_BSWAP_16(x)	((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
#define	DT_BSWAP_32(x)	((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
#define	DT_BSWAP_64(x)	((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))
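
/*
 * Worked example: DT_BSWAP_16(0x1234) is ((0x1234 & 0xff) << 8) |
 * ((0x1234 >> 8) & 0xff), i.e. 0x3412; one more level of the same
 * recursion gives DT_BSWAP_32(0x11223344) == 0x44332211.
 */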
#define	DT_MASK_LO 0x00000000FFFFFFFFULL
#define	DTRACE_STORE(type, tomax, offset, what) \
	*((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);
#ifndef __x86
#define	DTRACE_ALIGNCHECK(addr, size, flags)				\
	if (addr & (size - 1)) {					\
		*flags |= CPU_DTRACE_BADALIGN;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}
#else
#define	DTRACE_ALIGNCHECK(addr, size, flags)
#endif
/*
 * Test whether a range of memory starting at testaddr of size testsz falls
 * within the range of memory described by addr, sz.  We take care to avoid
 * problems with overflow and underflow of the unsigned quantities, and
 * disallow all negative sizes.  Ranges of size 0 are allowed.
 */
#define	DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
	((testaddr) - (baseaddr) < (basesz) && \
	(testaddr) + (testsz) - (baseaddr) <= (basesz) && \
	(testaddr) + (testsz) >= (testaddr))
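
/*
 * Worked example of the unsigned-underflow defense (hypothetical values):
 * with baseaddr 0x1000 and basesz 0x100, a testaddr of 0xfff makes
 * (testaddr - baseaddr) wrap to a huge unsigned value, so the first term
 * rejects the range without any signed comparison; the final term catches
 * a testsz large enough to wrap (testaddr + testsz) itself.
 */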
/*
 * Test whether alloc_sz bytes will fit in the scratch region.  We isolate
 * alloc_sz on the righthand side of the comparison in order to avoid overflow
 * or underflow in the comparison with it.  This is simpler than the INRANGE
 * check above, because we know that the dtms_scratch_ptr is valid in the
 * range.  Allocations of size zero are allowed.
 */
#define	DTRACE_INSCRATCH(mstate, alloc_sz) \
	((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
	(mstate)->dtms_scratch_ptr >= (alloc_sz))
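
/*
 * In other words (illustrative): with scratch base B, size S, and current
 * pointer P (where B <= P <= B + S always holds), the free space is
 * B + S - P, which cannot underflow -- so comparing it against alloc_sz
 * directly is safe even for very large alloc_sz values.
 */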
#define	DTRACE_LOADFUNC(bits)						\
uint##bits##_t								\
dtrace_load##bits(uintptr_t addr)					\
{									\
	size_t size = bits / NBBY;					\
	uint##bits##_t rval;						\
	int i;								\
	volatile uint16_t *flags = (volatile uint16_t *)		\
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;			\
									\
	DTRACE_ALIGNCHECK(addr, size, flags);				\
									\
	for (i = 0; i < dtrace_toxranges; i++) {			\
		if (addr >= dtrace_toxrange[i].dtt_limit)		\
			continue;					\
									\
		if (addr + size <= dtrace_toxrange[i].dtt_base)		\
			continue;					\
									\
		/*							\
		 * This address falls within a toxic region; return 0.	\
		 */							\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}								\
									\
	*flags |= CPU_DTRACE_NOFAULT;					\
	rval = *((volatile uint##bits##_t *)addr);			\
	*flags &= ~CPU_DTRACE_NOFAULT;					\
									\
	return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0);		\
}

#ifdef _LP64
#define	dtrace_loadptr	dtrace_load64
#else
#define	dtrace_loadptr	dtrace_load32
#endif
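
/*
 * For illustration: DTRACE_LOADFUNC(32), instantiated later in this file,
 * expands to a function with the signature
 *
 *	uint32_t dtrace_load32(uintptr_t addr);
 *
 * that performs a checked load, returning 0 (with CPU_DTRACE_BADADDR or
 * CPU_DTRACE_FAULT noted in the per-CPU flags) rather than faulting when
 * addr lies in a toxic range or is otherwise unreadable.
 */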
#define	DTRACE_DYNHASH_FREE	0
#define	DTRACE_DYNHASH_SINK	1
#define	DTRACE_DYNHASH_VALID	2

#define	DTRACE_MATCH_FAIL	-1
#define	DTRACE_MATCH_NEXT	0
#define	DTRACE_MATCH_DONE	1
#define	DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0')
#define	DTRACE_STATE_ALIGN	64
#define	DTRACE_FLAGS2FLT(flags)						\
	(((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR :		\
	((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP :		\
	((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO :		\
	((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV :		\
	((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV :		\
	((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW :		\
	((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN :		\
	((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH :	\
	((flags) & CPU_DTRACE_BADSTACK) ? DTRACEFLT_BADSTACK :		\
	DTRACEFLT_UNKNOWN)
#define	DTRACEACT_ISSTRING(act)						\
	((act)->dta_kind == DTRACEACT_DIFEXPR &&			\
	(act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
static size_t dtrace_strlen(const char *, size_t);
static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *);
static void dtrace_enabling_matchall(void);
static void dtrace_enabling_reap(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
    dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
    dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
    dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context.  Because probe context is
 * any context in which C may be called, arbitrary locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 * As a result, functions called from probe context may only call other DTrace
 * support functions -- they may not interact at all with the system at large.
 * (Note that the ASSERT macro is made probe-context safe by redefining it in
 * terms of dtrace_assfail(), a probe-context safe function.)  If arbitrary
 * loads are to be performed from probe context, they _must_ be in terms of
 * the safe dtrace_load*() variants.
 *
 * Some functions in this block are not actually called from probe context;
 * for these functions, there will be a comment above the function reading
 * "Note:  not called from probe context."
 */
static void
dtrace_panic(const char *format, ...)
{
	va_list alist;

	va_start(alist, format);
	dtrace_vpanic(format, alist);
}
int
dtrace_assfail(const char *a, const char *f, int l)
{
	dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);

	/*
	 * We just need something here that even the most clever compiler
	 * cannot optimize away.
	 */
	return (a[(uintptr_t)f]);
}
/*
 * Atomically increment a specified error counter from probe context.
 */
static void
dtrace_error(uint32_t *counter)
{
	/*
	 * Most counters stored to in probe context are per-CPU counters.
	 * However, there are some error conditions that are sufficiently
	 * arcane that they don't merit per-CPU storage.  If these counters
	 * are incremented concurrently on different CPUs, scalability will be
	 * adversely affected -- but we don't expect them to be white-hot in a
	 * correctly constructed enabling...
	 */
	uint32_t oval, nval;

	do {
		oval = *counter;

		if ((nval = oval + 1) == 0) {
			/*
			 * If the counter would wrap, set it to 1 -- assuring
			 * that the counter is never zero when we have seen
			 * errors.  (The counter must be 32-bits because we
			 * aren't guaranteed a 64-bit compare&swap operation.)
			 * To save this code both the infamy of being fingered
			 * by a priggish news story and the indignity of being
			 * the target of a neo-puritan witch trial, we're
			 * carefully avoiding any colorful description of the
			 * likelihood of this condition -- but suffice it to
			 * say that it is only slightly more likely than the
			 * overflow of predicate cache IDs, as discussed in
			 * dtrace_predicate_create().
			 */
			nval = 1;
		}
	} while (dtrace_cas32(counter, oval, nval) != oval);
}
/*
 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
 */
DTRACE_LOADFUNC(8)
DTRACE_LOADFUNC(16)
DTRACE_LOADFUNC(32)
DTRACE_LOADFUNC(64)
static int
dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
{
	if (dest < mstate->dtms_scratch_base)
		return (0);

	if (dest + size < dest)
		return (0);

	if (dest + size > mstate->dtms_scratch_ptr)
		return (0);

	return (1);
}
static int
dtrace_canstore_statvar(uint64_t addr, size_t sz,
    dtrace_statvar_t **svars, int nsvars)
{
	int i;

	for (i = 0; i < nsvars; i++) {
		dtrace_statvar_t *svar = svars[i];

		if (svar == NULL || svar->dtsv_size == 0)
			continue;

		if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size))
			return (1);
	}

	return (0);
}
/*
 * Check to see if the address is within a memory region to which a store may
 * be issued.  This includes the DTrace scratch areas, and any DTrace variable
 * region.  The caller of dtrace_canstore() is responsible for performing any
 * alignment checks that are needed before stores are actually executed.
 */
static int
dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	/*
	 * First, check to see if the address is in scratch space...
	 */
	if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
	    mstate->dtms_scratch_size))
		return (1);

	/*
	 * Now check to see if it's a dynamic variable.  This check will pick
	 * up both thread-local variables and any global dynamically-allocated
	 * variables.
	 */
	if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base,
	    vstate->dtvs_dynvars.dtds_size)) {
		dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
		uintptr_t base = (uintptr_t)dstate->dtds_base +
		    (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
		uintptr_t chunkoffs;

		/*
		 * Before we assume that we can store here, we need to make
		 * sure that it isn't in our metadata -- storing to our
		 * dynamic variable metadata would corrupt our state.  For
		 * the range to not include any dynamic variable metadata,
		 * it must:
		 *
		 *	(1) Start above the hash table that is at the base of
		 *	the dynamic variable space
		 *
		 *	(2) Have a starting chunk offset that is beyond the
		 *	dtrace_dynvar_t that is at the base of every chunk
		 *
		 *	(3) Not span a chunk boundary
		 */
		if (addr < base)
			return (0);

		chunkoffs = (addr - base) % dstate->dtds_chunksize;

		if (chunkoffs < sizeof (dtrace_dynvar_t))
			return (0);

		if (chunkoffs + sz > dstate->dtds_chunksize)
			return (0);

		return (1);
	}

	/*
	 * Finally, check the static local and global variables.  These checks
	 * take the longest, so we perform them last.
	 */
	if (dtrace_canstore_statvar(addr, sz,
	    vstate->dtvs_locals, vstate->dtvs_nlocals))
		return (1);

	if (dtrace_canstore_statvar(addr, sz,
	    vstate->dtvs_globals, vstate->dtvs_nglobals))
		return (1);

	return (0);
}
/*
 * Convenience routine to check to see if the address is within a memory
 * region in which a load may be issued given the user's privilege level;
 * if not, it sets the appropriate error flags and loads 'addr' into the
 * illegal value slot.
 *
 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
 * appropriate memory access protection.
 */
static int
dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (1);

	/*
	 * You can obviously read that which you can store.
	 */
	if (dtrace_canstore(addr, sz, mstate, vstate))
		return (1);

	/*
	 * We're allowed to read from our own string table.
	 */
	if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab,
	    mstate->dtms_difo->dtdo_strlen))
		return (1);

	DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
	*illval = addr;
	return (0);
}
/*
 * Convenience routine to check to see if a given string is within a memory
 * region in which a load may be issued given the user's privilege level;
 * this exists so that we don't need to issue unnecessary dtrace_strlen()
 * calls in the event that the user has all privileges.
 */
static int
dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	size_t strsz;

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (1);

	strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, sz);
	if (dtrace_canload(addr, strsz, mstate, vstate))
		return (1);

	return (0);
}
/*
 * Convenience routine to check to see if a given variable is within a memory
 * region in which a load may be issued given the user's privilege level.
 */
static int
dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	size_t sz;
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (1);

	if (type->dtdt_kind == DIF_TYPE_STRING)
		sz = dtrace_strlen(src,
		    vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1;
	else
		sz = type->dtdt_size;

	return (dtrace_canload((uintptr_t)src, sz, mstate, vstate));
}
/*
 * Compare two strings using safe loads.
 */
static int
dtrace_strncmp(char *s1, char *s2, size_t limit)
{
	uint8_t c1, c2;
	volatile uint16_t *flags;

	if (s1 == s2 || limit == 0)
		return (0);

	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	do {
		if (s1 == NULL) {
			c1 = '\0';
		} else {
			c1 = dtrace_load8((uintptr_t)s1++);
		}

		if (s2 == NULL) {
			c2 = '\0';
		} else {
			c2 = dtrace_load8((uintptr_t)s2++);
		}

		if (c1 != c2)
			return (c1 - c2);
	} while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));

	return (0);
}
/*
 * Compute strlen(s) for a string using safe memory accesses.  The additional
 * len parameter is used to specify a maximum length to ensure completion.
 */
static size_t
dtrace_strlen(const char *s, size_t lim)
{
	uint_t len;

	for (len = 0; len != lim; len++) {
		if (dtrace_load8((uintptr_t)s++) == '\0')
			break;
	}

	return (len);
}
/*
 * Check if an address falls within a toxic region.
 */
static int
dtrace_istoxic(uintptr_t kaddr, size_t size)
{
	uintptr_t taddr, tsize;
	int i;

	for (i = 0; i < dtrace_toxranges; i++) {
		taddr = dtrace_toxrange[i].dtt_base;
		tsize = dtrace_toxrange[i].dtt_limit - taddr;

		if (kaddr - taddr < tsize) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr;
			return (1);
		}

		if (taddr - kaddr < size) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr;
			return (1);
		}
	}

	return (0);
}
/*
 * Copy src to dst using safe memory accesses.  The src is assumed to be unsafe
 * memory specified by the DIF program.  The dst is assumed to be safe memory
 * that we can store to directly because it is managed by DTrace.  As with
 * standard bcopy, overlapping copies are handled properly.
 */
static void
dtrace_bcopy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst;
		const uint8_t *s2 = src;

		if (s1 <= s2) {
			do {
				*s1++ = dtrace_load8((uintptr_t)s2++);
			} while (--len != 0);
		} else {
			s2 += len;
			s1 += len;

			do {
				*--s1 = dtrace_load8((uintptr_t)--s2);
			} while (--len != 0);
		}
	}
}
/*
 * Copy src to dst using safe memory accesses, up to either the specified
 * length, or the point that a nul byte is encountered.  The src is assumed to
 * be unsafe memory specified by the DIF program.  The dst is assumed to be
 * safe memory that we can store to directly because it is managed by DTrace.
 * Unlike dtrace_bcopy(), overlapping regions are not handled.
 */
static void
dtrace_strcpy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst, c;
		const uint8_t *s2 = src;

		do {
			*s1++ = c = dtrace_load8((uintptr_t)s2++);
		} while (--len != 0 && c != '\0');
	}
}
/*
 * Copy src to dst, deriving the size and type from the specified (BYREF)
 * variable type.  The src is assumed to be unsafe memory specified by the DIF
 * program.  The dst is assumed to be DTrace variable memory that is of the
 * specified type; we assume that we can store to directly.
 */
static void
dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type)
{
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	if (type->dtdt_kind == DIF_TYPE_STRING) {
		dtrace_strcpy(src, dst, type->dtdt_size);
	} else {
		dtrace_bcopy(src, dst, type->dtdt_size);
	}
}
/*
 * Compare s1 to s2 using safe memory accesses.  The s1 data is assumed to be
 * unsafe memory specified by the DIF program.  The s2 data is assumed to be
 * safe memory that we can access directly because it is managed by DTrace.
 */
static int
dtrace_bcmp(const void *s1, const void *s2, size_t len)
{
	volatile uint16_t *flags;

	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	if (s1 == s2)
		return (0);

	if (s1 == NULL || s2 == NULL)
		return (1);

	if (s1 != s2 && len != 0) {
		const uint8_t *ps1 = s1;
		const uint8_t *ps2 = s2;

		do {
			if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
				return (1);
		} while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
	}
	return (0);
}
/*
 * Zero the specified region using a simple byte-by-byte loop.  Note that this
 * is for safe DTrace-managed memory only.
 */
static void
dtrace_bzero(void *dst, size_t len)
{
	uchar_t *cp;

	for (cp = dst; len != 0; len--)
		*cp++ = 0;
}
static void
dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t result[2];

	result[0] = addend1[0] + addend2[0];
	result[1] = addend1[1] + addend2[1] +
	    (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);

	sum[0] = result[0];
	sum[1] = result[1];
}
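
/*
 * Worked example: adding {0xffffffffffffffff, 0} to {1, 0} wraps the low
 * word to 0; since 0 < 0xffffffffffffffff, the carry term above evaluates
 * to 1 and the result is {0, 1} -- that is, 2^64, as expected.
 */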
/*
 * Shift the 128-bit value in a by b. If b is positive, shift left.
 * If b is negative, shift right.
 */
static void
dtrace_shift_128(uint64_t *a, int b)
{
	uint64_t mask;

	if (b == 0)
		return;

	if (b < 0) {
		b = -b;
		if (b >= 64) {
			a[0] = a[1] >> (b - 64);
			a[1] = 0;
		} else {
			a[0] >>= b;
			mask = 1LL << (64 - b);
			mask -= 1;
			a[0] |= ((a[1] & mask) << (64 - b));
			a[1] >>= b;
		}
	} else {
		if (b >= 64) {
			a[1] = a[0] << (b - 64);
			a[0] = 0;
		} else {
			a[1] <<= b;
			mask = a[0] >> (64 - b);
			a[1] |= mask;
			a[0] <<= b;
		}
	}
}
/*
 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
 * use native multiplication on those, and then re-combine into the
 * resulting 128-bit value.
 *
 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
 *     hi1 * hi2 << 64 +
 *     hi1 * lo2 << 32 +
 *     hi2 * lo1 << 32 +
 *     lo1 * lo2
 */
static void
dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
{
	uint64_t hi1, hi2, lo1, lo2;
	uint64_t tmp[2];

	hi1 = factor1 >> 32;
	hi2 = factor2 >> 32;

	lo1 = factor1 & DT_MASK_LO;
	lo2 = factor2 & DT_MASK_LO;

	product[0] = lo1 * lo2;
	product[1] = hi1 * hi2;

	tmp[0] = hi1 * lo2;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);

	tmp[0] = hi2 * lo1;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);
}
/*
 * This privilege check should be used by actions and subroutines to
 * verify that the user credentials of the process that enabled the
 * invoking ECB match the target credentials
 */
static int
dtrace_priv_proc_common_user(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

	if ((cr = CRED()) != NULL &&
	    s_cr->cr_uid == cr->cr_uid &&
	    s_cr->cr_uid == cr->cr_ruid &&
	    s_cr->cr_uid == cr->cr_suid &&
	    s_cr->cr_gid == cr->cr_gid &&
	    s_cr->cr_gid == cr->cr_rgid &&
	    s_cr->cr_gid == cr->cr_sgid)
		return (1);

	return (0);
}
/*
 * This privilege check should be used by actions and subroutines to
 * verify that the zone of the process that enabled the invoking ECB
 * matches the target credentials
 */
static int
dtrace_priv_proc_common_zone(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

	if ((cr = CRED()) != NULL &&
	    s_cr->cr_zone == cr->cr_zone)
		return (1);

	return (0);
}
/*
 * This privilege check should be used by actions and subroutines to
 * verify that the process has not setuid or changed credentials.
 */
static int
dtrace_priv_proc_common_nocd()
{
	proc_t *proc;

	if ((proc = ttoproc(curthread)) != NULL &&
	    !(proc->p_flag & SNOCD))
		return (1);

	return (0);
}
static int
dtrace_priv_proc_destructive(dtrace_state_t *state, dtrace_mstate_t *mstate)
{
	int action = state->dts_cred.dcr_action;

	if (!(mstate->dtms_access & DTRACE_ACCESS_PROC))
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
	    dtrace_priv_proc_common_zone(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
	    dtrace_priv_proc_common_user(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
	    dtrace_priv_proc_common_nocd() == 0)
		goto bad;

	return (1);

bad:
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}
static int
dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate)
{
	if (mstate->dtms_access & DTRACE_ACCESS_PROC) {
		if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
			return (1);

		if (dtrace_priv_proc_common_zone(state) &&
		    dtrace_priv_proc_common_user(state) &&
		    dtrace_priv_proc_common_nocd())
			return (1);
	}

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}
static int
dtrace_priv_proc(dtrace_state_t *state, dtrace_mstate_t *mstate)
{
	if ((mstate->dtms_access & DTRACE_ACCESS_PROC) &&
	    (state->dts_cred.dcr_action & DTRACE_CRA_PROC))
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}
static int
dtrace_priv_kernel(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}
static int
dtrace_priv_kernel_destructive(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}
/*
 * Determine if the dte_cond of the specified ECB allows for processing of
 * the current probe to continue.  Note that this routine may allow continued
 * processing, but with access(es) stripped from the mstate's dtms_access
 * field.
 */
static int
dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
    dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;
	dtrace_provider_t *prov = probe->dtpr_provider;
	dtrace_pops_t *pops = &prov->dtpv_pops;
	int mode = DTRACE_MODE_NOPRIV_DROP;

	ASSERT(ecb->dte_cond);

	if (pops->dtps_mode != NULL) {
		mode = pops->dtps_mode(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg);

		ASSERT((mode & DTRACE_MODE_USER) ||
		    (mode & DTRACE_MODE_KERNEL));
		ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
		    (mode & DTRACE_MODE_NOPRIV_DROP));
	}

	/*
	 * If the dte_cond bits indicate that this consumer is only allowed to
	 * see user-mode firings of this probe, call the provider's dtps_mode()
	 * entry point to check that the probe was fired while in a user
	 * context.  If that's not the case, use the policy specified by the
	 * provider to determine if we drop the probe or merely restrict
	 * operation.
	 */
	if (ecb->dte_cond & DTRACE_COND_USERMODE) {
		ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);

		if (!(mode & DTRACE_MODE_USER)) {
			if (mode & DTRACE_MODE_NOPRIV_DROP)
				return (0);

			mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
		}
	}

	/*
	 * This is more subtle than it looks. We have to be absolutely certain
	 * that CRED() isn't going to change out from under us so it's only
	 * legit to examine that structure if we're in constrained situations.
	 * Currently, the only times we'll do this check is if a non-super-user
	 * has enabled the profile or syscall providers -- providers that
	 * allow visibility of all processes. For the profile case, the check
	 * above will ensure that we're examining a user context.
	 */
	if (ecb->dte_cond & DTRACE_COND_OWNER) {
		cred_t *cr;
		cred_t *s_cr = state->dts_cred.dcr_cred;
		proc_t *proc;

		ASSERT(s_cr != NULL);

		if ((cr = CRED()) == NULL ||
		    s_cr->cr_uid != cr->cr_uid ||
		    s_cr->cr_uid != cr->cr_ruid ||
		    s_cr->cr_uid != cr->cr_suid ||
		    s_cr->cr_gid != cr->cr_gid ||
		    s_cr->cr_gid != cr->cr_rgid ||
		    s_cr->cr_gid != cr->cr_sgid ||
		    (proc = ttoproc(curthread)) == NULL ||
		    (proc->p_flag & SNOCD)) {
			if (mode & DTRACE_MODE_NOPRIV_DROP)
				return (0);

			mstate->dtms_access &= ~DTRACE_ACCESS_PROC;
		}
	}

	/*
	 * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not
	 * in our zone, check to see if our mode policy is to restrict rather
	 * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC
	 * and DTRACE_ACCESS_ARGS
	 */
	if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
		cred_t *cr;
		cred_t *s_cr = state->dts_cred.dcr_cred;

		ASSERT(s_cr != NULL);

		if ((cr = CRED()) == NULL ||
		    s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
			if (mode & DTRACE_MODE_NOPRIV_DROP)
				return (0);

			mstate->dtms_access &=
			    ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
		}
	}

	return (1);
}
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously (and at a regular interval) from outside of probe context to
 * clean the dirty dynamic variable lists on all CPUs.  Dynamic variable
 * cleaning is explained in detail in <sys/dtrace_impl.h>.
 */
void
dtrace_dynvar_clean(dtrace_dstate_t *dstate)
{
	dtrace_dynvar_t *dirty;
	dtrace_dstate_percpu_t *dcpu;
	dtrace_dynvar_t **rinsep;
	int i, j, work = 0;

	for (i = 0; i < NCPU; i++) {
		dcpu = &dstate->dtds_percpu[i];
		rinsep = &dcpu->dtdsc_rinsing;

		/*
		 * If the dirty list is NULL, there is no dirty work to do.
		 */
		if (dcpu->dtdsc_dirty == NULL)
			continue;

		if (dcpu->dtdsc_rinsing != NULL) {
			/*
			 * If the rinsing list is non-NULL, then it is because
			 * this CPU was selected to accept another CPU's
			 * dirty list -- and since that time, dirty buffers
			 * have accumulated.  This is a highly unlikely
			 * condition, but we choose to ignore the dirty
			 * buffers -- they'll be picked up in a future cleanse.
			 */
			continue;
		}

		if (dcpu->dtdsc_clean != NULL) {
			/*
			 * If the clean list is non-NULL, then we're in a
			 * situation where a CPU has done deallocations (we
			 * have a non-NULL dirty list) but no allocations (we
			 * also have a non-NULL clean list).  We can't simply
			 * move the dirty list into the clean list on this
			 * CPU, yet we also don't want to allow this condition
			 * to persist, lest a short clean list prevent a
			 * massive dirty list from being cleaned (which in
			 * turn could lead to otherwise avoidable dynamic
			 * drops).  To deal with this, we look for some CPU
			 * with a NULL clean list, NULL dirty list, and NULL
			 * rinsing list -- and then we borrow this CPU to
			 * rinse our dirty list.
			 */
			for (j = 0; j < NCPU; j++) {
				dtrace_dstate_percpu_t *rinser;

				rinser = &dstate->dtds_percpu[j];

				if (rinser->dtdsc_rinsing != NULL)
					continue;

				if (rinser->dtdsc_dirty != NULL)
					continue;

				if (rinser->dtdsc_clean != NULL)
					continue;

				rinsep = &rinser->dtdsc_rinsing;
				break;
			}

			if (j == NCPU) {
				/*
				 * We were unable to find another CPU that
				 * could accept this dirty list -- we are
				 * therefore unable to clean it now.
				 */
				dtrace_dynvar_failclean++;
				continue;
			}
		}

		work = 1;

		/*
		 * Atomically move the dirty list aside.
		 */
		do {
			dirty = dcpu->dtdsc_dirty;

			/*
			 * Before we zap the dirty list, set the rinsing list.
			 * (This allows for a potential assertion in
			 * dtrace_dynvar():  if a free dynamic variable appears
			 * on a hash chain, either the dirty list or the
			 * rinsing list for some CPU must be non-NULL.)
			 */
			*rinsep = dirty;
			dtrace_membar_producer();
		} while (dtrace_casptr(&dcpu->dtdsc_dirty,
		    dirty, NULL) != dirty);
	}

	if (!work) {
		/*
		 * We have no work to do; we can simply return.
		 */
		return;
	}

	dtrace_sync();

	for (i = 0; i < NCPU; i++) {
		dcpu = &dstate->dtds_percpu[i];

		if (dcpu->dtdsc_rinsing == NULL)
			continue;

		/*
		 * We are now guaranteed that no hash chain contains a pointer
		 * into this dirty list; we can make it clean.
		 */
		ASSERT(dcpu->dtdsc_clean == NULL);
		dcpu->dtdsc_clean = dcpu->dtdsc_rinsing;
		dcpu->dtdsc_rinsing = NULL;
	}

	/*
	 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
	 * sure that all CPUs have seen all of the dtdsc_clean pointers.
	 * This prevents a race whereby a CPU incorrectly decides that
	 * the state should be something other than DTRACE_DSTATE_CLEAN
	 * after dtrace_dynvar_clean() has completed.
	 */
	dtrace_sync();

	dstate->dtds_state = DTRACE_DSTATE_CLEAN;
}
/*
 * Depending on the value of the op parameter, this function looks up,
 * allocates or deallocates an arbitrarily-keyed dynamic variable.  If an
 * allocation is requested, this function will return a pointer to a
 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
 * variable can be allocated.  If NULL is returned, the appropriate counter
 * will be incremented.
 */
dtrace_dynvar_t *
dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
    dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	uint64_t hashval = DTRACE_DYNHASH_VALID;
	dtrace_dynhash_t *hash = dstate->dtds_hash;
	dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL;
	processorid_t me = CPU->cpu_id, cpu = me;
	dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me];
	size_t bucket, ksize;
	size_t chunksize = dstate->dtds_chunksize;
	uintptr_t kdata, lock, nstate;
	uint_t i;

	ASSERT(nkeys != 0);

	/*
	 * Hash the key.  As with aggregations, we use Jenkins' "One-at-a-time"
	 * algorithm.  For the by-value portions, we perform the algorithm in
	 * 16-bit chunks (as opposed to 8-bit chunks).  This speeds things up a
	 * bit, and seems to have only a minute effect on distribution.  For
	 * the by-reference data, we perform "One-at-a-time" iterating (safely)
	 * over each referenced byte.  It's painful to do this, but it's much
	 * better than pathological hash distribution.  The efficacy of the
	 * hashing algorithm (and a comparison with other algorithms) may be
	 * found by running the ::dtrace_dynstat MDB dcmd.
	 */
	for (i = 0; i < nkeys; i++) {
		if (key[i].dttk_size == 0) {
			uint64_t val = key[i].dttk_value;

			hashval += (val >> 48) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 32) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 16) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += val & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);
		} else {
			/*
			 * This is incredibly painful, but it beats the hell
			 * out of the alternative.
			 */
			uint64_t j, size = key[i].dttk_size;
			uintptr_t base = (uintptr_t)key[i].dttk_value;

			if (!dtrace_canload(base, size, mstate, vstate))
				break;

			for (j = 0; j < size; j++) {
				hashval += dtrace_load8(base + j);
				hashval += (hashval << 10);
				hashval ^= (hashval >> 6);
			}
		}
	}

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
		return (NULL);

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	/*
	 * There is a remote chance (ideally, 1 in 2^31) that our hashval
	 * comes out to be one of our two sentinel hash values.  If this
	 * actually happens, we set the hashval to be a value known to be a
	 * non-sentinel value.
	 */
	if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK)
		hashval = DTRACE_DYNHASH_VALID;

	/*
	 * Yes, it's painful to do a divide here.  If the cycle count becomes
	 * important here, tricks can be pulled to reduce it.  (However, it's
	 * critical that hash collisions be kept to an absolute minimum;
	 * they're much more painful than a divide.)  It's better to have a
	 * solution that generates few collisions and still keeps things
	 * relatively simple.
	 */
	bucket = hashval % dstate->dtds_hashsize;

	if (op == DTRACE_DYNVAR_DEALLOC) {
		volatile uintptr_t *lockp = &hash[bucket].dtdh_lock;

		for (;;) {
			while ((lock = *lockp) & 1)
				continue;

			if (dtrace_casptr((void *)lockp,
			    (void *)lock, (void *)(lock + 1)) == (void *)lock)
				break;
		}

		dtrace_membar_producer();
	}

top:
	prev = NULL;
	lock = hash[bucket].dtdh_lock;

	dtrace_membar_consumer();

	start = hash[bucket].dtdh_chain;
	ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK ||
	    start->dtdv_hashval != DTRACE_DYNHASH_FREE ||
	    op != DTRACE_DYNVAR_DEALLOC));

	for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) {
		dtrace_tuple_t *dtuple = &dvar->dtdv_tuple;
		dtrace_key_t *dkey = &dtuple->dtt_key[0];

		if (dvar->dtdv_hashval != hashval) {
			if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) {
				/*
				 * We've reached the sink, and therefore the
				 * end of the hash chain; we can kick out of
				 * the loop knowing that we have seen a valid
				 * snapshot of state.
				 */
				ASSERT(dvar->dtdv_next == NULL);
				ASSERT(dvar == &dtrace_dynhash_sink);
				break;
			}

			if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) {
				/*
				 * We've gone off the rails:  somewhere along
				 * the line, one of the members of this hash
				 * chain was deleted.  Note that we could also
				 * detect this by simply letting this loop run
				 * to completion, as we would eventually hit
				 * the end of the dirty list.  However, we
				 * want to avoid running the length of the
				 * dirty list unnecessarily (it might be quite
				 * long), so we catch this as early as
				 * possible by detecting the hash marker.  In
				 * this case, we simply set dvar to NULL and
				 * break; the conditional after the loop will
				 * send us back to top.
				 */
				dvar = NULL;
				break;
			}

			goto next;
		}

		if (dtuple->dtt_nkeys != nkeys)
			goto next;

		for (i = 0; i < nkeys; i++, dkey++) {
			if (dkey->dttk_size != key[i].dttk_size)
				goto next; /* size or type mismatch */

			if (dkey->dttk_size != 0) {
				if (dtrace_bcmp(
				    (void *)(uintptr_t)key[i].dttk_value,
				    (void *)(uintptr_t)dkey->dttk_value,
				    dkey->dttk_size))
					goto next;
			} else {
				if (dkey->dttk_value != key[i].dttk_value)
					goto next;
			}
		}

		if (op != DTRACE_DYNVAR_DEALLOC)
			return (dvar);

		ASSERT(dvar->dtdv_next == NULL ||
		    dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE);

		if (prev != NULL) {
			ASSERT(hash[bucket].dtdh_chain != dvar);
			ASSERT(start != dvar);
			ASSERT(prev->dtdv_next == dvar);
			prev->dtdv_next = dvar->dtdv_next;
		} else {
			if (dtrace_casptr(&hash[bucket].dtdh_chain,
			    start, dvar->dtdv_next) != start) {
				/*
				 * We have failed to atomically swing the
				 * hash table head pointer, presumably because
				 * of a conflicting allocation on another CPU.
				 * We need to reread the hash chain and try
				 * again.
				 */
				goto top;
			}
		}

		dtrace_membar_producer();

		/*
		 * Now set the hash value to indicate that it's free.
		 */
		ASSERT(hash[bucket].dtdh_chain != dvar);
		dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

		dtrace_membar_producer();

		/*
		 * Set the next pointer to point at the dirty list, and
		 * atomically swing the dirty pointer to the newly freed dvar.
		 */
		do {
			next = dcpu->dtdsc_dirty;
			dvar->dtdv_next = next;
		} while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next);

		/*
		 * Finally, unlock this hash bucket.
		 */
		ASSERT(hash[bucket].dtdh_lock == lock);
		ASSERT(lock & 1);
		hash[bucket].dtdh_lock++;

		return (NULL);
next:
		prev = dvar;
		continue;
	}

	if (dvar == NULL) {
		/*
		 * If dvar is NULL, it is because we went off the rails:
		 * one of the elements that we traversed in the hash chain
		 * was deleted while we were traversing it.  In this case,
		 * we assert that we aren't doing a dealloc (deallocs lock
		 * the hash bucket to prevent themselves from racing with
		 * one another), and retry the hash chain traversal.
		 */
		ASSERT(op != DTRACE_DYNVAR_DEALLOC);
		goto top;
	}

	if (op != DTRACE_DYNVAR_ALLOC) {
		/*
		 * If we are not to allocate a new variable, we want to
		 * return NULL now.  Before we return, check that the value
		 * of the lock word hasn't changed.  If it has, we may have
		 * seen an inconsistent snapshot.
		 */
		if (op == DTRACE_DYNVAR_NOALLOC) {
			if (hash[bucket].dtdh_lock != lock)
				goto top;
		} else {
			ASSERT(op == DTRACE_DYNVAR_DEALLOC);
			ASSERT(hash[bucket].dtdh_lock == lock);
			ASSERT(lock & 1);
			hash[bucket].dtdh_lock++;
		}

		return (NULL);
	}

	/*
	 * We need to allocate a new dynamic variable.  The size we need is the
	 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
	 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
	 * the size of any referred-to data (dsize).  We then round the final
	 * size up to the chunksize for allocation.
	 */
	for (ksize = 0, i = 0; i < nkeys; i++)
		ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));

	/*
	 * This should be pretty much impossible, but could happen if, say,
	 * strange DIF specified the tuple.  Ideally, this should be an
	 * assertion and not an error condition -- but that requires that the
	 * chunksize calculation in dtrace_difo_chunksize() be absolutely
	 * bullet-proof.  (That is, it must not be able to be fooled by
	 * malicious DIF.)  Given the lack of backwards branches in DIF,
	 * solving this would presumably not amount to solving the Halting
	 * Problem -- but it still seems awfully hard.
	 */
	if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) +
	    ksize + dsize > chunksize) {
		dcpu->dtdsc_drops++;
		return (NULL);
	}

	nstate = DTRACE_DSTATE_EMPTY;

	do {
retry:
		free = dcpu->dtdsc_free;

		if (free == NULL) {
			dtrace_dynvar_t *clean = dcpu->dtdsc_clean;
			void *rval;

			if (clean == NULL) {
				/*
				 * We're out of dynamic variable space on
				 * this CPU.  Unless we have tried all CPUs,
				 * we'll try to allocate from a different
				 * CPU.
				 */
				switch (dstate->dtds_state) {
				case DTRACE_DSTATE_CLEAN: {
					void *sp = &dstate->dtds_state;

					if (++cpu >= NCPU)
						cpu = 0;

					if (dcpu->dtdsc_dirty != NULL &&
					    nstate == DTRACE_DSTATE_EMPTY)
						nstate = DTRACE_DSTATE_DIRTY;

					if (dcpu->dtdsc_rinsing != NULL)
						nstate = DTRACE_DSTATE_RINSING;

					dcpu = &dstate->dtds_percpu[cpu];

					if (cpu != me)
						goto retry;

					(void) dtrace_cas32(sp,
					    DTRACE_DSTATE_CLEAN, nstate);

					/*
					 * To increment the correct bean
					 * counter, take another lap.
					 */
					goto retry;
				}

				case DTRACE_DSTATE_DIRTY:
					dcpu->dtdsc_dirty_drops++;
					break;

				case DTRACE_DSTATE_RINSING:
					dcpu->dtdsc_rinsing_drops++;
					break;

				case DTRACE_DSTATE_EMPTY:
					dcpu->dtdsc_drops++;
					break;
				}

				DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP);
				return (NULL);
			}

			/*
			 * The clean list appears to be non-empty.  We want to
			 * move the clean list to the free list; we start by
			 * moving the clean pointer aside.
			 */
			if (dtrace_casptr(&dcpu->dtdsc_clean,
			    clean, NULL) != clean) {
				/*
				 * We are in one of two situations:
				 *
				 *  (a)	The clean list was switched to the
				 *	free list by another CPU.
				 *
				 *  (b)	The clean list was added to by the
				 *	cleansing cyclic.
				 *
				 * In either of these situations, we can
				 * just reattempt the free list allocation.
				 */
				goto retry;
			}

			ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);

			/*
			 * Now we'll move the clean list to our free list.
			 * It's impossible for this to fail:  the only way
			 * the free list can be updated is through this
			 * code path, and only one CPU can own the clean list.
			 * Thus, it would only be possible for this to fail if
			 * this code were racing with dtrace_dynvar_clean().
			 * (That is, if dtrace_dynvar_clean() updated the clean
			 * list, and we ended up racing to update the free
			 * list.)  This race is prevented by the dtrace_sync()
			 * in dtrace_dynvar_clean() -- which flushes the
			 * owners of the clean lists out before resetting
			 * the clean lists.
			 */
			dcpu = &dstate->dtds_percpu[me];
			rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
			ASSERT(rval == NULL);
			goto retry;
		}

		dvar = free;
		new_free = dvar->dtdv_next;
	} while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free);

	/*
	 * We have now allocated a new chunk.  We copy the tuple keys into the
	 * tuple array and copy any referenced key data into the data space
	 * following the tuple array.  As we do this, we relocate dttk_value
	 * in the final tuple to point to the key data address in the chunk.
	 */
	kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys];
	dvar->dtdv_data = (void *)(kdata + ksize);
	dvar->dtdv_tuple.dtt_nkeys = nkeys;

	for (i = 0; i < nkeys; i++) {
		dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i];
		size_t kesize = key[i].dttk_size;

		if (kesize != 0) {
			dtrace_bcopy(
			    (const void *)(uintptr_t)key[i].dttk_value,
			    (void *)kdata, kesize);
			dkey->dttk_value = kdata;
			kdata += P2ROUNDUP(kesize, sizeof (uint64_t));
		} else {
			dkey->dttk_value = key[i].dttk_value;
		}

		dkey->dttk_size = kesize;
	}

	ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE);
	dvar->dtdv_hashval = hashval;
	dvar->dtdv_next = start;

	if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start)
		return (dvar);

	/*
	 * The cas has failed.  Either another CPU is adding an element to
	 * this hash chain, or another CPU is deleting an element from this
	 * hash chain.  The simplest way to deal with both of these cases
	 * (though not necessarily the most efficient) is to free our
	 * allocated block and tail-call ourselves.  Note that the free is
	 * to the dirty list and _not_ to the free list.  This is to prevent
	 * races with allocators, above.
	 */
	dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

	dtrace_membar_producer();

	do {
		free = dcpu->dtdsc_dirty;
		dvar->dtdv_next = free;
	} while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free);

	return (dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate));
}
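
/*
 * An illustrative (hypothetical) caller: looking up a single-key, by-value
 * dynamic variable without allocating it if it is absent.  A by-reference
 * key would instead set dttk_size to the length of the referenced data.
 *
 *	dtrace_key_t key[1];
 *	dtrace_dynvar_t *dvar;
 *
 *	key[0].dttk_value = (uint64_t)id;
 *	key[0].dttk_size = 0;
 *	dvar = dtrace_dynvar(dstate, 1, key, sizeof (uint64_t),
 *	    DTRACE_DYNVAR_NOALLOC, mstate, vstate);
 */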
/*ARGSUSED*/
static void
dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	if ((int64_t)nval < (int64_t)*oval)
		*oval = nval;
}
/*ARGSUSED*/
static void
dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	if ((int64_t)nval > (int64_t)*oval)
		*oval = nval;
}
static void
dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
{
	int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
	int64_t val = (int64_t)nval;

	if (val < 0) {
		for (i = 0; i < zero; i++) {
			if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i] += incr;
				return;
			}
		}
	} else {
		for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) {
			if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i - 1] += incr;
				return;
			}
		}

		quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
		return;
	}

	ASSERT(0);
}
static void
dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *lquanta++;
	int32_t base = DTRACE_LQUANTIZE_BASE(arg);
	uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
	uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
	int32_t val = (int32_t)nval, level;

	ASSERT(step != 0);
	ASSERT(levels != 0);

	if (val < base) {
		/*
		 * This is an underflow.
		 */
		lquanta[0] += incr;
		return;
	}

	level = (val - base) / step;

	if (level < levels) {
		lquanta[level + 1] += incr;
		return;
	}

	/*
	 * This is an overflow.
	 */
	lquanta[levels + 1] += incr;
}
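
/*
 * Worked example (hypothetical parameters): with base 0, step 10 and
 * 10 levels, a value of 35 computes level = (35 - 0) / 10 = 3, which is
 * below levels, so lquanta[4] is incremented -- the bucket covering
 * [30, 40).  lquanta[0] and lquanta[11] remain the under- and overflow
 * buckets respectively.
 */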
static int
dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low,
    uint16_t high, uint16_t nsteps, int64_t value)
{
	int64_t this = 1, last, next;
	int base = 1, order;

	ASSERT(factor <= nsteps);
	ASSERT(nsteps % factor == 0);

	for (order = 0; order < low; order++)
		this *= factor;

	/*
	 * If our value is less than our factor taken to the power of the
	 * low order of magnitude, it goes into the zeroth bucket.
	 */
	if (value < (last = this))
		return (0);

	for (this *= factor; order <= high; order++) {
		int nbuckets = this > nsteps ? nsteps : this;

		if ((next = this * factor) < this) {
			/*
			 * We should not generally get log/linear quantizations
			 * with a high magnitude that allows 64-bits to
			 * overflow, but we nonetheless protect against this
			 * by explicitly checking for overflow, and clamping
			 * our value accordingly.
			 */
			value = this - 1;
		}

		if (value < this) {
			/*
			 * If our value lies within this order of magnitude,
			 * determine its position by taking the offset within
			 * the order of magnitude, dividing by the bucket
			 * width, and adding to our (accumulated) base.
			 */
			return (base + (value - last) / (this / nbuckets));
		}

		base += nbuckets - (nbuckets / factor);
		last = this;
		this = next;
	}

	/*
	 * Our value is greater than or equal to our factor taken to the
	 * power of one plus the high magnitude -- return the top bucket.
	 */
	return (base);
}
static void
dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *llquanta++;
	uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
	uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
	uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
	uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);

	llquanta[dtrace_aggregate_llquantize_bucket(factor,
	    low, high, nsteps, nval)] += incr;
}
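
/*
 * Worked example (hypothetical parameters): with factor 10, low 0, high 2
 * and nsteps 10, a value of 37 falls in the order of magnitude [10, 100);
 * its bucket is base + (37 - 10) / (100 / 10) = 10 + 2 = 12, the bucket
 * covering [30, 40).
 */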
/*ARGSUSED*/
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
{
	data[0]++;
	data[1] += nval;
}
/*ARGSUSED*/
static void
dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg)
{
	int64_t snval = (int64_t)nval;
	uint64_t tmp[2];

	data[0]++;
	data[1] += nval;

	/*
	 * What we want to say here is:
	 *
	 * data[2] += nval * nval;
	 *
	 * But given that nval is 64-bit, we could easily overflow, so
	 * we do this as 128-bit arithmetic.
	 */
	if (snval < 0)
		snval = -snval;

	dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp);
	dtrace_add_128(data + 2, tmp, data + 2);
}
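
/*
 * For reference, a minimal model of the 128-bit accumulation used above.
 * This is an illustrative sketch, not the dtrace_add_128() used by the
 * framework (which lives elsewhere in this file): a 128-bit quantity is kept
 * as two uint64_t words, and carry out of the low word is detected by
 * observing wraparound.
 */
static void
dtrace_add_128_sketch(const uint64_t *addend1, const uint64_t *addend2,
    uint64_t *sum)
{
	uint64_t lo = addend1[0] + addend2[0];

	/* if the low-word sum wrapped, propagate a carry into the high word */
	sum[1] = addend1[1] + addend2[1] + (lo < addend1[0] ? 1 : 0);
	sum[0] = lo;
}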
/*ARGSUSED*/
static void
dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	*oval = *oval + 1;
}
/*ARGSUSED*/
static void
dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	*oval += nval;
}
/*
 * Aggregate given the tuple in the principal data buffer, and the aggregating
 * action denoted by the specified dtrace_aggregation_t.  The aggregation
 * buffer is specified as the buf parameter.  This routine does not return
 * failure; if there is no space in the aggregation buffer, the data will be
 * dropped, and a corresponding counter incremented.
 */
static void
dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
    intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
{
	dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
	uint32_t i, ndx, size, fsize;
	uint32_t align = sizeof (uint64_t) - 1;
	dtrace_aggbuffer_t *agb;
	dtrace_aggkey_t *key;
	uint32_t hashval = 0, limit, isstr;
	caddr_t tomax, data, kdata;
	dtrace_actkind_t action;
	dtrace_action_t *act;
	uintptr_t offs;

	if (buf == NULL)
		return;

	if (!agg->dtag_hasarg) {
		/*
		 * Currently, only quantize() and lquantize() take additional
		 * arguments, and they have the same semantics: an increment
		 * value that defaults to 1 when not present.  If additional
		 * aggregating actions take arguments, the setting of the
		 * default argument value will presumably have to become more
		 * sophisticated...
		 */
		arg = 1;
	}

	action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
	size = rec->dtrd_offset - agg->dtag_base;
	fsize = size + rec->dtrd_size;

	ASSERT(dbuf->dtb_tomax != NULL);
	data = dbuf->dtb_tomax + offset + agg->dtag_base;

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*
	 * The metastructure is always at the bottom of the buffer.
	 */
	agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
	    sizeof (dtrace_aggbuffer_t));

	if (buf->dtb_offset == 0) {
		/*
		 * We just kludge up approximately 1/8th of the size to be
		 * buckets.  If this guess ends up being routinely
		 * off-the-mark, we may need to dynamically readjust this
		 * based on past performance.
		 */
		uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);

		if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
		    (uintptr_t)tomax || hashsize == 0) {
			/*
			 * We've been given a ludicrously small buffer;
			 * increment our drop count and leave.
			 */
			dtrace_buffer_drop(buf);
			return;
		}

		/*
		 * And now, a pathetic attempt to try to get an odd (or
		 * perchance, a prime) hash size for better hash distribution.
		 */
		if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
			hashsize -= DTRACE_AGGHASHSIZE_SLEW;

		agb->dtagb_hashsize = hashsize;
		agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
		    agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
		agb->dtagb_free = (uintptr_t)agb->dtagb_hash;

		for (i = 0; i < agb->dtagb_hashsize; i++)
			agb->dtagb_hash[i] = NULL;
	}

	ASSERT(agg->dtag_first != NULL);
	ASSERT(agg->dtag_first->dta_intuple);

	/*
	 * Calculate the hash value based on the key.  Note that we _don't_
	 * include the aggid in the hashing (but we will store it as part of
	 * the key).  The hashing algorithm is Bob Jenkins' "One-at-a-time"
	 * algorithm: a simple, quick algorithm that has no known funnels, and
	 * gets good distribution in practice.  The efficacy of the hashing
	 * algorithm (and a comparison with other algorithms) may be found by
	 * running the ::dtrace_aggstat MDB dcmd.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);
		isstr = DTRACEACT_ISSTRING(act);

		for (; i < limit; i++) {
			hashval += data[i];
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			if (isstr && data[i] == '\0')
				break;
		}
	}

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	/*
	 * Yes, the divide here is expensive -- but it's generally the least
	 * of the performance issues given the amount of data that we iterate
	 * over to compute hash values, compare data, etc.
	 */
	ndx = hashval % agb->dtagb_hashsize;

	for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
		ASSERT((caddr_t)key >= tomax);
		ASSERT((caddr_t)key < tomax + buf->dtb_size);

		if (hashval != key->dtak_hashval || key->dtak_size != size)
			continue;

		kdata = key->dtak_data;
		ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);

		for (act = agg->dtag_first; act->dta_intuple;
		    act = act->dta_next) {
			i = act->dta_rec.dtrd_offset - agg->dtag_base;
			limit = i + act->dta_rec.dtrd_size;
			ASSERT(limit <= size);
			isstr = DTRACEACT_ISSTRING(act);

			for (; i < limit; i++) {
				if (kdata[i] != data[i])
					goto next;

				if (isstr && data[i] == '\0')
					break;
			}
		}

		if (action != key->dtak_action) {
			/*
			 * We are aggregating on the same value in the same
			 * aggregation with two different aggregating actions.
			 * (This should have been picked up in the compiler,
			 * so we may be dealing with errant or devious DIF.)
			 * This is an error condition; we indicate as much,
			 * and return.
			 */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return;
		}

		/*
		 * This is a hit:  we need to apply the aggregator to
		 * the value at this key.
		 */
		agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
		return;
next:
		continue;
	}

	/*
	 * We didn't find it.  We need to allocate some zero-filled space,
	 * link it into the hash table appropriately, and apply the aggregator
	 * to the (zero-filled) value.
	 */
	offs = buf->dtb_offset;
	while (offs & (align - 1))
		offs += sizeof (uint32_t);

	/*
	 * If we don't have enough room to both allocate a new key _and_
	 * its associated data, increment the drop count and return.
	 */
	if ((uintptr_t)tomax + offs + fsize >
	    agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
		dtrace_buffer_drop(buf);
		return;
	}

	ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
	key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
	agb->dtagb_free -= sizeof (dtrace_aggkey_t);

	key->dtak_data = kdata = tomax + offs;
	buf->dtb_offset = offs + fsize;

	/*
	 * Now copy the data across.
	 */
	*((dtrace_aggid_t *)kdata) = agg->dtag_id;

	for (i = sizeof (dtrace_aggid_t); i < size; i++)
		kdata[i] = data[i];

	/*
	 * Because strings are not zeroed out by default, we need to iterate
	 * looking for actions that store strings, and we need to explicitly
	 * pad these strings out with zeroes.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		int nul;

		if (!DTRACEACT_ISSTRING(act))
			continue;

		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);

		for (nul = 0; i < limit; i++) {
			if (nul) {
				kdata[i] = '\0';
				continue;
			}

			if (data[i] != '\0')
				continue;

			nul = 1;
		}
	}

	for (i = size; i < fsize; i++)
		kdata[i] = 0;

	key->dtak_hashval = hashval;
	key->dtak_size = size;
	key->dtak_action = action;
	key->dtak_next = agb->dtagb_hash[ndx];
	agb->dtagb_hash[ndx] = key;

	/*
	 * Finally, apply the aggregator.
	 */
	*((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
	agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
}
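
/*
 * For reference, the "one-at-a-time" hash used above, distilled into a
 * self-contained sketch over a flat byte buffer.  (The framework code above
 * additionally walks the tuple records and stops early at string NULs.)
 * The name is illustrative only; this function is not part of the framework.
 */
static uint32_t
dtrace_oneatatime_sketch(const uint8_t *data, size_t len)
{
	uint32_t hashval = 0;
	size_t i;

	for (i = 0; i < len; i++) {
		hashval += data[i];
		hashval += (hashval << 10);
		hashval ^= (hashval >> 6);
	}

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	return (hashval);
}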
/*
 * Given consumer state, this routine finds a speculation in the INACTIVE
 * state and transitions it into the ACTIVE state.  If there is no speculation
 * in the INACTIVE state, 0 is returned.  In this case, no error counter is
 * incremented -- it is up to the caller to take appropriate action.
 */
static int
dtrace_speculation(dtrace_state_t *state)
{
	int i = 0;
	dtrace_speculation_state_t current;
	uint32_t *stat = &state->dts_speculations_unavail, count;

	while (i < state->dts_nspeculations) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		current = spec->dtsp_state;

		if (current != DTRACESPEC_INACTIVE) {
			if (current == DTRACESPEC_COMMITTINGMANY ||
			    current == DTRACESPEC_COMMITTING ||
			    current == DTRACESPEC_DISCARDING)
				stat = &state->dts_speculations_busy;

			i++;
			continue;
		}

		if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    current, DTRACESPEC_ACTIVE) == current)
			return (i + 1);
	}

	/*
	 * We couldn't find a speculation.  If we found as much as a single
	 * busy speculation buffer, we'll attribute this failure as "busy"
	 * instead of "unavail".
	 */
	do {
		count = *stat;
	} while (dtrace_cas32(stat, count, count + 1) != count);

	return (0);
}
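
/*
 * The speculation routines below all share the lock-free pattern used above:
 * snapshot the state, compute the desired transition, and retry if
 * dtrace_cas32() observes that another CPU moved the state first.  A
 * distilled sketch of the pattern (illustrative only, not framework code):
 */
static int
dtrace_cas_transition_sketch(uint32_t *statep, uint32_t from, uint32_t to)
{
	uint32_t current;

	do {
		if ((current = *statep) != from)
			return (0);	/* another CPU transitioned it */
	} while (dtrace_cas32(statep, current, to) != current);

	return (1);
}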
/*
 * This routine commits an active speculation.  If the specified speculation
 * is not in a valid state to perform a commit(), this routine will silently do
 * nothing.  The state of the specified speculation is transitioned according
 * to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_buffer_t *src, *dest;
	uintptr_t daddr, saddr, dlimit;
	dtrace_speculation_state_t current, new;
	intptr_t offs;

	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	src = &spec->dtsp_buffer[cpu];
	dest = &state->dts_buffer[cpu];

	do {
		current = spec->dtsp_state;

		if (current == DTRACESPEC_COMMITTINGMANY)
			break;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_COMMITTING:
			/*
			 * This is only possible if we are (a) commit()'ing
			 * without having done a prior speculate() on this CPU
			 * and (b) racing with another commit() on a different
			 * CPU.  There's nothing to do -- we just assert that
			 * our offset is 0.
			 */
			ASSERT(src->dtb_offset == 0);
			return;

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_COMMITTING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is active on one CPU.  If our
			 * buffer offset is non-zero, we know that the one CPU
			 * must be us.  Otherwise, we are committing on a
			 * different CPU from the speculate(), and we must
			 * rely on being asynchronously cleaned.
			 */
			if (src->dtb_offset != 0) {
				new = DTRACESPEC_COMMITTING;
				break;
			}
			/*FALLTHROUGH*/

		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_COMMITTINGMANY;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	/*
	 * We have set the state to indicate that we are committing this
	 * speculation.  Now reserve the necessary space in the destination
	 * buffer.
	 */
	if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
	    sizeof (uint64_t), state, NULL)) < 0) {
		dtrace_buffer_drop(dest);
		goto out;
	}

	/*
	 * We have the space; copy the buffer across.  (Note that this is a
	 * highly suboptimal bcopy(); in the unlikely event that this becomes
	 * a serious performance issue, a high-performance DTrace-specific
	 * bcopy() should obviously be invented.)
	 */
	daddr = (uintptr_t)dest->dtb_tomax + offs;
	dlimit = daddr + src->dtb_offset;
	saddr = (uintptr_t)src->dtb_tomax;

	/*
	 * First, the aligned portion.
	 */
	while (dlimit - daddr >= sizeof (uint64_t)) {
		*((uint64_t *)daddr) = *((uint64_t *)saddr);

		daddr += sizeof (uint64_t);
		saddr += sizeof (uint64_t);
	}

	/*
	 * Now any left-over bit...
	 */
	while (dlimit - daddr)
		*((uint8_t *)daddr++) = *((uint8_t *)saddr++);

	/*
	 * Finally, commit the reserved space in the destination buffer.
	 */
	dest->dtb_offset = offs + src->dtb_offset;

out:
	/*
	 * If we're lucky enough to be the only active CPU on this speculation
	 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
	 */
	if (current == DTRACESPEC_ACTIVE ||
	    (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
		uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);

		ASSERT(rval == DTRACESPEC_COMMITTING);
	}

	src->dtb_offset = 0;
	src->dtb_xamot_drops += src->dtb_drops;
	src->dtb_drops = 0;
}
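
/*
 * To make the commit path concrete: a consumer typically drives this
 * machinery with D along the lines of the illustrative script below, in
 * which data is traced speculatively and only committed -- via the commit()
 * that ultimately lands here -- once it proves interesting:
 *
 *	syscall::open:entry
 *	{
 *		self->spec = speculation();
 *	}
 *
 *	syscall::open:entry
 *	/self->spec/
 *	{
 *		speculate(self->spec);
 *		printf("%s", copyinstr(arg0));
 *	}
 *
 *	syscall::open:return
 *	/self->spec && errno != 0/
 *	{
 *		commit(self->spec);
 *		self->spec = 0;
 *	}
 *
 *	syscall::open:return
 *	/self->spec/
 *	{
 *		discard(self->spec);
 *		self->spec = 0;
 *	}
 */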
/*
 * This routine discards an active speculation.  If the specified speculation
 * is not in a valid state to perform a discard(), this routine will silently
 * do nothing.  The state of the specified speculation is transitioned
 * according to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new;
	dtrace_buffer_t *buf;

	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpu];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_COMMITTING:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_ACTIVE:
		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_DISCARDING;
			break;

		case DTRACESPEC_ACTIVEONE:
			if (buf->dtb_offset != 0) {
				new = DTRACESPEC_INACTIVE;
			} else {
				new = DTRACESPEC_DISCARDING;
			}
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
}
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously from cross call context to clean any speculations that are
 * in the COMMITTINGMANY or DISCARDING states.  These speculations may not be
 * transitioned back to the INACTIVE state until all CPUs have cleaned the
 * speculation.
 */
static void
dtrace_speculation_clean_here(dtrace_state_t *state)
{
	dtrace_icookie_t cookie;
	processorid_t cpu = CPU->cpu_id;
	dtrace_buffer_t *dest = &state->dts_buffer[cpu];
	dtrace_specid_t i;

	cookie = dtrace_interrupt_disable();

	if (dest->dtb_tomax == NULL) {
		dtrace_interrupt_enable(cookie);
		return;
	}

	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];

		if (src->dtb_tomax == NULL)
			continue;

		if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
			src->dtb_offset = 0;
			continue;
		}

		if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		if (src->dtb_offset == 0)
			continue;

		dtrace_speculation_commit(state, cpu, i + 1);
	}

	dtrace_interrupt_enable(cookie);
}
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously (and at a regular interval) to clean any speculations that
 * are in the COMMITTINGMANY or DISCARDING states.  If it discovers that there
 * is work to be done, it cross calls all CPUs to perform that work;
 * COMMITTINGMANY and DISCARDING speculations may not be transitioned back to
 * the INACTIVE state until they have been cleaned by all CPUs.
 */
static void
dtrace_speculation_clean(dtrace_state_t *state)
{
	int work = 0, rv;
	dtrace_specid_t i;

	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		ASSERT(!spec->dtsp_cleaning);

		if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
		    spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		work++;
		spec->dtsp_cleaning = 1;
	}

	if (!work)
		return;

	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_speculation_clean_here, state);

	/*
	 * We now know that all CPUs have committed or discarded their
	 * speculation buffers, as appropriate.  We can now set the state
	 * to inactive.
	 */
	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_speculation_state_t current, new;

		if (!spec->dtsp_cleaning)
			continue;

		current = spec->dtsp_state;
		ASSERT(current == DTRACESPEC_DISCARDING ||
		    current == DTRACESPEC_COMMITTINGMANY);

		new = DTRACESPEC_INACTIVE;

		rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
		ASSERT(rv == current);
		spec->dtsp_cleaning = 0;
	}
}
/*
 * Called as part of a speculate() to get the speculative buffer associated
 * with a given speculation.  Returns NULL if the specified speculation is not
 * in an ACTIVE state.  If the speculation is in the ACTIVEONE state -- and
 * the active CPU is not the specified CPU -- the speculation will be
 * atomically transitioned into the ACTIVEMANY state.
 */
static dtrace_buffer_t *
dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new;
	dtrace_buffer_t *buf;

	if (which == 0)
		return (NULL);

	if (which > state->dts_nspeculations) {
		cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return (NULL);
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpuid];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_DISCARDING:
			return (NULL);

		case DTRACESPEC_COMMITTING:
			ASSERT(buf->dtb_offset == 0);
			return (NULL);

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is currently active on one CPU.
			 * Check the offset in the buffer; if it's non-zero,
			 * that CPU must be us (and we leave the state alone).
			 * If it's zero, assume that we're starting on a new
			 * CPU -- and change the state to indicate that the
			 * speculation is active on more than one CPU.
			 */
			if (buf->dtb_offset != 0)
				return (buf);

			new = DTRACESPEC_ACTIVEMANY;
			break;

		case DTRACESPEC_ACTIVEMANY:
			return (buf);

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_ACTIVEONE;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
	return (buf);
}
/*
 * Return a string.  In the event that the user lacks the privilege to access
 * arbitrary kernel memory, we copy the string out to scratch memory so that we
 * don't fail access checking.
 *
 * dtrace_dif_variable() uses this routine as a helper for various
 * builtin values such as 'execname' and 'probefunc.'
 */
static uint64_t
dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state,
    dtrace_mstate_t *mstate)
{
	uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
	uint64_t ret;
	size_t strsz;

	/*
	 * The easy case: this probe is allowed to read all of memory, so
	 * we can just return this as a vanilla pointer.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (addr);

	/*
	 * This is the tougher case: we copy the string in question from
	 * kernel memory into scratch memory and return it that way: this
	 * ensures that we won't trip up when access checking tests the
	 * BYREF return value.
	 */
	strsz = dtrace_strlen((char *)addr, size) + 1;

	if (mstate->dtms_scratch_ptr + strsz >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return (NULL);
	}

	dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
	    strsz);
	ret = mstate->dtms_scratch_ptr;
	mstate->dtms_scratch_ptr += strsz;
	return (ret);
}
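
/*
 * The scratch discipline used above recurs throughout the DIF subroutines
 * below:  check that the requested size fits between dtms_scratch_ptr and
 * the end of the scratch region, consume it by advancing the pointer, and
 * set CPU_DTRACE_NOSCRATCH on failure.  A distilled sketch (illustrative
 * only; the consumers below use the DTRACE_INSCRATCH() macro for the check):
 */
static uintptr_t
dtrace_scratch_alloc_sketch(dtrace_mstate_t *mstate, size_t size)
{
	uintptr_t ret;

	if (mstate->dtms_scratch_ptr + size >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return (NULL);
	}

	ret = mstate->dtms_scratch_ptr;
	mstate->dtms_scratch_ptr += size;
	return (ret);
}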
/*
 * This function implements the DIF emulator's variable lookups.  The emulator
 * passes a reserved variable identifier and optional built-in array index.
 */
static uint64_t
dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
    uint64_t ndx)
{
	/*
	 * If we're accessing one of the uncached arguments, we'll turn this
	 * into a reference in the args array.
	 */
	if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) {
		ndx = v - DIF_VAR_ARG0;
		v = DIF_VAR_ARGS;
	}

	switch (v) {
	case DIF_VAR_ARGS:
		if (!(mstate->dtms_access & DTRACE_ACCESS_ARGS)) {
			cpu_core[CPU->cpu_id].cpuc_dtrace_flags |=
			    CPU_DTRACE_KPRIV;
			return (0);
		}

		ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
		if (ndx >= sizeof (mstate->dtms_arg) /
		    sizeof (mstate->dtms_arg[0])) {
			int aframes = mstate->dtms_probe->dtpr_aframes + 2;
			dtrace_provider_t *pv;
			uint64_t val;

			pv = mstate->dtms_probe->dtpr_provider;
			if (pv->dtpv_pops.dtps_getargval != NULL)
				val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg,
				    mstate->dtms_probe->dtpr_id,
				    mstate->dtms_probe->dtpr_arg, ndx, aframes);
			else
				val = dtrace_getarg(ndx, aframes);

			/*
			 * This is regrettably required to keep the compiler
			 * from tail-optimizing the call to dtrace_getarg().
			 * The condition always evaluates to true, but the
			 * compiler has no way of figuring that out a priori.
			 * (None of this would be necessary if the compiler
			 * could be relied upon to _always_ tail-optimize
			 * the call to dtrace_getarg() -- but it can't.)
			 */
			if (mstate->dtms_probe != NULL)
				return (val);

			ASSERT(0);
		}

		return (mstate->dtms_arg[ndx]);

	case DIF_VAR_UREGS: {
		klwp_t *lwp;

		if (!dtrace_priv_proc(state, mstate))
			return (0);

		if ((lwp = curthread->t_lwp) == NULL) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = NULL;
			return (0);
		}

		return (dtrace_getreg(lwp->lwp_regs, ndx));
	}

	case DIF_VAR_VMREGS: {
		uint64_t rval;

		if (!dtrace_priv_kernel(state))
			return (0);

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);

		rval = dtrace_getvmreg(ndx,
		    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags);

		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

		return (rval);
	}

	case DIF_VAR_CURTHREAD:
		if (!dtrace_priv_kernel(state))
			return (0);
		return ((uint64_t)(uintptr_t)curthread);

	case DIF_VAR_TIMESTAMP:
		if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
			mstate->dtms_timestamp = dtrace_gethrtime();
			mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
		}
		return (mstate->dtms_timestamp);

	case DIF_VAR_VTIMESTAMP:
		ASSERT(dtrace_vtime_references != 0);
		return (curthread->t_dtrace_vtime);

	case DIF_VAR_WALLTIMESTAMP:
		if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) {
			mstate->dtms_walltimestamp = dtrace_gethrestime();
			mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP;
		}
		return (mstate->dtms_walltimestamp);

	case DIF_VAR_IPL:
		if (!dtrace_priv_kernel(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) {
			mstate->dtms_ipl = dtrace_getipl();
			mstate->dtms_present |= DTRACE_MSTATE_IPL;
		}
		return (mstate->dtms_ipl);

	case DIF_VAR_EPID:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID);
		return (mstate->dtms_epid);

	case DIF_VAR_ID:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (mstate->dtms_probe->dtpr_id);

	case DIF_VAR_STACKDEPTH:
		if (!dtrace_priv_kernel(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) {
			int aframes = mstate->dtms_probe->dtpr_aframes + 2;

			mstate->dtms_stackdepth = dtrace_getstackdepth(aframes);
			mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH;
		}
		return (mstate->dtms_stackdepth);

	case DIF_VAR_USTACKDEPTH:
		if (!dtrace_priv_proc(state, mstate))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
			/*
			 * See comment in DIF_VAR_PID.
			 */
			if (DTRACE_ANCHORED(mstate->dtms_probe) &&
			    CPU_ON_INTR(CPU)) {
				mstate->dtms_ustackdepth = 0;
			} else {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				mstate->dtms_ustackdepth =
				    dtrace_getustackdepth();
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			}
			mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH;
		}
		return (mstate->dtms_ustackdepth);

	case DIF_VAR_CALLER:
		if (!dtrace_priv_kernel(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) {
			int aframes = mstate->dtms_probe->dtpr_aframes + 2;

			if (!DTRACE_ANCHORED(mstate->dtms_probe)) {
				/*
				 * If this is an unanchored probe, we are
				 * required to go through the slow path:
				 * dtrace_caller() only guarantees correct
				 * results for anchored probes.
				 */
				pc_t caller[2];

				dtrace_getpcstack(caller, 2, aframes,
				    (uint32_t *)(uintptr_t)mstate->dtms_arg[0]);
				mstate->dtms_caller = caller[1];
			} else if ((mstate->dtms_caller =
			    dtrace_caller(aframes)) == -1) {
				/*
				 * We have failed to do this the quick way;
				 * we must resort to the slower approach of
				 * calling dtrace_getpcstack().
				 */
				pc_t caller;

				dtrace_getpcstack(&caller, 1, aframes, NULL);
				mstate->dtms_caller = caller;
			}

			mstate->dtms_present |= DTRACE_MSTATE_CALLER;
		}
		return (mstate->dtms_caller);

	case DIF_VAR_UCALLER:
		if (!dtrace_priv_proc(state, mstate))
			return (0);

		if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
			uint64_t ustack[3];

			/*
			 * dtrace_getupcstack() fills in the first uint64_t
			 * with the current PID.  The second uint64_t will
			 * be the program counter at user-level.  The third
			 * uint64_t will contain the caller, which is what
			 * we're after.
			 */
			ustack[2] = NULL;
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			dtrace_getupcstack(ustack, 3);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			mstate->dtms_ucaller = ustack[2];
			mstate->dtms_present |= DTRACE_MSTATE_UCALLER;
		}

		return (mstate->dtms_ucaller);

	case DIF_VAR_PROBEPROV:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (dtrace_dif_varstr(
		    (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name,
		    state, mstate));

	case DIF_VAR_PROBEMOD:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (dtrace_dif_varstr(
		    (uintptr_t)mstate->dtms_probe->dtpr_mod,
		    state, mstate));

	case DIF_VAR_PROBEFUNC:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (dtrace_dif_varstr(
		    (uintptr_t)mstate->dtms_probe->dtpr_func,
		    state, mstate));

	case DIF_VAR_PROBENAME:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (dtrace_dif_varstr(
		    (uintptr_t)mstate->dtms_probe->dtpr_name,
		    state, mstate));

	case DIF_VAR_PID:
		if (!dtrace_priv_proc(state, mstate))
			return (0);

		/*
		 * Note that we are assuming that an unanchored probe is
		 * always due to a high-level interrupt.  (And we're assuming
		 * that there is only a single high level interrupt.)
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (pid0.pid_id);

		/*
		 * It is always safe to dereference one's own t_procp pointer:
		 * it always points to a valid, allocated proc structure.
		 * Further, it is always safe to dereference the p_pidp member
		 * of one's own proc structure.  (These are truisms because
		 * threads and processes don't clean up their own state --
		 * they leave that task to whomever reaps them.)
		 */
		return ((uint64_t)curthread->t_procp->p_pidp->pid_id);

	case DIF_VAR_PPID:
		if (!dtrace_priv_proc(state, mstate))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (pid0.pid_id);

		/*
		 * It is always safe to dereference one's own t_procp pointer:
		 * it always points to a valid, allocated proc structure.
		 * (This is true because threads don't clean up their own
		 * state -- they leave that task to whomever reaps them.)
		 */
		return ((uint64_t)curthread->t_procp->p_ppid);

	case DIF_VAR_TID:
		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (0);

		return ((uint64_t)curthread->t_tid);

	case DIF_VAR_EXECNAME:
		if (!dtrace_priv_proc(state, mstate))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return ((uint64_t)(uintptr_t)p0.p_user.u_comm);

		/*
		 * It is always safe to dereference one's own t_procp pointer:
		 * it always points to a valid, allocated proc structure.
		 * (This is true because threads don't clean up their own
		 * state -- they leave that task to whomever reaps them.)
		 */
		return (dtrace_dif_varstr(
		    (uintptr_t)curthread->t_procp->p_user.u_comm,
		    state, mstate));

	case DIF_VAR_ZONENAME:
		if (!dtrace_priv_proc(state, mstate))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return ((uint64_t)(uintptr_t)p0.p_zone->zone_name);

		/*
		 * It is always safe to dereference one's own t_procp pointer:
		 * it always points to a valid, allocated proc structure.
		 * (This is true because threads don't clean up their own
		 * state -- they leave that task to whomever reaps them.)
		 */
		return (dtrace_dif_varstr(
		    (uintptr_t)curthread->t_procp->p_zone->zone_name,
		    state, mstate));

	case DIF_VAR_UID:
		if (!dtrace_priv_proc(state, mstate))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return ((uint64_t)p0.p_cred->cr_uid);

		/*
		 * It is always safe to dereference one's own t_procp pointer:
		 * it always points to a valid, allocated proc structure.
		 * (This is true because threads don't clean up their own
		 * state -- they leave that task to whomever reaps them.)
		 *
		 * Additionally, it is safe to dereference one's own process
		 * credential, since this is never NULL after process birth.
		 */
		return ((uint64_t)curthread->t_procp->p_cred->cr_uid);

	case DIF_VAR_GID:
		if (!dtrace_priv_proc(state, mstate))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return ((uint64_t)p0.p_cred->cr_gid);

		/*
		 * It is always safe to dereference one's own t_procp pointer:
		 * it always points to a valid, allocated proc structure.
		 * (This is true because threads don't clean up their own
		 * state -- they leave that task to whomever reaps them.)
		 *
		 * Additionally, it is safe to dereference one's own process
		 * credential, since this is never NULL after process birth.
		 */
		return ((uint64_t)curthread->t_procp->p_cred->cr_gid);

	case DIF_VAR_ERRNO: {
		klwp_t *lwp;

		if (!dtrace_priv_proc(state, mstate))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (0);

		/*
		 * It is always safe to dereference one's own t_lwp pointer in
		 * the event that this pointer is non-NULL.  (This is true
		 * because threads and lwps don't clean up their own state --
		 * they leave that task to whomever reaps them.)
		 */
		if ((lwp = curthread->t_lwp) == NULL)
			return (0);

		return ((uint64_t)lwp->lwp_errno);
	}

	default:
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}
}
/*
 * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
 * Notice that we don't bother validating the proper number of arguments or
 * their types in the tuple stack.  This isn't needed because all argument
 * interpretation is safe because of our load safety -- the worst that can
 * happen is that a bogus program can obtain bogus results.
 */
static void
dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
    dtrace_key_t *tupregs, int nargs,
    dtrace_mstate_t *mstate, dtrace_state_t *state)
{
	volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
	dtrace_vstate_t *vstate = &state->dts_vstate;

	union {
		mutex_impl_t mi;
		uint64_t mx;
	} m;

	union {
		krwlock_t ri;
		uintptr_t rw;
	} r;

	switch (subr) {
	case DIF_SUBR_RAND:
		regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
		break;
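
	/*
	 * The expression above is a linear congruential generator of the
	 * form x' = (a * x + c) mod m, seeded from the current
	 * high-resolution time.  It is intended to be cheap and lock-free
	 * from probe context, not to be statistically strong or
	 * cryptographically secure.
	 */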
	case DIF_SUBR_MUTEX_OWNED:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
		    mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}

		m.mx = dtrace_load64(tupregs[0].dttk_value);
		if (MUTEX_TYPE_ADAPTIVE(&m.mi))
			regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER;
		else
			regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock);
		break;

	case DIF_SUBR_MUTEX_OWNER:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
		    mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}

		m.mx = dtrace_load64(tupregs[0].dttk_value);
		if (MUTEX_TYPE_ADAPTIVE(&m.mi) &&
		    MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER)
			regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi);
		else
			regs[rd] = 0;
		break;

	case DIF_SUBR_MUTEX_TYPE_ADAPTIVE:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
		    mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}

		m.mx = dtrace_load64(tupregs[0].dttk_value);
		regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi);
		break;

	case DIF_SUBR_MUTEX_TYPE_SPIN:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
		    mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}

		m.mx = dtrace_load64(tupregs[0].dttk_value);
		regs[rd] = MUTEX_TYPE_SPIN(&m.mi);
		break;

	case DIF_SUBR_RW_READ_HELD: {
		uintptr_t tmp;

		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t),
		    mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}

		r.rw = dtrace_loadptr(tupregs[0].dttk_value);
		regs[rd] = _RW_READ_HELD(&r.ri, tmp);
		break;
	}

	case DIF_SUBR_RW_WRITE_HELD:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
		    mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}

		r.rw = dtrace_loadptr(tupregs[0].dttk_value);
		regs[rd] = _RW_WRITE_HELD(&r.ri);
		break;

	case DIF_SUBR_RW_ISWRITER:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
		    mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}

		r.rw = dtrace_loadptr(tupregs[0].dttk_value);
		regs[rd] = _RW_ISWRITER(&r.ri);
		break;
	case DIF_SUBR_BCOPY: {
		/*
		 * We need to be sure that the destination is in the scratch
		 * region -- no other region is allowed.
		 */
		uintptr_t src = tupregs[0].dttk_value;
		uintptr_t dest = tupregs[1].dttk_value;
		size_t size = tupregs[2].dttk_value;

		if (!dtrace_inscratch(dest, size, mstate)) {
			*flags |= CPU_DTRACE_BADADDR;
			*illval = regs[rd];
			break;
		}

		if (!dtrace_canload(src, size, mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}

		dtrace_bcopy((void *)src, (void *)dest, size);
		break;
	}
	case DIF_SUBR_ALLOCA:
	case DIF_SUBR_COPYIN: {
		uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
		uint64_t size =
		    tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value;
		size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size;

		/*
		 * This action doesn't require any credential checks since
		 * probes will not activate in user contexts to which the
		 * enabling user does not have permissions.
		 */

		/*
		 * Rounding up the user allocation size could have overflowed
		 * a large, bogus allocation (like -1ULL) to 0.
		 */
		if (scratch_size < size ||
		    !DTRACE_INSCRATCH(mstate, scratch_size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = NULL;
			break;
		}

		if (subr == DIF_SUBR_COPYIN) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		}

		mstate->dtms_scratch_ptr += scratch_size;
		regs[rd] = dest;
		break;
	}
	case DIF_SUBR_COPYINTO: {
		uint64_t size = tupregs[1].dttk_value;
		uintptr_t dest = tupregs[2].dttk_value;

		/*
		 * This action doesn't require any credential checks since
		 * probes will not activate in user contexts to which the
		 * enabling user does not have permissions.
		 */
		if (!dtrace_inscratch(dest, size, mstate)) {
			*flags |= CPU_DTRACE_BADADDR;
			*illval = regs[rd];
			break;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		break;
	}
	case DIF_SUBR_COPYINSTR: {
		uintptr_t dest = mstate->dtms_scratch_ptr;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];

		if (nargs > 1 && tupregs[1].dttk_value < size)
			size = tupregs[1].dttk_value + 1;

		/*
		 * This action doesn't require any credential checks since
		 * probes will not activate in user contexts to which the
		 * enabling user does not have permissions.
		 */
		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = NULL;
			break;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

		((char *)dest)[size - 1] = '\0';
		mstate->dtms_scratch_ptr += size;
		regs[rd] = dest;
		break;
	}
	case DIF_SUBR_MSGSIZE:
	case DIF_SUBR_MSGDSIZE: {
		uintptr_t baddr = tupregs[0].dttk_value, daddr;
		uintptr_t wptr, rptr;
		size_t count = 0;
		int cont = 0;

		while (baddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {

			if (!dtrace_canload(baddr, sizeof (mblk_t), mstate,
			    vstate)) {
				regs[rd] = NULL;
				break;
			}

			wptr = dtrace_loadptr(baddr +
			    offsetof(mblk_t, b_wptr));

			rptr = dtrace_loadptr(baddr +
			    offsetof(mblk_t, b_rptr));

			if (wptr < rptr) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = tupregs[0].dttk_value;
				break;
			}

			daddr = dtrace_loadptr(baddr +
			    offsetof(mblk_t, b_datap));

			baddr = dtrace_loadptr(baddr +
			    offsetof(mblk_t, b_cont));

			/*
			 * We want to prevent against denial-of-service here,
			 * so we're only going to search the list for
			 * dtrace_msgdsize_max mblks.
			 */
			if (cont++ > dtrace_msgdsize_max) {
				*flags |= CPU_DTRACE_ILLOP;
				break;
			}

			if (subr == DIF_SUBR_MSGDSIZE) {
				if (dtrace_load8(daddr +
				    offsetof(dblk_t, db_type)) != M_DATA)
					continue;
			}

			count += wptr - rptr;
		}

		if (!(*flags & CPU_DTRACE_FAULT))
			regs[rd] = count;

		break;
	}
	case DIF_SUBR_PROGENYOF: {
		pid_t pid = tupregs[0].dttk_value;
		proc_t *p;
		int rval = 0;

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);

		for (p = curthread->t_procp; p != NULL; p = p->p_parent) {
			if (p->p_pidp->pid_id == pid) {
				rval = 1;
				break;
			}
		}

		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

		regs[rd] = rval;
		break;
	}
	case DIF_SUBR_SPECULATION:
		regs[rd] = dtrace_speculation(state);
		break;
	case DIF_SUBR_COPYOUT: {
		uintptr_t kaddr = tupregs[0].dttk_value;
		uintptr_t uaddr = tupregs[1].dttk_value;
		uint64_t size = tupregs[2].dttk_value;

		if (!dtrace_destructive_disallow &&
		    dtrace_priv_proc_control(state, mstate) &&
		    !dtrace_istoxic(kaddr, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			dtrace_copyout(kaddr, uaddr, size, flags);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		}
		break;
	}
	case DIF_SUBR_COPYOUTSTR: {
		uintptr_t kaddr = tupregs[0].dttk_value;
		uintptr_t uaddr = tupregs[1].dttk_value;
		uint64_t size = tupregs[2].dttk_value;

		if (!dtrace_destructive_disallow &&
		    dtrace_priv_proc_control(state, mstate) &&
		    !dtrace_istoxic(kaddr, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			dtrace_copyoutstr(kaddr, uaddr, size, flags);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		}
		break;
	}
	case DIF_SUBR_STRLEN: {
		size_t sz;
		uintptr_t addr = (uintptr_t)tupregs[0].dttk_value;
		sz = dtrace_strlen((char *)addr,
		    state->dts_options[DTRACEOPT_STRSIZE]);

		if (!dtrace_canload(addr, sz + 1, mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}

		regs[rd] = sz;
		break;
	}
	case DIF_SUBR_STRCHR:
	case DIF_SUBR_STRRCHR: {
		/*
		 * We're going to iterate over the string looking for the
		 * specified character.  We will iterate until we have reached
		 * the string length or we have found the character.  If this
		 * is DIF_SUBR_STRRCHR, we will look for the last occurrence
		 * of the specified character instead of the first.
		 */
		uintptr_t saddr = tupregs[0].dttk_value;
		uintptr_t addr = tupregs[0].dttk_value;
		uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE];
		char c, target = (char)tupregs[1].dttk_value;

		for (regs[rd] = NULL; addr < limit; addr++) {
			if ((c = dtrace_load8(addr)) == target) {
				regs[rd] = addr;

				if (subr == DIF_SUBR_STRCHR)
					break;
			}

			if (c == '\0')
				break;
		}

		if (!dtrace_canload(saddr, addr - saddr, mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}

		break;
	}
	case DIF_SUBR_STRSTR:
	case DIF_SUBR_INDEX:
	case DIF_SUBR_RINDEX: {
		/*
		 * We're going to iterate over the string looking for the
		 * specified string.  We will iterate until we have reached
		 * the string length or we have found the string.  (Yes, this
		 * is done in the most naive way possible -- but considering
		 * that the string we're searching for is likely to be
		 * relatively short, the complexity of Rabin-Karp or similar
		 * hardly seems merited.)
		 */
		char *addr = (char *)(uintptr_t)tupregs[0].dttk_value;
		char *substr = (char *)(uintptr_t)tupregs[1].dttk_value;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		size_t len = dtrace_strlen(addr, size);
		size_t sublen = dtrace_strlen(substr, size);
		char *limit = addr + len, *orig = addr;
		int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1;
		int inc = 1;

		regs[rd] = notfound;

		if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}

		if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate,
		    vstate)) {
			regs[rd] = NULL;
			break;
		}

		/*
		 * strstr() and index()/rindex() have similar semantics if
		 * both strings are the empty string: strstr() returns a
		 * pointer to the (empty) string, and index() and rindex()
		 * both return index 0 (regardless of any position argument).
		 */
		if (sublen == 0 && len == 0) {
			if (subr == DIF_SUBR_STRSTR)
				regs[rd] = (uintptr_t)addr;
			else
				regs[rd] = 0;
			break;
		}

		if (subr != DIF_SUBR_STRSTR) {
			if (subr == DIF_SUBR_RINDEX) {
				limit = orig - 1;
				addr += len;
				inc = -1;
			}

			/*
			 * Both index() and rindex() take an optional position
			 * argument that denotes the starting position.
			 */
			if (nargs == 3) {
				int64_t pos = (int64_t)tupregs[2].dttk_value;

				/*
				 * If the position argument to index() is
				 * negative, Perl implicitly clamps it at
				 * zero.  This semantic is a little surprising
				 * given the special meaning of negative
				 * positions to similar Perl functions like
				 * substr(), but it appears to reflect a
				 * notion that index() can start from a
				 * negative index and increment its way up to
				 * the string.  Given this notion, Perl's
				 * rindex() is at least self-consistent in
				 * that it implicitly clamps positions greater
				 * than the string length to be the string
				 * length.  Where Perl completely loses
				 * coherence, however, is when the specified
				 * substring is the empty string ("").  In
				 * this case, even if the position is
				 * negative, rindex() returns 0 -- and even if
				 * the position is greater than the length,
				 * index() returns the string length.  These
				 * semantics violate the notion that index()
				 * should never return a value less than the
				 * specified position and that rindex() should
				 * never return a value greater than the
				 * specified position.  (One assumes that
				 * these semantics are artifacts of Perl's
				 * implementation and not the results of
				 * deliberate design -- it beggars belief that
				 * even Larry Wall could desire such oddness.)
				 * While in the abstract one would wish for
				 * consistent position semantics across
				 * substr(), index() and rindex() -- or at the
				 * very least self-consistent position
				 * semantics for index() and rindex() -- we
				 * instead opt to keep with the extant Perl
				 * semantics, in all their broken glory.  (Do
				 * we have more desire to maintain Perl's
				 * semantics than Perl does?  Probably.)
				 */
				if (subr == DIF_SUBR_RINDEX) {
					if (pos < 0) {
						if (sublen == 0)
							regs[rd] = 0;
						break;
					}

					if (pos > len)
						pos = len;
				} else {
					if (pos < 0)
						pos = 0;

					if (pos >= len) {
						if (sublen == 0)
							regs[rd] = len;
						break;
					}
				}

				addr = orig + pos;
			}
		}

		for (regs[rd] = notfound; addr != limit; addr += inc) {
			if (dtrace_strncmp(addr, substr, sublen) == 0) {
				if (subr != DIF_SUBR_STRSTR) {
					/*
					 * As D index() and rindex() are
					 * modeled on Perl (and not on awk),
					 * we return a zero-based (and not a
					 * one-based) index.  (For you Perl
					 * weenies: no, we're not going to add
					 * $[ -- and shouldn't you be at a con
					 * or something?)
					 */
					regs[rd] = (uintptr_t)(addr - orig);
					break;
				}

				ASSERT(subr == DIF_SUBR_STRSTR);
				regs[rd] = (uintptr_t)addr;
				break;
			}
		}

		break;
	}
: {
3746 uintptr_t addr
= tupregs
[0].dttk_value
;
3747 uintptr_t tokaddr
= tupregs
[1].dttk_value
;
3748 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3749 uintptr_t limit
, toklimit
= tokaddr
+ size
;
3750 uint8_t c
, tokmap
[32]; /* 256 / 8 */
3751 char *dest
= (char *)mstate
->dtms_scratch_ptr
;
3755 * Check both the token buffer and (later) the input buffer,
3756 * since both could be non-scratch addresses.
3758 if (!dtrace_strcanload(tokaddr
, size
, mstate
, vstate
)) {
3763 if (!DTRACE_INSCRATCH(mstate
, size
)) {
3764 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3771 * If the address specified is NULL, we use our saved
3772 * strtok pointer from the mstate. Note that this
3773 * means that the saved strtok pointer is _only_
3774 * valid within multiple enablings of the same probe --
3775 * it behaves like an implicit clause-local variable.
3777 addr
= mstate
->dtms_strtok
;
3780 * If the user-specified address is non-NULL we must
3781 * access check it. This is the only time we have
3782 * a chance to do so, since this address may reside
3783 * in the string table of this clause-- future calls
3784 * (when we fetch addr from mstate->dtms_strtok)
3785 * would fail this access check.
3787 if (!dtrace_strcanload(addr
, size
, mstate
, vstate
)) {
3794 * First, zero the token map, and then process the token
3795 * string -- setting a bit in the map for every character
3796 * found in the token string.
3798 for (i
= 0; i
< sizeof (tokmap
); i
++)
3801 for (; tokaddr
< toklimit
; tokaddr
++) {
3802 if ((c
= dtrace_load8(tokaddr
)) == '\0')
3805 ASSERT((c
>> 3) < sizeof (tokmap
));
3806 tokmap
[c
>> 3] |= (1 << (c
& 0x7));
3809 for (limit
= addr
+ size
; addr
< limit
; addr
++) {
3811 * We're looking for a character that is _not_ contained
3812 * in the token string.
3814 if ((c
= dtrace_load8(addr
)) == '\0')
3817 if (!(tokmap
[c
>> 3] & (1 << (c
& 0x7))))
3823 * We reached the end of the string without finding
3824 * any character that was not in the token string.
3825 * We return NULL in this case, and we set the saved
3826 * address to NULL as well.
3829 mstate
->dtms_strtok
= NULL
;
3834 * From here on, we're copying into the destination string.
3836 for (i
= 0; addr
< limit
&& i
< size
- 1; addr
++) {
3837 if ((c
= dtrace_load8(addr
)) == '\0')
3840 if (tokmap
[c
>> 3] & (1 << (c
& 0x7)))
3849 regs
[rd
] = (uintptr_t)dest
;
3850 mstate
->dtms_scratch_ptr
+= size
;
3851 mstate
->dtms_strtok
= addr
;
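
	/*
	 * For example, given the string "/var//log/" and the token string
	 * "/", the first call to strtok() above yields "var"; subsequent
	 * calls with a NULL first argument (which resume from dtms_strtok)
	 * yield "log" and then NULL, with empty components skipped just as
	 * in libc's strtok(3C).
	 */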
	case DIF_SUBR_SUBSTR: {
		uintptr_t s = tupregs[0].dttk_value;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		char *d = (char *)mstate->dtms_scratch_ptr;
		int64_t index = (int64_t)tupregs[1].dttk_value;
		int64_t remaining = (int64_t)tupregs[2].dttk_value;
		size_t len = dtrace_strlen((char *)s, size);
		int64_t i;

		if (!dtrace_canload(s, len + 1, mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = NULL;
			break;
		}

		if (nargs <= 2)
			remaining = (int64_t)size;

		if (index < 0) {
			index += len;

			if (index < 0 && index + remaining > 0) {
				remaining += index;
				index = 0;
			}
		}

		if (index >= len || index < 0) {
			remaining = 0;
		} else if (remaining < 0) {
			remaining += len - index;
		} else if (index + remaining > size) {
			remaining = size - index;
		}

		for (i = 0; i < remaining; i++) {
			if ((d[i] = dtrace_load8(s + index + i)) == '\0')
				break;
		}

		d[i] = '\0';

		mstate->dtms_scratch_ptr += size;
		regs[rd] = (uintptr_t)d;
		break;
	}
	case DIF_SUBR_GETMAJOR:
#ifdef _LP64
		regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64;
#else
		regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ;
#endif
		break;

	case DIF_SUBR_GETMINOR:
#ifdef _LP64
		regs[rd] = tupregs[0].dttk_value & MAXMIN64;
#else
		regs[rd] = tupregs[0].dttk_value & MAXMIN;
#endif
		break;
	case DIF_SUBR_DDI_PATHNAME: {
		/*
		 * This one is a galactic mess.  We are going to roughly
		 * emulate ddi_pathname(), but it's made more complicated
		 * by the fact that we (a) want to include the minor name and
		 * (b) must proceed iteratively instead of recursively.
		 */
		uintptr_t dest = mstate->dtms_scratch_ptr;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		char *start = (char *)dest, *end = start + size - 1;
		uintptr_t daddr = tupregs[0].dttk_value;
		int64_t minor = (int64_t)tupregs[1].dttk_value;
		char *s;
		int i, len, depth = 0;

		/*
		 * Due to all the pointer jumping we do and context we must
		 * rely upon, we just mandate that the user must have kernel
		 * read privileges to use this routine.
		 */
		if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) {
			*flags |= CPU_DTRACE_KPRIV;
			*illval = daddr;
			regs[rd] = NULL;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = NULL;
			break;
		}

		*end = '\0';

		/*
		 * We want to have a name for the minor.  In order to do this,
		 * we need to walk the minor list from the devinfo.  We want
		 * to be sure that we don't infinitely walk a circular list,
		 * so we check for circularity by sending a scout pointer
		 * ahead two elements for every element that we iterate over;
		 * if the list is circular, these will ultimately point to the
		 * same element.  You may recognize this little trick as the
		 * answer to a stupid interview question -- one that always
		 * seems to be asked by those who had to have it laboriously
		 * explained to them, and who can't even concisely describe
		 * the conditions under which one would be forced to resort to
		 * this technique.  Needless to say, those conditions are
		 * found here -- and probably only here.  Is this the only use
		 * of this infamous trick in shipping, production code?  If it
		 * isn't, it probably should be...
		 */
		if (minor != -1) {
			uintptr_t maddr = dtrace_loadptr(daddr +
			    offsetof(struct dev_info, devi_minor));

			uintptr_t next = offsetof(struct ddi_minor_data, next);
			uintptr_t name = offsetof(struct ddi_minor_data,
			    d_minor) + offsetof(struct ddi_minor, name);
			uintptr_t dev = offsetof(struct ddi_minor_data,
			    d_minor) + offsetof(struct ddi_minor, dev);
			uintptr_t scout;

			if (maddr != NULL)
				scout = dtrace_loadptr(maddr + next);

			while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
				uint64_t m;
#ifdef _LP64
				m = dtrace_load64(maddr + dev) & MAXMIN64;
#else
				m = dtrace_load32(maddr + dev) & MAXMIN;
#endif
				if (m != minor) {
					maddr = dtrace_loadptr(maddr + next);

					if (scout == NULL)
						continue;

					scout = dtrace_loadptr(scout + next);

					if (scout == NULL)
						continue;

					scout = dtrace_loadptr(scout + next);

					if (scout == NULL)
						continue;

					if (scout == maddr) {
						*flags |= CPU_DTRACE_ILLOP;
						break;
					}

					continue;
				}

				/*
				 * We have the minor data.  Now we need to
				 * copy the minor's name into the end of the
				 * pathname.
				 */
				s = (char *)dtrace_loadptr(maddr + name);
				len = dtrace_strlen(s, size);

				if (*flags & CPU_DTRACE_FAULT)
					break;

				if (len != 0) {
					if ((end -= (len + 1)) < start)
						break;

					*end = ':';
				}

				for (i = 1; i <= len; i++)
					end[i] = dtrace_load8((uintptr_t)s++);
				break;
			}
		}

		while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
			ddi_node_state_t devi_state;

			devi_state = dtrace_load32(daddr +
			    offsetof(struct dev_info, devi_node_state));

			if (*flags & CPU_DTRACE_FAULT)
				break;

			if (devi_state >= DS_INITIALIZED) {
				s = (char *)dtrace_loadptr(daddr +
				    offsetof(struct dev_info, devi_addr));
				len = dtrace_strlen(s, size);

				if (*flags & CPU_DTRACE_FAULT)
					break;

				if (len != 0) {
					if ((end -= (len + 1)) < start)
						break;

					*end = '@';
				}

				for (i = 1; i <= len; i++)
					end[i] = dtrace_load8((uintptr_t)s++);
			}

			/*
			 * Now for the node name...
			 */
			s = (char *)dtrace_loadptr(daddr +
			    offsetof(struct dev_info, devi_node_name));

			daddr = dtrace_loadptr(daddr +
			    offsetof(struct dev_info, devi_parent));

			/*
			 * If our parent is NULL (that is, if we're the root
			 * node), we're going to use the special path
			 * "devices".
			 */
			if (daddr == NULL)
				s = "devices";

			len = dtrace_strlen(s, size);
			if (*flags & CPU_DTRACE_FAULT)
				break;

			if ((end -= (len + 1)) < start)
				break;

			for (i = 1; i <= len; i++)
				end[i] = dtrace_load8((uintptr_t)s++);
			*end = '/';

			if (depth++ > dtrace_devdepth_max) {
				*flags |= CPU_DTRACE_ILLOP;
				break;
			}
		}

		if (end < start)
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);

		if (daddr == NULL) {
			regs[rd] = (uintptr_t)end;
			mstate->dtms_scratch_ptr += size;
		}

		break;
	}
	case DIF_SUBR_STRJOIN: {
		char *d = (char *)mstate->dtms_scratch_ptr;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t s1 = tupregs[0].dttk_value;
		uintptr_t s2 = tupregs[1].dttk_value;
		int i = 0;

		if (!dtrace_strcanload(s1, size, mstate, vstate) ||
		    !dtrace_strcanload(s2, size, mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = NULL;
			break;
		}

		for (;;) {
			if (i >= size) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = NULL;
				break;
			}

			if ((d[i++] = dtrace_load8(s1++)) == '\0') {
				i--;
				break;
			}
		}

		for (;;) {
			if (i >= size) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = NULL;
				break;
			}

			if ((d[i++] = dtrace_load8(s2++)) == '\0')
				break;
		}

		if (i < size) {
			mstate->dtms_scratch_ptr += i;
			regs[rd] = (uintptr_t)d;
		}

		break;
	}
	case DIF_SUBR_LLTOSTR: {
		int64_t i = (int64_t)tupregs[0].dttk_value;
		int64_t val = i < 0 ? i * -1 : i;
		uint64_t size = 22;	/* enough room for 2^64 in decimal */
		char *end = (char *)mstate->dtms_scratch_ptr + size - 1;

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = NULL;
			break;
		}

		for (*end-- = '\0'; val; val /= 10)
			*end-- = '0' + (val % 10);

		if (i == 0)
			*end-- = '0';

		if (i < 0)
			*end-- = '-';

		regs[rd] = (uintptr_t)end + 1;
		mstate->dtms_scratch_ptr += size;
		break;
	}
	case DIF_SUBR_HTONS:
	case DIF_SUBR_NTOHS:
#ifdef _BIG_ENDIAN
		regs[rd] = (uint16_t)tupregs[0].dttk_value;
#else
		regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value);
#endif
		break;

	case DIF_SUBR_HTONL:
	case DIF_SUBR_NTOHL:
#ifdef _BIG_ENDIAN
		regs[rd] = (uint32_t)tupregs[0].dttk_value;
#else
		regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value);
#endif
		break;

	case DIF_SUBR_HTONLL:
	case DIF_SUBR_NTOHLL:
#ifdef _BIG_ENDIAN
		regs[rd] = (uint64_t)tupregs[0].dttk_value;
#else
		regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value);
#endif
		break;
	case DIF_SUBR_DIRNAME:
	case DIF_SUBR_BASENAME: {
		char *dest = (char *)mstate->dtms_scratch_ptr;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t src = tupregs[0].dttk_value;
		int i, j, len = dtrace_strlen((char *)src, size);
		int lastbase = -1, firstbase = -1, lastdir = -1;
		int start, end;

		if (!dtrace_canload(src, len + 1, mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = NULL;
			break;
		}

		/*
		 * The basename and dirname for a zero-length string is
		 * defined to be "."
		 */
		if (len == 0) {
			len = 1;
			src = (uintptr_t)".";
		}

		/*
		 * Start from the back of the string, moving back toward the
		 * front until we see a character that isn't a slash.  That
		 * character is the last character in the basename.
		 */
		for (i = len - 1; i >= 0; i--) {
			if (dtrace_load8(src + i) != '/')
				break;
		}

		if (i >= 0)
			lastbase = i;

		/*
		 * Starting from the last character in the basename, move
		 * towards the front until we find a slash.  The character
		 * that we processed immediately before that is the first
		 * character in the basename.
		 */
		for (; i >= 0; i--) {
			if (dtrace_load8(src + i) == '/')
				break;
		}

		if (i >= 0)
			firstbase = i + 1;

		/*
		 * Now keep going until we find a non-slash character.  That
		 * character is the last character in the dirname.
		 */
		for (; i >= 0; i--) {
			if (dtrace_load8(src + i) != '/')
				break;
		}

		if (i >= 0)
			lastdir = i;

		ASSERT(!(lastbase == -1 && firstbase != -1));
		ASSERT(!(firstbase == -1 && lastdir != -1));

		if (lastbase == -1) {
			/*
			 * We didn't find a non-slash character.  We know that
			 * the length is non-zero, so the whole string must be
			 * slashes.  In either the dirname or the basename
			 * case, we return '/'.
			 */
			ASSERT(firstbase == -1);
			firstbase = lastbase = lastdir = 0;
		}

		if (firstbase == -1) {
			/*
			 * The entire string consists only of a basename
			 * component.  If we're looking for dirname, we need
			 * to change our string to be just "."; if we're
			 * looking for a basename, we'll just set the first
			 * character of the basename to be 0.
			 */
			if (subr == DIF_SUBR_DIRNAME) {
				ASSERT(lastdir == -1);
				src = (uintptr_t)".";
				lastdir = 0;
			} else {
				firstbase = 0;
			}
		}

		if (subr == DIF_SUBR_DIRNAME) {
			if (lastdir == -1) {
				/*
				 * We know that we have a slash in the name --
				 * or lastdir would be set to 0, above.  And
				 * because lastdir is -1, we know that this
				 * slash must be the first character.  (That
				 * is, the full string must be of the form
				 * "/basename".)  In this case, the last
				 * character of the directory name is 0.
				 */
				lastdir = 0;
			}

			start = 0;
			end = lastdir;
		} else {
			ASSERT(subr == DIF_SUBR_BASENAME);
			ASSERT(firstbase != -1 && lastbase != -1);
			start = firstbase;
			end = lastbase;
		}

		for (i = start, j = 0; i <= end && j < size - 1; i++, j++)
			dest[j] = dtrace_load8(src + i);

		dest[j] = '\0';
		regs[rd] = (uintptr_t)dest;
		mstate->dtms_scratch_ptr += size;
		break;
	}
	case DIF_SUBR_CLEANPATH: {
		char *dest = (char *)mstate->dtms_scratch_ptr, c;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t src = tupregs[0].dttk_value;
		int i = 0, j = 0;

		if (!dtrace_strcanload(src, size, mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = NULL;
			break;
		}

		/*
		 * Move forward, loading each character.
		 */
		do {
			c = dtrace_load8(src + i++);
next:
			if (j + 5 >= size)	/* 5 = strlen("/..c\0") */
				break;

			if (c != '/') {
				dest[j++] = c;
				continue;
			}

			c = dtrace_load8(src + i++);

			if (c == '/') {
				/*
				 * We have two slashes -- we can just advance
				 * to the next character.
				 */
				goto next;
			}

			if (c != '.') {
				/*
				 * This is not "." and it's not ".." -- we can
				 * just store the "/" and this character and
				 * continue processing.
				 */
				dest[j++] = '/';
				dest[j++] = c;
				continue;
			}

			c = dtrace_load8(src + i++);

			if (c == '/') {
				/*
				 * This is a "/./" component.  We're not going
				 * to store anything in the destination buffer;
				 * we're just going to go to the next component.
				 */
				goto next;
			}

			if (c != '.') {
				/*
				 * This is not ".." -- we can just store the
				 * "/." and this character and continue
				 * processing.
				 */
				dest[j++] = '/';
				dest[j++] = '.';
				dest[j++] = c;
				continue;
			}

			c = dtrace_load8(src + i++);

			if (c != '/' && c != '\0') {
				/*
				 * This is not ".." -- it's "..[mumble]".
				 * We'll store the "/.." and this character
				 * and continue processing.
				 */
				dest[j++] = '/';
				dest[j++] = '.';
				dest[j++] = '.';
				dest[j++] = c;
				continue;
			}

			/*
			 * This is "/../" or "/..\0".  We need to back up
			 * our destination pointer until we find a "/".
			 */
			i--;
			while (j != 0 && dest[--j] != '/')
				continue;

			if (c == '\0')
				dest[++j] = '/';
		} while (c != '\0');

		dest[j] = '\0';
		regs[rd] = (uintptr_t)dest;
		mstate->dtms_scratch_ptr += size;
		break;
	}
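
	/*
	 * For example, cleanpath("/usr//lib/./libc.so") yields
	 * "/usr/lib/libc.so", and cleanpath("/usr/lib/../bin") yields
	 * "/usr/bin".  Note that the "/../" collapse above is purely
	 * textual: if a collapsed component is a symbolic link, the cleaned
	 * path may name a different object than the original path did.
	 */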
	case DIF_SUBR_INET_NTOA:
	case DIF_SUBR_INET_NTOA6:
	case DIF_SUBR_INET_NTOP: {
		size_t size;
		int af, argi, i;
		char *base, *end;

		if (subr == DIF_SUBR_INET_NTOP) {
			af = (int)tupregs[0].dttk_value;
			argi = 1;
		} else {
			af = subr == DIF_SUBR_INET_NTOA ? AF_INET : AF_INET6;
			argi = 0;
		}

		if (af == AF_INET) {
			ipaddr_t ip4;
			uint8_t *ptr8, val;

			/*
			 * Safely load the IPv4 address.
			 */
			ip4 = dtrace_load32(tupregs[argi].dttk_value);

			/*
			 * Check an IPv4 string will fit in scratch.
			 */
			size = INET_ADDRSTRLEN;
			if (!DTRACE_INSCRATCH(mstate, size)) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = NULL;
				break;
			}
			base = (char *)mstate->dtms_scratch_ptr;
			end = (char *)mstate->dtms_scratch_ptr + size - 1;

			/*
			 * Stringify as a dotted decimal quad.
			 */
			*end-- = '\0';
			ptr8 = (uint8_t *)&ip4;
			for (i = 3; i >= 0; i--) {
				val = ptr8[i];

				if (val == 0) {
					*end-- = '0';
				} else {
					for (; val; val /= 10) {
						*end-- = '0' + (val % 10);
					}
				}

				if (i > 0)
					*end-- = '.';
			}
			ASSERT(end + 1 >= base);
		} else if (af == AF_INET6) {
			struct in6_addr ip6;
			int firstzero, tryzero, numzero, v6end;
			uint16_t val;
			const char digits[] = "0123456789abcdef";

			/*
			 * Stringify using RFC 1884 convention 2 - 16 bit
			 * hexadecimal values with a zero-run compression.
			 * Lower case hexadecimal digits are used.
			 *	eg, fe80::214:4fff:fe0b:76c8.
			 * The IPv4 embedded form is returned for inet_ntop,
			 * just the IPv4 string is returned for inet_ntoa6.
			 */

			/*
			 * Safely load the IPv6 address.
			 */
			dtrace_bcopy(
			    (void *)(uintptr_t)tupregs[argi].dttk_value,
			    (void *)(uintptr_t)&ip6, sizeof (struct in6_addr));

			/*
			 * Check an IPv6 string will fit in scratch.
			 */
			size = INET6_ADDRSTRLEN;
			if (!DTRACE_INSCRATCH(mstate, size)) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = NULL;
				break;
			}

			base = (char *)mstate->dtms_scratch_ptr;
			end = (char *)mstate->dtms_scratch_ptr + size - 1;
			*end-- = '\0';

			/*
			 * Find the longest run of 16 bit zero values
			 * for the single allowed zero compression - "::".
			 */
			firstzero = -1;
			tryzero = -1;
			numzero = 1;
			for (i = 0; i < sizeof (struct in6_addr); i++) {
				if (ip6._S6_un._S6_u8[i] == 0 &&
				    tryzero == -1 && i % 2 == 0) {
					tryzero = i;
					continue;
				}

				if (tryzero != -1 &&
				    (ip6._S6_un._S6_u8[i] != 0 ||
				    i == sizeof (struct in6_addr) - 1)) {

					if (i - tryzero <= numzero) {
						tryzero = -1;
						continue;
					}

					firstzero = tryzero;
					numzero = i - i % 2 - tryzero;
					tryzero = -1;

					if (ip6._S6_un._S6_u8[i] == 0 &&
					    i == sizeof (struct in6_addr) - 1)
						numzero += 2;
				}
			}
			ASSERT(firstzero + numzero <= sizeof (struct in6_addr));

			/*
			 * Check for an IPv4 embedded address.
			 */
			v6end = sizeof (struct in6_addr) - 2;
			if (IN6_IS_ADDR_V4MAPPED(&ip6) ||
			    IN6_IS_ADDR_V4COMPAT(&ip6)) {
				for (i = sizeof (struct in6_addr) - 1;
				    i >= DTRACE_V4MAPPED_OFFSET; i--) {
					ASSERT(end >= base);

					val = ip6._S6_un._S6_u8[i];

					if (val == 0) {
						*end-- = '0';
					} else {
						for (; val; val /= 10) {
							*end-- = '0' + val % 10;
						}
					}

					if (i > DTRACE_V4MAPPED_OFFSET)
						*end-- = '.';
				}

				if (subr == DIF_SUBR_INET_NTOA6)
					goto inetout;

				/*
				 * Set v6end to skip the IPv4 address that
				 * we have already stringified.
				 */
				v6end = 10;
			}

			/*
			 * Build the IPv6 string by working through the
			 * address in reverse.
			 */
			for (i = v6end; i >= 0; i -= 2) {
				ASSERT(end >= base);

				if (i == firstzero + numzero - 2) {
					*end-- = ':';
					*end-- = ':';
					i -= numzero - 2;
					continue;
				}

				if (i < 14 && i != firstzero - 2)
					*end-- = ':';

				val = (ip6._S6_un._S6_u8[i] << 8) +
				    ip6._S6_un._S6_u8[i + 1];

				if (val == 0) {
					*end-- = '0';
				} else {
					for (; val; val /= 16) {
						*end-- = digits[val % 16];
					}
				}
			}
			ASSERT(end + 1 >= base);
		} else {
			/*
			 * The user didn't use AF_INET or AF_INET6.
			 */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			regs[rd] = NULL;
			break;
		}

inetout:	regs[rd] = (uintptr_t)end + 1;
		mstate->dtms_scratch_ptr += size;
		break;
	}
	}
}
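
/*
 * For illustration (inferred from the stringification code above; not a
 * comment from the original source): an IPv4 address whose bytes are
 * { 10, 0, 0, 1 } is rendered by inet_ntoa() as "10.0.0.1"; inet_ntoa6()
 * of a v4-mapped IPv6 address yields just the dotted quad; inet_ntop()
 * with AF_INET6 yields the zero-run-compressed form, e.g.
 * "fe80::214:4fff:fe0b:76c8".
 */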
/*
 * Emulate the execution of DTrace IR instructions specified by the given
 * DIF object.  This function is deliberately void of assertions as all of
 * the necessary checks are handled by a call to dtrace_difo_validate().
 */
static uint64_t
dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate, dtrace_state_t *state)
{
	const dif_instr_t *text = difo->dtdo_buf;
	const uint_t textlen = difo->dtdo_len;
	const char *strtab = difo->dtdo_strtab;
	const uint64_t *inttab = difo->dtdo_inttab;

	uint64_t rval = 0;
	dtrace_statvar_t *svar;
	dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
	dtrace_difv_t *v;
	volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;

	dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
	uint64_t regs[DIF_DIR_NREGS];
	uint64_t *tmp;

	uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
	int64_t cc_r;
	uint_t pc = 0, id, opc;
	uint8_t ttop = 0;
	dif_instr_t instr;
	uint_t r1, r2, rd;

	/*
	 * We stash the current DIF object into the machine state: we need it
	 * for subsequent access checking.
	 */
	mstate->dtms_difo = difo;

	regs[DIF_REG_R0] = 0;		/* %r0 is fixed at zero */

	while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {
		opc = pc;

		instr = text[pc++];
		r1 = DIF_INSTR_R1(instr);
		r2 = DIF_INSTR_R2(instr);
		rd = DIF_INSTR_RD(instr);

		switch (DIF_INSTR_OP(instr)) {
		case DIF_OP_OR:
			regs[rd] = regs[r1] | regs[r2];
			break;
		case DIF_OP_XOR:
			regs[rd] = regs[r1] ^ regs[r2];
			break;
		case DIF_OP_AND:
			regs[rd] = regs[r1] & regs[r2];
			break;
		case DIF_OP_SLL:
			regs[rd] = regs[r1] << regs[r2];
			break;
		case DIF_OP_SRL:
			regs[rd] = regs[r1] >> regs[r2];
			break;
		case DIF_OP_SUB:
			regs[rd] = regs[r1] - regs[r2];
			break;
		case DIF_OP_ADD:
			regs[rd] = regs[r1] + regs[r2];
			break;
		case DIF_OP_MUL:
			regs[rd] = regs[r1] * regs[r2];
			break;
		case DIF_OP_SDIV:
			if (regs[r2] == 0) {
				regs[rd] = 0;
				*flags |= CPU_DTRACE_DIVZERO;
			} else {
				regs[rd] = (int64_t)regs[r1] /
				    (int64_t)regs[r2];
			}
			break;

		case DIF_OP_UDIV:
			if (regs[r2] == 0) {
				regs[rd] = 0;
				*flags |= CPU_DTRACE_DIVZERO;
			} else {
				regs[rd] = regs[r1] / regs[r2];
			}
			break;

		case DIF_OP_SREM:
			if (regs[r2] == 0) {
				regs[rd] = 0;
				*flags |= CPU_DTRACE_DIVZERO;
			} else {
				regs[rd] = (int64_t)regs[r1] %
				    (int64_t)regs[r2];
			}
			break;

		case DIF_OP_UREM:
			if (regs[r2] == 0) {
				regs[rd] = 0;
				*flags |= CPU_DTRACE_DIVZERO;
			} else {
				regs[rd] = regs[r1] % regs[r2];
			}
			break;

		case DIF_OP_NOT:
			regs[rd] = ~regs[r1];
			break;
		case DIF_OP_MOV:
			regs[rd] = regs[r1];
			break;
		case DIF_OP_CMP:
			cc_r = regs[r1] - regs[r2];
			cc_n = cc_r < 0;
			cc_z = cc_r == 0;
			cc_v = 0;
			cc_c = regs[r1] < regs[r2];
			break;
		case DIF_OP_TST:
			cc_n = cc_v = cc_c = 0;
			cc_z = regs[r1] == 0;
			break;
		case DIF_OP_BA:
			pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BE:
			if (cc_z)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BNE:
			if (cc_z == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BG:
			if ((cc_z | (cc_n ^ cc_v)) == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BGU:
			if ((cc_c | cc_z) == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BGE:
			if ((cc_n ^ cc_v) == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BGEU:
			if (cc_c == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BL:
			if (cc_n ^ cc_v)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BLU:
			if (cc_c)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BLE:
			if (cc_z | (cc_n ^ cc_v))
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BLEU:
			if (cc_c | cc_z)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_RLDSB:
			if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;
				*illval = regs[r1];
				break;
			}
			/*FALLTHROUGH*/
		case DIF_OP_LDSB:
			regs[rd] = (int8_t)dtrace_load8(regs[r1]);
			break;
		case DIF_OP_RLDSH:
			if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;
				*illval = regs[r1];
				break;
			}
			/*FALLTHROUGH*/
		case DIF_OP_LDSH:
			regs[rd] = (int16_t)dtrace_load16(regs[r1]);
			break;
		case DIF_OP_RLDSW:
			if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;
				*illval = regs[r1];
				break;
			}
			/*FALLTHROUGH*/
		case DIF_OP_LDSW:
			regs[rd] = (int32_t)dtrace_load32(regs[r1]);
			break;
		case DIF_OP_RLDUB:
			if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;
				*illval = regs[r1];
				break;
			}
			/*FALLTHROUGH*/
		case DIF_OP_LDUB:
			regs[rd] = dtrace_load8(regs[r1]);
			break;
		case DIF_OP_RLDUH:
			if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;
				*illval = regs[r1];
				break;
			}
			/*FALLTHROUGH*/
		case DIF_OP_LDUH:
			regs[rd] = dtrace_load16(regs[r1]);
			break;
		case DIF_OP_RLDUW:
			if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;
				*illval = regs[r1];
				break;
			}
			/*FALLTHROUGH*/
		case DIF_OP_LDUW:
			regs[rd] = dtrace_load32(regs[r1]);
			break;
		case DIF_OP_RLDX:
			if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;
				*illval = regs[r1];
				break;
			}
			/*FALLTHROUGH*/
		case DIF_OP_LDX:
			regs[rd] = dtrace_load64(regs[r1]);
			break;
		case DIF_OP_ULDSB:
			regs[rd] = (int8_t)
			    dtrace_fuword8((void *)(uintptr_t)regs[r1]);
			break;
		case DIF_OP_ULDSH:
			regs[rd] = (int16_t)
			    dtrace_fuword16((void *)(uintptr_t)regs[r1]);
			break;
		case DIF_OP_ULDSW:
			regs[rd] = (int32_t)
			    dtrace_fuword32((void *)(uintptr_t)regs[r1]);
			break;
		case DIF_OP_ULDUB:
			regs[rd] =
			    dtrace_fuword8((void *)(uintptr_t)regs[r1]);
			break;
		case DIF_OP_ULDUH:
			regs[rd] =
			    dtrace_fuword16((void *)(uintptr_t)regs[r1]);
			break;
		case DIF_OP_ULDUW:
			regs[rd] =
			    dtrace_fuword32((void *)(uintptr_t)regs[r1]);
			break;
		case DIF_OP_ULDX:
			regs[rd] =
			    dtrace_fuword64((void *)(uintptr_t)regs[r1]);
			break;
		case DIF_OP_RET:
			rval = regs[rd];
			break;
		case DIF_OP_NOP:
			break;
		case DIF_OP_SETX:
			regs[rd] = inttab[DIF_INSTR_INTEGER(instr)];
			break;
		case DIF_OP_SETS:
			regs[rd] = (uint64_t)(uintptr_t)
			    (strtab + DIF_INSTR_STRING(instr));
			break;
		case DIF_OP_SCMP: {
			size_t sz = state->dts_options[DTRACEOPT_STRSIZE];
			uintptr_t s1 = regs[r1];
			uintptr_t s2 = regs[r2];

			if (s1 != NULL &&
			    !dtrace_strcanload(s1, sz, mstate, vstate))
				break;
			if (s2 != NULL &&
			    !dtrace_strcanload(s2, sz, mstate, vstate))
				break;

			cc_r = dtrace_strncmp((char *)s1, (char *)s2, sz);

			cc_n = cc_r < 0;
			cc_z = cc_r == 0;
			cc_v = cc_c = 0;
			break;
		}
		case DIF_OP_LDGA:
			regs[rd] = dtrace_dif_variable(mstate, state,
			    r1, regs[r2]);
			break;
		case DIF_OP_LDGS:
			id = DIF_INSTR_VAR(instr);

			if (id >= DIF_VAR_OTHER_UBASE) {
				uintptr_t a;

				id -= DIF_VAR_OTHER_UBASE;
				svar = vstate->dtvs_globals[id];
				ASSERT(svar != NULL);
				v = &svar->dtsv_var;

				if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) {
					regs[rd] = svar->dtsv_data;
					break;
				}

				a = (uintptr_t)svar->dtsv_data;

				if (*(uint8_t *)a == UINT8_MAX) {
					/*
					 * If the 0th byte is set to UINT8_MAX
					 * then this is to be treated as a
					 * reference to a NULL variable.
					 */
					regs[rd] = NULL;
				} else {
					regs[rd] = a + sizeof (uint64_t);
				}

				break;
			}

			regs[rd] = dtrace_dif_variable(mstate, state, id, 0);
			break;

		case DIF_OP_STGS:
			id = DIF_INSTR_VAR(instr);

			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			svar = vstate->dtvs_globals[id];
			ASSERT(svar != NULL);
			v = &svar->dtsv_var;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t a = (uintptr_t)svar->dtsv_data;

				ASSERT(a != NULL);
				ASSERT(svar->dtsv_size != 0);

				if (regs[rd] == NULL) {
					*(uint8_t *)a = UINT8_MAX;
					break;
				} else {
					*(uint8_t *)a = 0;
					a += sizeof (uint64_t);
				}
				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd], &v->dtdv_type,
				    mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    (void *)a, &v->dtdv_type);
				break;
			}

			svar->dtsv_data = regs[rd];
			break;

		case DIF_OP_LDTA:
			/*
			 * There are no DTrace built-in thread-local arrays at
			 * present.  This opcode is saved for future work.
			 */
			*flags |= CPU_DTRACE_ILLOP;
			regs[rd] = NULL;
			break;

		case DIF_OP_LDLS:
			id = DIF_INSTR_VAR(instr);

			if (id < DIF_VAR_OTHER_UBASE) {
				/*
				 * For now, this has no meaning.
				 */
				regs[rd] = 0;
				break;
			}

			id -= DIF_VAR_OTHER_UBASE;

			ASSERT(id < vstate->dtvs_nlocals);
			ASSERT(vstate->dtvs_locals != NULL);

			svar = vstate->dtvs_locals[id];
			ASSERT(svar != NULL);
			v = &svar->dtsv_var;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t a = (uintptr_t)svar->dtsv_data;
				size_t sz = v->dtdv_type.dtdt_size;

				sz += sizeof (uint64_t);
				ASSERT(svar->dtsv_size == NCPU * sz);
				a += CPU->cpu_id * sz;

				if (*(uint8_t *)a == UINT8_MAX) {
					/*
					 * If the 0th byte is set to UINT8_MAX
					 * then this is to be treated as a
					 * reference to a NULL variable.
					 */
					regs[rd] = NULL;
				} else {
					regs[rd] = a + sizeof (uint64_t);
				}

				break;
			}

			ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
			tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
			regs[rd] = tmp[CPU->cpu_id];
			break;

		case DIF_OP_STLS:
			id = DIF_INSTR_VAR(instr);

			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;
			ASSERT(id < vstate->dtvs_nlocals);

			ASSERT(vstate->dtvs_locals != NULL);
			svar = vstate->dtvs_locals[id];
			ASSERT(svar != NULL);
			v = &svar->dtsv_var;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t a = (uintptr_t)svar->dtsv_data;
				size_t sz = v->dtdv_type.dtdt_size;

				sz += sizeof (uint64_t);
				ASSERT(svar->dtsv_size == NCPU * sz);
				a += CPU->cpu_id * sz;

				if (regs[rd] == NULL) {
					*(uint8_t *)a = UINT8_MAX;
					break;
				} else {
					*(uint8_t *)a = 0;
					a += sizeof (uint64_t);
				}

				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd], &v->dtdv_type,
				    mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    (void *)a, &v->dtdv_type);
				break;
			}

			ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
			tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
			tmp[CPU->cpu_id] = regs[rd];
			break;

		case DIF_OP_LDTS: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;
			v = &vstate->dtvs_tlocals[id];

			key = &tupregs[DIF_DTR_NREGS];
			key[0].dttk_value = (uint64_t)id;
			key[0].dttk_size = 0;
			DTRACE_TLS_THRKEY(key[1].dttk_value);
			key[1].dttk_size = 0;

			dvar = dtrace_dynvar(dstate, 2, key,
			    sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC,
			    mstate, vstate);

			if (dvar == NULL) {
				regs[rd] = 0;
				break;
			}

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
			} else {
				regs[rd] = *((uint64_t *)dvar->dtdv_data);
			}

			break;
		}

		case DIF_OP_STTS: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			key = &tupregs[DIF_DTR_NREGS];
			key[0].dttk_value = (uint64_t)id;
			key[0].dttk_size = 0;
			DTRACE_TLS_THRKEY(key[1].dttk_value);
			key[1].dttk_size = 0;
			v = &vstate->dtvs_tlocals[id];

			dvar = dtrace_dynvar(dstate, 2, key,
			    v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
			    v->dtdv_type.dtdt_size : sizeof (uint64_t),
			    regs[rd] ? DTRACE_DYNVAR_ALLOC :
			    DTRACE_DYNVAR_DEALLOC, mstate, vstate);

			/*
			 * Given that we're storing to thread-local data,
			 * we need to flush our predicate cache.
			 */
			curthread->t_predcache = NULL;

			if (dvar == NULL)
				break;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd],
				    &v->dtdv_type, mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    dvar->dtdv_data, &v->dtdv_type);
			} else {
				*((uint64_t *)dvar->dtdv_data) = regs[rd];
			}

			break;
		}

		case DIF_OP_SRA:
			regs[rd] = (int64_t)regs[r1] >> regs[r2];
			break;

		case DIF_OP_CALL:
			dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd,
			    regs, tupregs, ttop, mstate, state);
			break;

		case DIF_OP_PUSHTR:
			if (ttop == DIF_DTR_NREGS) {
				*flags |= CPU_DTRACE_TUPOFLOW;
				break;
			}

			if (r1 == DIF_TYPE_STRING) {
				/*
				 * If this is a string type and the size is 0,
				 * we'll use the system-wide default string
				 * size.  Note that we are _not_ looking at
				 * the value of the DTRACEOPT_STRSIZE option;
				 * had this been set, we would expect to have
				 * a non-zero size value in the "pushtr".
				 */
				tupregs[ttop].dttk_size =
				    dtrace_strlen((char *)(uintptr_t)regs[rd],
				    regs[r2] ? regs[r2] :
				    dtrace_strsize_default) + 1;
			} else {
				tupregs[ttop].dttk_size = regs[r2];
			}

			tupregs[ttop++].dttk_value = regs[rd];
			break;

		case DIF_OP_PUSHTV:
			if (ttop == DIF_DTR_NREGS) {
				*flags |= CPU_DTRACE_TUPOFLOW;
				break;
			}

			tupregs[ttop].dttk_value = regs[rd];
			tupregs[ttop++].dttk_size = 0;
			break;

		case DIF_OP_POPTS:
			if (ttop != 0)
				ttop--;
			break;

		case DIF_OP_FLUSHTS:
			ttop = 0;
			break;

		case DIF_OP_LDGAA:
		case DIF_OP_LDTAA: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key = tupregs;
			uint_t nkeys = ttop;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			key[nkeys].dttk_value = (uint64_t)id;
			key[nkeys++].dttk_size = 0;

			if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) {
				DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
				key[nkeys++].dttk_size = 0;
				v = &vstate->dtvs_tlocals[id];
			} else {
				v = &vstate->dtvs_globals[id]->dtsv_var;
			}

			dvar = dtrace_dynvar(dstate, nkeys, key,
			    v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
			    v->dtdv_type.dtdt_size : sizeof (uint64_t),
			    DTRACE_DYNVAR_NOALLOC, mstate, vstate);

			if (dvar == NULL) {
				regs[rd] = 0;
				break;
			}

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
			} else {
				regs[rd] = *((uint64_t *)dvar->dtdv_data);
			}

			break;
		}

		case DIF_OP_STGAA:
		case DIF_OP_STTAA: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key = tupregs;
			uint_t nkeys = ttop;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			key[nkeys].dttk_value = (uint64_t)id;
			key[nkeys++].dttk_size = 0;

			if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) {
				DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
				key[nkeys++].dttk_size = 0;
				v = &vstate->dtvs_tlocals[id];
			} else {
				v = &vstate->dtvs_globals[id]->dtsv_var;
			}

			dvar = dtrace_dynvar(dstate, nkeys, key,
			    v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
			    v->dtdv_type.dtdt_size : sizeof (uint64_t),
			    regs[rd] ? DTRACE_DYNVAR_ALLOC :
			    DTRACE_DYNVAR_DEALLOC, mstate, vstate);

			if (dvar == NULL)
				break;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd], &v->dtdv_type,
				    mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    dvar->dtdv_data, &v->dtdv_type);
			} else {
				*((uint64_t *)dvar->dtdv_data) = regs[rd];
			}

			break;
		}

		case DIF_OP_ALLOCS: {
			uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
			size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1];

			/*
			 * Rounding up the user allocation size could have
			 * overflowed large, bogus allocations (like -1ULL) to
			 * 0.
			 */
			if (size < regs[r1] ||
			    !DTRACE_INSCRATCH(mstate, size)) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = NULL;
				break;
			}

			dtrace_bzero((void *) mstate->dtms_scratch_ptr, size);
			mstate->dtms_scratch_ptr += size;
			regs[rd] = ptr;
			break;
		}

		case DIF_OP_COPYS:
			if (!dtrace_canstore(regs[rd], regs[r2],
			    mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}

			if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate))
				break;

			dtrace_bcopy((void *)(uintptr_t)regs[r1],
			    (void *)(uintptr_t)regs[rd], (size_t)regs[r2]);
			break;

		case DIF_OP_STB:
			if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
			*((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1];
			break;

		case DIF_OP_STH:
			if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
			if (regs[rd] & 1) {
				*flags |= CPU_DTRACE_BADALIGN;
				*illval = regs[rd];
				break;
			}
			*((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1];
			break;

		case DIF_OP_STW:
			if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
			if (regs[rd] & 3) {
				*flags |= CPU_DTRACE_BADALIGN;
				*illval = regs[rd];
				break;
			}
			*((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1];
			break;

		case DIF_OP_STX:
			if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
			if (regs[rd] & 7) {
				*flags |= CPU_DTRACE_BADALIGN;
				*illval = regs[rd];
				break;
			}
			*((uint64_t *)(uintptr_t)regs[rd]) = regs[r1];
			break;
		}
	}

	if (!(*flags & CPU_DTRACE_FAULT))
		return (rval);

	mstate->dtms_fltoffs = opc * sizeof (dif_instr_t);
	mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS;

	return (0);
}
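
/*
 * To make the emulator loop above concrete: a simple D expression such as
 * "x + 1" compiles to a short sequence of DIF instructions in the spirit
 * of the (hypothetical, illustrative) listing below; the authoritative
 * encodings are in <sys/dtrace.h>.  Each mnemonic corresponds to one
 * DIF_OP_* case in the switch above.
 *
 *	ldgs	DT_VAR(x), %r1		! load the global variable "x"
 *	setx	DT_INTEGER[1], %r2	! load the constant 1 from inttab
 *	add	%r1, %r2, %r3		! regs[rd] = regs[r1] + regs[r2]
 *	ret	%r3			! rval = regs[rd]
 */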
static void
dtrace_action_breakpoint(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;
	dtrace_provider_t *prov = probe->dtpr_provider;
	char c[DTRACE_FULLNAMELEN + 80], *str;
	char *msg = "dtrace: breakpoint action at probe ";
	char *ecbmsg = " (ecb ";
	uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4));
	uintptr_t val = (uintptr_t)ecb;
	int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0;

	if (dtrace_destructive_disallow)
		return;

	/*
	 * It's impossible to be taking action on the NULL probe.
	 */
	ASSERT(probe != NULL);

	/*
	 * This is a poor man's (destitute man's?) sprintf():  we want to
	 * print the provider name, module name, function name and name of
	 * the probe, along with the hex address of the ECB with the breakpoint
	 * action -- all of which we must place in the character buffer by
	 * hand.
	 */
	while (*msg != '\0')
		c[i++] = *msg++;

	for (str = prov->dtpv_name; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_mod; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_func; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_name; *str != '\0'; str++)
		c[i++] = *str;

	while (*ecbmsg != '\0')
		c[i++] = *ecbmsg++;

	while (shift >= 0) {
		mask = (uintptr_t)0xf << shift;

		if (val >= ((uintptr_t)1 << shift))
			c[i++] = "0123456789abcdef"[(val & mask) >> shift];
		shift -= 4;
	}

	c[i++] = ')';
	c[i] = '\0';

	debug_enter(c);
}
static void
dtrace_action_panic(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;

	/*
	 * It's impossible to be taking action on the NULL probe.
	 */
	ASSERT(probe != NULL);

	if (dtrace_destructive_disallow)
		return;

	if (dtrace_panicked != NULL)
		return;

	if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL)
		return;

	/*
	 * We won the right to panic.  (We want to be sure that only one
	 * thread calls panic() from dtrace_probe(), and that panic() is
	 * called exactly once.)
	 */
	dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
	    probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
	    probe->dtpr_func, probe->dtpr_name, (void *)ecb);
}
static void
dtrace_action_raise(uint64_t sig)
{
	if (dtrace_destructive_disallow)
		return;

	if (sig >= NSIG) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return;
	}

	/*
	 * raise() has a queue depth of 1 -- we ignore all subsequent
	 * invocations of the raise() action.
	 */
	if (curthread->t_dtrace_sig == 0)
		curthread->t_dtrace_sig = (uint8_t)sig;

	curthread->t_sig_check = 1;
	aston(curthread);
}
static void
dtrace_action_stop(void)
{
	if (dtrace_destructive_disallow)
		return;

	if (!curthread->t_dtrace_stop) {
		curthread->t_dtrace_stop = 1;
		curthread->t_sig_check = 1;
		aston(curthread);
	}
}
static void
dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
{
	hrtime_t now;
	volatile uint16_t *flags;
	cpu_t *cpu = CPU;

	if (dtrace_destructive_disallow)
		return;

	flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags;

	now = dtrace_gethrtime();

	if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
		/*
		 * We need to advance the mark to the current time.
		 */
		cpu->cpu_dtrace_chillmark = now;
		cpu->cpu_dtrace_chilled = 0;
	}

	/*
	 * Now check to see if the requested chill time would take us over
	 * the maximum amount of time allowed in the chill interval.  (Or
	 * worse, if the calculation itself induces overflow.)
	 */
	if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
	    cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
		*flags |= CPU_DTRACE_ILLOP;
		return;
	}

	while (dtrace_gethrtime() - now < val)
		continue;

	/*
	 * Normally, we assure that the value of the variable "timestamp" does
	 * not change within an ECB.  The presence of chill() represents an
	 * exception to this rule, however.
	 */
	mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
	cpu->cpu_dtrace_chilled += val;
}
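
/*
 * For illustration (not from the original source): a D enabling such as
 *
 *	syscall::ioctl:entry { chill(500000); }
 *
 * spins for 500 microseconds in probe context.  Requests that would push
 * the per-CPU total past dtrace_chill_max within one dtrace_chill_interval
 * are refused with CPU_DTRACE_ILLOP, per the check above.
 */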
static void
dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state,
    uint64_t *buf, uint64_t arg)
{
	int nframes = DTRACE_USTACK_NFRAMES(arg);
	int strsize = DTRACE_USTACK_STRSIZE(arg);
	uint64_t *pcs = &buf[1], *fps;
	char *str = (char *)&pcs[nframes];
	int size, offs = 0, i, j;
	uintptr_t old = mstate->dtms_scratch_ptr, saved;
	uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	char *sym;

	/*
	 * Should be taking a faster path if string space has not been
	 * allocated.
	 */
	ASSERT(strsize != 0);

	/*
	 * We will first allocate some temporary space for the frame pointers.
	 */
	fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
	size = (uintptr_t)fps - mstate->dtms_scratch_ptr +
	    (nframes * sizeof (uint64_t));

	if (!DTRACE_INSCRATCH(mstate, size)) {
		/*
		 * Not enough room for our frame pointers -- need to indicate
		 * that we ran out of scratch space.
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return;
	}

	mstate->dtms_scratch_ptr += size;
	saved = mstate->dtms_scratch_ptr;

	/*
	 * Now get a stack with both program counters and frame pointers.
	 */
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	dtrace_getufpstack(buf, fps, nframes + 1);
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	/*
	 * If that faulted, we're cooked.
	 */
	if (*flags & CPU_DTRACE_FAULT)
		goto out;

	/*
	 * Now we want to walk up the stack, calling the USTACK helper.  For
	 * each iteration, we restore the scratch pointer.
	 */
	for (i = 0; i < nframes; i++) {
		mstate->dtms_scratch_ptr = saved;

		if (offs >= strsize)
			break;

		sym = (char *)(uintptr_t)dtrace_helper(
		    DTRACE_HELPER_ACTION_USTACK,
		    mstate, state, pcs[i], fps[i]);

		/*
		 * If we faulted while running the helper, we're going to
		 * clear the fault and null out the corresponding string.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			str[offs++] = '\0';
			continue;
		}

		if (sym == NULL) {
			str[offs++] = '\0';
			continue;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);

		/*
		 * Now copy in the string that the helper returned to us.
		 */
		for (j = 0; offs + j < strsize; j++) {
			if ((str[offs + j] = sym[j]) == '\0')
				break;
		}

		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

		offs += j + 1;
	}

	if (offs >= strsize) {
		/*
		 * If we didn't have room for all of the strings, we don't
		 * abort processing -- this needn't be a fatal error -- but we
		 * still want to increment a counter (dts_stkstroverflows) to
		 * allow this condition to be warned about.  (If this is from
		 * a jstack() action, it is easily tuned via jstackstrsize.)
		 */
		dtrace_error(&state->dts_stkstroverflows);
	}

	while (offs < strsize)
		str[offs++] = '\0';

out:
	mstate->dtms_scratch_ptr = old;
}
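
/*
 * For reference (a layout inferred from the pointer arithmetic above, not
 * a comment from the original source), the record that
 * dtrace_action_ustack() fills is laid out as:
 *
 *	buf[0]			leading entry filled by dtrace_getufpstack()
 *				(hence the "nframes + 1" above)
 *	pcs = &buf[1]		nframes program counter values
 *	str = &pcs[nframes]	strsize bytes of NUL-terminated helper strings
 */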
/*
 * If you're looking for the epicenter of DTrace, you just found it.  This
 * is the function called by the provider to fire a probe -- from which all
 * subsequent probe-context DTrace activity emanates.
 */
void
dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
    uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
{
	processorid_t cpuid;
	dtrace_icookie_t cookie;
	dtrace_probe_t *probe;
	dtrace_mstate_t mstate;
	dtrace_ecb_t *ecb;
	dtrace_action_t *act;
	intptr_t offs;
	size_t size;
	int vtime, onintr;
	volatile uint16_t *flags;
	hrtime_t now;

	/*
	 * Kick out immediately if this CPU is still being born (in which case
	 * curthread will be set to -1) or the current thread can't allow
	 * probes in its current context.
	 */
	if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE))
		return;

	cookie = dtrace_interrupt_disable();
	probe = dtrace_probes[id - 1];
	cpuid = CPU->cpu_id;
	onintr = CPU_ON_INTR(CPU);

	if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
	    probe->dtpr_predcache == curthread->t_predcache) {
		/*
		 * We have hit in the predicate cache; we know that
		 * this predicate would evaluate to be false.
		 */
		dtrace_interrupt_enable(cookie);
		return;
	}

	if (panic_quiesce) {
		/*
		 * We don't trace anything if we're panicking.
		 */
		dtrace_interrupt_enable(cookie);
		return;
	}

	now = dtrace_gethrtime();
	vtime = dtrace_vtime_references != 0;

	if (vtime && curthread->t_dtrace_start)
		curthread->t_dtrace_vtime += now - curthread->t_dtrace_start;

	mstate.dtms_difo = NULL;
	mstate.dtms_probe = probe;
	mstate.dtms_strtok = NULL;
	mstate.dtms_arg[0] = arg0;
	mstate.dtms_arg[1] = arg1;
	mstate.dtms_arg[2] = arg2;
	mstate.dtms_arg[3] = arg3;
	mstate.dtms_arg[4] = arg4;

	flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags;

	for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
		dtrace_predicate_t *pred = ecb->dte_predicate;
		dtrace_state_t *state = ecb->dte_state;
		dtrace_buffer_t *buf = &state->dts_buffer[cpuid];
		dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
		dtrace_vstate_t *vstate = &state->dts_vstate;
		dtrace_provider_t *prov = probe->dtpr_provider;
		int committed = 0;
		caddr_t tomax;

		/*
		 * A little subtlety with the following (seemingly innocuous)
		 * declaration of the automatic 'val':  by looking at the
		 * code, you might think that it could be declared in the
		 * action processing loop, below.  (That is, it's only used in
		 * the action processing loop.)  However, it must be declared
		 * out of that scope because in the case of DIF expression
		 * arguments to aggregating actions, one iteration of the
		 * action loop will use the last iteration's value.
		 */
		uint64_t val = 0;

		mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
		mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
		*flags &= ~CPU_DTRACE_ERROR;

		if (prov == dtrace_provider) {
			/*
			 * If dtrace itself is the provider of this probe,
			 * we're only going to continue processing the ECB if
			 * arg0 (the dtrace_state_t) is equal to the ECB's
			 * creating state.  (This prevents disjoint consumers
			 * from seeing one another's metaprobes.)
			 */
			if (arg0 != (uint64_t)(uintptr_t)state)
				continue;
		}

		if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
			/*
			 * We're not currently active.  If our provider isn't
			 * the dtrace pseudo provider, we're not interested.
			 */
			if (prov != dtrace_provider)
				continue;

			/*
			 * Now we must further check if we are in the BEGIN
			 * probe.  If we are, we will only continue processing
			 * if we're still in WARMUP -- if one BEGIN enabling
			 * has invoked the exit() action, we don't want to
			 * evaluate subsequent BEGIN enablings.
			 */
			if (probe->dtpr_id == dtrace_probeid_begin &&
			    state->dts_activity != DTRACE_ACTIVITY_WARMUP) {
				ASSERT(state->dts_activity ==
				    DTRACE_ACTIVITY_DRAINING);
				continue;
			}
		}

		if (ecb->dte_cond && !dtrace_priv_probe(state, &mstate, ecb))
			continue;

		if (now - state->dts_alive > dtrace_deadman_timeout) {
			/*
			 * We seem to be dead.  Unless we (a) have kernel
			 * destructive permissions (b) have explicitly enabled
			 * destructive actions and (c) destructive actions have
			 * not been disabled, we're going to transition into
			 * the KILLED state, from which no further processing
			 * on this state will be performed.
			 */
			if (!dtrace_priv_kernel_destructive(state) ||
			    !state->dts_cred.dcr_destructive ||
			    dtrace_destructive_disallow) {
				void *activity = &state->dts_activity;
				dtrace_activity_t current;

				do {
					current = state->dts_activity;
				} while (dtrace_cas32(activity, current,
				    DTRACE_ACTIVITY_KILLED) != current);

				continue;
			}
		}

		if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed,
		    ecb->dte_alignment, state, &mstate)) < 0)
			continue;

		tomax = buf->dtb_tomax;
		ASSERT(tomax != NULL);

		if (ecb->dte_size != 0)
			DTRACE_STORE(uint32_t, tomax, offs, ecb->dte_epid);

		mstate.dtms_epid = ecb->dte_epid;
		mstate.dtms_present |= DTRACE_MSTATE_EPID;

		if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
			mstate.dtms_access |= DTRACE_ACCESS_KERNEL;

		if (pred != NULL) {
			dtrace_difo_t *dp = pred->dtp_difo;
			int rval;

			rval = dtrace_dif_emulate(dp, &mstate, vstate, state);

			if (!(*flags & CPU_DTRACE_ERROR) && !rval) {
				dtrace_cacheid_t cid = probe->dtpr_predcache;

				if (cid != DTRACE_CACHEIDNONE && !onintr) {
					/*
					 * Update the predicate cache...
					 */
					ASSERT(cid == pred->dtp_cacheid);
					curthread->t_predcache = cid;
				}

				continue;
			}
		}

		for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) &&
		    act != NULL; act = act->dta_next) {
			size_t valoffs;
			dtrace_difo_t *dp;
			dtrace_recdesc_t *rec = &act->dta_rec;

			size = rec->dtrd_size;
			valoffs = offs + rec->dtrd_offset;

			if (DTRACEACT_ISAGG(act->dta_kind)) {
				uint64_t v = 0xbad;
				dtrace_aggregation_t *agg;

				agg = (dtrace_aggregation_t *)act;

				if ((dp = act->dta_difo) != NULL)
					v = dtrace_dif_emulate(dp,
					    &mstate, vstate, state);

				if (*flags & CPU_DTRACE_ERROR)
					continue;

				/*
				 * Note that we always pass the expression
				 * value from the previous iteration of the
				 * action loop.  This value will only be used
				 * if there is an expression argument to the
				 * aggregating action, denoted by the
				 * dtag_hasarg field.
				 */
				dtrace_aggregate(agg, buf,
				    offs, aggbuf, v, val);
				continue;
			}

			switch (act->dta_kind) {
			case DTRACEACT_STOP:
				if (dtrace_priv_proc_destructive(state,
				    &mstate))
					dtrace_action_stop();
				continue;

			case DTRACEACT_BREAKPOINT:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_breakpoint(ecb);
				continue;

			case DTRACEACT_PANIC:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_panic(ecb);
				continue;

			case DTRACEACT_STACK:
				if (!dtrace_priv_kernel(state))
					continue;

				dtrace_getpcstack((pc_t *)(tomax + valoffs),
				    size / sizeof (pc_t), probe->dtpr_aframes,
				    DTRACE_ANCHORED(probe) ? NULL :
				    (uint32_t *)arg0);

				continue;

			case DTRACEACT_JSTACK:
			case DTRACEACT_USTACK:
				if (!dtrace_priv_proc(state, &mstate))
					continue;

				/*
				 * See comment in DIF_VAR_PID.
				 */
				if (DTRACE_ANCHORED(mstate.dtms_probe) &&
				    CPU_ON_INTR(CPU)) {
					int depth = DTRACE_USTACK_NFRAMES(
					    rec->dtrd_arg) + 1;

					dtrace_bzero((void *)(tomax + valoffs),
					    DTRACE_USTACK_STRSIZE(rec->dtrd_arg)
					    + depth * sizeof (uint64_t));

					continue;
				}

				if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 &&
				    curproc->p_dtrace_helpers != NULL) {
					/*
					 * This is the slow path -- we have
					 * allocated string space, and we're
					 * getting the stack of a process that
					 * has helpers.  Call into a separate
					 * routine to perform this processing.
					 */
					dtrace_action_ustack(&mstate, state,
					    (uint64_t *)(tomax + valoffs),
					    rec->dtrd_arg);
					continue;
				}

				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				dtrace_getupcstack((uint64_t *)
				    (tomax + valoffs),
				    DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
				continue;

			default:
				break;
			}

			dp = act->dta_difo;
			ASSERT(dp != NULL);

			val = dtrace_dif_emulate(dp, &mstate, vstate, state);

			if (*flags & CPU_DTRACE_ERROR)
				continue;

			switch (act->dta_kind) {
			case DTRACEACT_SPECULATE:
				ASSERT(buf == &state->dts_buffer[cpuid]);
				buf = dtrace_speculation_buffer(state,
				    cpuid, val);

				if (buf == NULL) {
					*flags |= CPU_DTRACE_DROP;
					continue;
				}

				offs = dtrace_buffer_reserve(buf,
				    ecb->dte_needed, ecb->dte_alignment,
				    state, NULL);

				if (offs < 0) {
					*flags |= CPU_DTRACE_DROP;
					continue;
				}

				tomax = buf->dtb_tomax;
				ASSERT(tomax != NULL);

				if (ecb->dte_size != 0)
					DTRACE_STORE(uint32_t, tomax, offs,
					    ecb->dte_epid);
				continue;

			case DTRACEACT_CHILL:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_chill(&mstate, val);
				continue;

			case DTRACEACT_RAISE:
				if (dtrace_priv_proc_destructive(state,
				    &mstate))
					dtrace_action_raise(val);
				continue;

			case DTRACEACT_COMMIT:
				ASSERT(!committed);

				/*
				 * We need to commit our buffer state.
				 */
				if (ecb->dte_size)
					buf->dtb_offset = offs + ecb->dte_size;
				buf = &state->dts_buffer[cpuid];
				dtrace_speculation_commit(state, cpuid, val);
				committed = 1;
				continue;

			case DTRACEACT_DISCARD:
				dtrace_speculation_discard(state, cpuid, val);
				continue;

			case DTRACEACT_DIFEXPR:
			case DTRACEACT_LIBACT:
			case DTRACEACT_PRINTF:
			case DTRACEACT_PRINTA:
			case DTRACEACT_SYSTEM:
			case DTRACEACT_FREOPEN:
				break;

			case DTRACEACT_SYM:
			case DTRACEACT_MOD:
				if (!dtrace_priv_kernel(state))
					continue;
				break;

			case DTRACEACT_USYM:
			case DTRACEACT_UMOD:
			case DTRACEACT_UADDR: {
				struct pid *pid = curthread->t_procp->p_pidp;

				if (!dtrace_priv_proc(state, &mstate))
					continue;

				DTRACE_STORE(uint64_t, tomax,
				    valoffs, (uint64_t)pid->pid_id);
				DTRACE_STORE(uint64_t, tomax,
				    valoffs + sizeof (uint64_t), val);

				continue;
			}

			case DTRACEACT_EXIT: {
				/*
				 * For the exit action, we are going to attempt
				 * to atomically set our activity to be
				 * draining.  If this fails (either because
				 * another CPU has beat us to the exit action,
				 * or because our current activity is something
				 * other than ACTIVE or WARMUP), we will
				 * continue.  This assures that the exit action
				 * can be successfully recorded at most once
				 * when we're in the ACTIVE state.  If we're
				 * encountering the exit() action while in
				 * COOLDOWN, however, we want to honor the new
				 * status code.  (We know that we're the only
				 * thread in COOLDOWN, so there is no race.)
				 */
				void *activity = &state->dts_activity;
				dtrace_activity_t current = state->dts_activity;

				if (current == DTRACE_ACTIVITY_COOLDOWN)
					break;

				if (current != DTRACE_ACTIVITY_WARMUP)
					current = DTRACE_ACTIVITY_ACTIVE;

				if (dtrace_cas32(activity, current,
				    DTRACE_ACTIVITY_DRAINING) != current) {
					*flags |= CPU_DTRACE_DROP;
					continue;
				}

				break;
			}

			default:
				ASSERT(0);
			}

			if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t end = valoffs + size;

				if (!dtrace_vcanload((void *)(uintptr_t)val,
				    &dp->dtdo_rtype, &mstate, vstate))
					continue;

				/*
				 * If this is a string, we're going to only
				 * load until we find the zero byte -- after
				 * which we'll store zero bytes.
				 */
				if (dp->dtdo_rtype.dtdt_kind ==
				    DIF_TYPE_STRING) {
					char c = '\0' + 1;
					int intuple = act->dta_intuple;
					size_t s;

					for (s = 0; s < size; s++) {
						if (c != '\0')
							c = dtrace_load8(val++);

						DTRACE_STORE(uint8_t, tomax,
						    valoffs++, c);

						if (c == '\0' && intuple)
							break;
					}

					continue;
				}

				while (valoffs < end) {
					DTRACE_STORE(uint8_t, tomax, valoffs++,
					    dtrace_load8(val++));
				}

				continue;
			}

			switch (size) {
			case 0:
				break;

			case sizeof (uint8_t):
				DTRACE_STORE(uint8_t, tomax, valoffs, val);
				break;
			case sizeof (uint16_t):
				DTRACE_STORE(uint16_t, tomax, valoffs, val);
				break;
			case sizeof (uint32_t):
				DTRACE_STORE(uint32_t, tomax, valoffs, val);
				break;
			case sizeof (uint64_t):
				DTRACE_STORE(uint64_t, tomax, valoffs, val);
				break;
			default:
				/*
				 * Any other size should have been returned by
				 * reference, not by value.
				 */
				ASSERT(0);
				break;
			}
		}

		if (*flags & CPU_DTRACE_DROP)
			continue;

		if (*flags & CPU_DTRACE_FAULT) {
			int ndx;
			dtrace_action_t *err;

			buf->dtb_errors++;

			if (probe->dtpr_id == dtrace_probeid_error) {
				/*
				 * There's nothing we can do -- we had an
				 * error on the error probe.  We bump an
				 * error counter to at least indicate that
				 * this condition happened.
				 */
				dtrace_error(&state->dts_dblerrors);
				continue;
			}

			if (vtime) {
				/*
				 * Before recursing on dtrace_probe(), we
				 * need to explicitly clear out our start
				 * time to prevent it from being accumulated
				 * into t_dtrace_vtime.
				 */
				curthread->t_dtrace_start = 0;
			}

			/*
			 * Iterate over the actions to figure out which action
			 * we were processing when we experienced the error.
			 * Note that act points _past_ the faulting action; if
			 * act is ecb->dte_action, the fault was in the
			 * predicate, if it's ecb->dte_action->dta_next it's
			 * in action #1, and so on.
			 */
			for (err = ecb->dte_action, ndx = 0;
			    err != act; err = err->dta_next, ndx++)
				continue;

			dtrace_probe_error(state, ecb->dte_epid, ndx,
			    (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ?
			    mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags),
			    cpu_core[cpuid].cpuc_dtrace_illval);

			continue;
		}

		if (!committed)
			buf->dtb_offset = offs + ecb->dte_size;
	}

	if (vtime)
		curthread->t_dtrace_start = dtrace_gethrtime();

	dtrace_interrupt_enable(cookie);
}
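
/*
 * A minimal sketch of the provider side (hypothetical provider and probe
 * names; the authoritative interface is described in <sys/dtrace.h>): a
 * provider that has created a probe simply calls dtrace_probe() from its
 * instrumentation point, passing up to five probe arguments:
 *
 *	void
 *	foo_io_start(foo_buf_t *bp)
 *	{
 *		if (foo_io_start_id != DTRACE_IDNONE)
 *			dtrace_probe(foo_io_start_id,
 *			    (uintptr_t)bp, bp->fb_size, 0, 0, 0);
 *		...
 *	}
 */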
/*
 * DTrace Probe Hashing Functions
 *
 * The functions in this section (and indeed, the functions in remaining
 * sections) are not _called_ from probe context.  (Any exceptions to this are
 * marked with a "Note:".)  Rather, they are called from elsewhere in the
 * DTrace framework to look-up probes in, add probes to and remove probes from
 * the DTrace probe hashes.  (Each probe is hashed by each element of the
 * probe tuple -- allowing for fast lookups, regardless of what was
 * specified.)
 */
static uint_t
dtrace_hash_str(char *p)
{
	unsigned int g;
	uint_t hval = 0;

	while (*p) {
		hval = (hval << 4) + *p++;
		if ((g = (hval & 0xf0000000)) != 0)
			hval ^= g >> 24;
		hval &= ~g;
	}
	return (hval);
}

static dtrace_hash_t *
dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
{
	dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);

	hash->dth_stroffs = stroffs;
	hash->dth_nextoffs = nextoffs;
	hash->dth_prevoffs = prevoffs;

	hash->dth_size = 1;
	hash->dth_mask = hash->dth_size - 1;

	hash->dth_tab = kmem_zalloc(hash->dth_size *
	    sizeof (dtrace_hashbucket_t *), KM_SLEEP);

	return (hash);
}

static void
dtrace_hash_destroy(dtrace_hash_t *hash)
{
#ifdef DEBUG
	int i;

	for (i = 0; i < hash->dth_size; i++)
		ASSERT(hash->dth_tab[i] == NULL);
#endif

	kmem_free(hash->dth_tab,
	    hash->dth_size * sizeof (dtrace_hashbucket_t *));
	kmem_free(hash, sizeof (dtrace_hash_t));
}
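
/*
 * dtrace_hash_str() above is a variant of the classic PJW/ELF string
 * hash: each character is folded in four bits at a time, and any bits
 * that spill into the top nibble are XORed back down and cleared.  For
 * short strings like "read" the top nibble never fills, so the result is
 * simply ((((('r' << 4) + 'e') << 4) + 'a') << 4) + 'd'.  (This note is
 * illustrative; it does not appear in the original source.)
 */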
static void
dtrace_hash_resize(dtrace_hash_t *hash)
{
	int size = hash->dth_size, i, ndx;
	int new_size = hash->dth_size << 1;
	int new_mask = new_size - 1;
	dtrace_hashbucket_t **new_tab, *bucket, *next;

	ASSERT((new_size & new_mask) == 0);

	new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);

	for (i = 0; i < size; i++) {
		for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
			dtrace_probe_t *probe = bucket->dthb_chain;

			ASSERT(probe != NULL);
			ndx = DTRACE_HASHSTR(hash, probe) & new_mask;

			next = bucket->dthb_next;
			bucket->dthb_next = new_tab[ndx];
			new_tab[ndx] = bucket;
		}
	}

	kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
	hash->dth_tab = new_tab;
	hash->dth_size = new_size;
	hash->dth_mask = new_mask;
}
static void
dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
{
	int hashval = DTRACE_HASHSTR(hash, new);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
	dtrace_probe_t **nextp, **prevp;

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
			goto add;
	}

	if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
		dtrace_hash_resize(hash);
		dtrace_hash_add(hash, new);
		return;
	}

	bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
	bucket->dthb_next = hash->dth_tab[ndx];
	hash->dth_tab[ndx] = bucket;
	hash->dth_nbuckets++;

add:
	nextp = DTRACE_HASHNEXT(hash, new);
	ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
	*nextp = bucket->dthb_chain;

	if (bucket->dthb_chain != NULL) {
		prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
		ASSERT(*prevp == NULL);
		*prevp = new;
	}

	bucket->dthb_chain = new;
	bucket->dthb_len++;
}
static dtrace_probe_t *
dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
{
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
			return (bucket->dthb_chain);
	}

	return (NULL);
}

static int
dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
{
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
			return (bucket->dthb_len);
	}

	return (0);
}
static void
dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe)
{
	int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe);
	dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe);

	/*
	 * Find the bucket that we're removing this probe from.
	 */
	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe))
			break;
	}

	ASSERT(bucket != NULL);

	if (*prevp == NULL) {
		if (*nextp == NULL) {
			/*
			 * The removed probe was the only probe on this
			 * bucket; we need to remove the bucket.
			 */
			dtrace_hashbucket_t *b = hash->dth_tab[ndx];

			ASSERT(bucket->dthb_chain == probe);
			ASSERT(b != NULL);

			if (b == bucket) {
				hash->dth_tab[ndx] = bucket->dthb_next;
			} else {
				while (b->dthb_next != bucket)
					b = b->dthb_next;
				b->dthb_next = bucket->dthb_next;
			}

			ASSERT(hash->dth_nbuckets > 0);
			hash->dth_nbuckets--;
			kmem_free(bucket, sizeof (dtrace_hashbucket_t));
			return;
		}

		bucket->dthb_chain = *nextp;
	} else {
		*(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;
	}

	if (*nextp != NULL)
		*(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
}
/*
 * DTrace Utility Functions
 *
 * These are random utility functions that are _not_ called from probe context.
 */
static int
dtrace_badattr(const dtrace_attribute_t *a)
{
	return (a->dtat_name > DTRACE_STABILITY_MAX ||
	    a->dtat_data > DTRACE_STABILITY_MAX ||
	    a->dtat_class > DTRACE_CLASS_MAX);
}

/*
 * Return a duplicate copy of a string.  If the specified string is NULL,
 * this function returns a zero-length string.
 */
static char *
dtrace_strdup(const char *str)
{
	char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP);

	if (str != NULL)
		(void) strcpy(new, str);

	return (new);
}
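
/*
 * Note that dtrace_strdup(NULL) therefore returns "", not NULL, and that
 * the result is always released with kmem_free(new, strlen(new) + 1), as
 * the callers elsewhere in this file do -- e.g.:
 *
 *	char *copy = dtrace_strdup(name);
 *	...
 *	kmem_free(copy, strlen(copy) + 1);
 */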
#define	DTRACE_ISALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

static int
dtrace_badname(const char *s)
{
	char c;

	if (s == NULL || (c = *s++) == '\0')
		return (0);

	if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
		return (1);

	while ((c = *s++) != '\0') {
		if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
		    c != '-' && c != '_' && c != '.' && c != '`')
			return (1);
	}

	return (0);
}
static void
dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
{
	uint32_t priv;

	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		/*
		 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
		 */
		priv = DTRACE_PRIV_ALL;
	} else {
		*uidp = crgetuid(cr);
		*zoneidp = crgetzoneid(cr);

		priv = 0;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
			priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
		else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
			priv |= DTRACE_PRIV_USER;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
			priv |= DTRACE_PRIV_PROC;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
			priv |= DTRACE_PRIV_OWNER;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
			priv |= DTRACE_PRIV_ZONEOWNER;
	}

	*privp = priv;
}
#ifdef DTRACE_ERRDEBUG
static void
dtrace_errdebug(const char *str)
{
	int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ;
	int occupied = 0;

	mutex_enter(&dtrace_errlock);
	dtrace_errlast = str;
	dtrace_errthread = curthread;

	while (occupied++ < DTRACE_ERRHASHSZ) {
		if (dtrace_errhash[hval].dter_msg == str) {
			dtrace_errhash[hval].dter_count++;
			goto out;
		}

		if (dtrace_errhash[hval].dter_msg != NULL) {
			hval = (hval + 1) % DTRACE_ERRHASHSZ;
			continue;
		}

		dtrace_errhash[hval].dter_msg = str;
		dtrace_errhash[hval].dter_count = 1;
		goto out;
	}

	panic("dtrace: undersized error hash");
out:
	mutex_exit(&dtrace_errlock);
}
#endif
/*
 * DTrace Matching Functions
 *
 * These functions are used to match groups of probes, given some elements of
 * a probe tuple, or some globbed expressions for elements of a probe tuple.
 */
static int
dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
    zoneid_t zoneid)
{
	if (priv != DTRACE_PRIV_ALL) {
		uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
		uint32_t match = priv & ppriv;

		/*
		 * No PRIV_DTRACE_* privileges...
		 */
		if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
		    DTRACE_PRIV_KERNEL)) == 0)
			return (0);

		/*
		 * No matching bits, but there were bits to match...
		 */
		if (match == 0 && ppriv != 0)
			return (0);

		/*
		 * Need to have permissions to the process, but don't...
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
		    uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
			return (0);
		}

		/*
		 * Need to be in the same zone unless we possess the
		 * privilege to examine all zones.
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
		    zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
			return (0);
		}
	}

	return (1);
}

/*
 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
 * consists of input pattern strings and an ops-vector to evaluate them.
 * This function returns >0 for match, 0 for no match, and <0 for error.
 */
static int
dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
    uint32_t priv, uid_t uid, zoneid_t zoneid)
{
	dtrace_provider_t *pvp = prp->dtpr_provider;
	int rv;

	if (pvp->dtpv_defunct)
		return (0);

	if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
		return (rv);

	if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
		return (0);

	return (rv);
}
/*
 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
 * interface for matching a glob pattern 'p' to an input string 's'.  Unlike
 * libc's version, the kernel version only applies to 8-bit ASCII strings.
 * In addition, all of the recursion cases except for '*' matching have been
 * unwound.  For '*', we still implement recursive evaluation, but a depth
 * counter is maintained and matching is aborted if we recurse too deep.
 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
 */
static int
dtrace_match_glob(const char *s, const char *p, int depth)
{
	const char *olds;
	char s1, c;
	int gs;

	if (depth > DTRACE_PROBEKEY_MAXDEPTH)
		return (-1);

	if (s == NULL)
		s = ""; /* treat NULL as empty string */

top:
	olds = s;
	s1 = *s++;

	if (p == NULL)
		return (0);

	if ((c = *p++) == '\0')
		return (s1 == '\0');

	switch (c) {
	case '[': {
		int ok = 0, notflag = 0;
		char lc = '\0';

		if (s1 == '\0')
			return (0);

		if (*p == '!') {
			notflag = 1;
			p++;
		}

		if ((c = *p++) == '\0')
			return (0);

		do {
			if (c == '-' && lc != '\0' && *p != ']') {
				if ((c = *p++) == '\0')
					return (0);
				if (c == '\\' && (c = *p++) == '\0')
					return (0);

				if (notflag) {
					if (s1 < lc || s1 > c)
						ok++;
					else
						return (0);
				} else if (lc <= s1 && s1 <= c)
					ok++;

			} else if (c == '\\' && (c = *p++) == '\0')
				return (0);

			lc = c; /* save left-hand 'c' for next iteration */

			if (notflag) {
				if (s1 != c)
					ok++;
				else
					return (0);
			} else if (s1 == c)
				ok++;

			if ((c = *p++) == '\0')
				return (0);

		} while (c != ']');

		if (ok)
			goto top;

		return (0);
	}

	case '\\':
		if ((c = *p++) == '\0')
			return (0);
		/*FALLTHRU*/

	default:
		if (c != s1)
			return (0);
		/*FALLTHRU*/

	case '?':
		if (s1 != '\0')
			goto top;
		return (0);

	case '*':
		while (*p == '*')
			p++; /* consecutive *'s are identical to a single one */

		if (*p == '\0')
			return (1);

		for (s = olds; *s != '\0'; s++) {
			if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
				return (gs);
		}

		return (0);
	}
}

/*ARGSUSED*/
static int
dtrace_match_string(const char *s, const char *p, int depth)
{
	return (s != NULL && strcmp(s, p) == 0);
}

/*ARGSUSED*/
static int
dtrace_match_nul(const char *s, const char *p, int depth)
{
	return (1); /* always match the empty pattern */
}

/*ARGSUSED*/
static int
dtrace_match_nonzero(const char *s, const char *p, int depth)
{
	return (s != NULL && s[0] != '\0');
}
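
/*
 * For illustration (examples inferred from the matchers above; they do
 * not appear in the original source):
 *
 *	dtrace_match_glob("read", "re*", 0)	=> 1
 *	dtrace_match_glob("read", "r?a?", 0)	=> 1
 *	dtrace_match_glob("read", "[a-m]*", 0)	=> 0
 *	dtrace_match_nul("anything", "", 0)	=> 1
 */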
static int
dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
    zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg)
{
	dtrace_probe_t template, *probe;
	dtrace_hash_t *hash = NULL;
	int len, rc, best = INT_MAX, nmatched = 0;
	dtrace_id_t i;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	/*
	 * If the probe ID is specified in the key, just lookup by ID and
	 * invoke the match callback once if a matching probe is found.
	 */
	if (pkp->dtpk_id != DTRACE_IDNONE) {
		if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
		    dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
			if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL)
				return (DTRACE_MATCH_FAIL);
			nmatched++;
		}
		return (nmatched);
	}

	template.dtpr_mod = (char *)pkp->dtpk_mod;
	template.dtpr_func = (char *)pkp->dtpk_func;
	template.dtpr_name = (char *)pkp->dtpk_name;

	/*
	 * We want to find the most distinct of the module name, function
	 * name, and name.  So for each one that is not a glob pattern or
	 * empty string, we perform a lookup in the corresponding hash and
	 * use the hash table with the fewest collisions to do our search.
	 */
	if (pkp->dtpk_mmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
		best = len;
		hash = dtrace_bymod;
	}

	if (pkp->dtpk_fmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
		best = len;
		hash = dtrace_byfunc;
	}

	if (pkp->dtpk_nmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
		best = len;
		hash = dtrace_byname;
	}

	/*
	 * If we did not select a hash table, iterate over every probe and
	 * invoke our callback for each one that matches our input probe key.
	 */
	if (hash == NULL) {
		for (i = 0; i < dtrace_nprobes; i++) {
			if ((probe = dtrace_probes[i]) == NULL ||
			    dtrace_match_probe(probe, pkp, priv, uid,
			    zoneid) <= 0)
				continue;

			nmatched++;

			if ((rc = (*matched)(probe, arg)) !=
			    DTRACE_MATCH_NEXT) {
				if (rc == DTRACE_MATCH_FAIL)
					return (DTRACE_MATCH_FAIL);
				break;
			}
		}

		return (nmatched);
	}

	/*
	 * If we selected a hash table, iterate over each probe of the same key
	 * name and invoke the callback for every probe that matches the other
	 * attributes of our input probe key.
	 */
	for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
	    probe = *(DTRACE_HASHNEXT(hash, probe))) {

		if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
			continue;

		nmatched++;

		if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
			if (rc == DTRACE_MATCH_FAIL)
				return (DTRACE_MATCH_FAIL);
			break;
		}
	}

	return (nmatched);
}
/*
 * Return the function pointer dtrace_probecmp() should use to compare the
 * specified pattern with a string.  For NULL or empty patterns, we select
 * dtrace_match_nul().  For glob pattern strings, we use dtrace_match_glob().
 * For non-empty non-glob strings, we use dtrace_match_string().
 */
static dtrace_probekey_f *
dtrace_probekey_func(const char *p)
{
	char c;

	if (p == NULL || *p == '\0')
		return (&dtrace_match_nul);

	while ((c = *p++) != '\0') {
		if (c == '[' || c == '?' || c == '*' || c == '\\')
			return (&dtrace_match_glob);
	}

	return (&dtrace_match_string);
}

/*
 * Build a probe comparison key for use with dtrace_match_probe() from the
 * given probe description.  By convention, a null key only matches anchored
 * probes: if each field is the empty string, reset dtpk_fmatch to
 * dtrace_match_nonzero().
 */
static void
dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
{
	pkp->dtpk_prov = pdp->dtpd_provider;
	pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);

	pkp->dtpk_mod = pdp->dtpd_mod;
	pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);

	pkp->dtpk_func = pdp->dtpd_func;
	pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);

	pkp->dtpk_name = pdp->dtpd_name;
	pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);

	pkp->dtpk_id = pdp->dtpd_id;

	if (pkp->dtpk_id == DTRACE_IDNONE &&
	    pkp->dtpk_pmatch == &dtrace_match_nul &&
	    pkp->dtpk_mmatch == &dtrace_match_nul &&
	    pkp->dtpk_fmatch == &dtrace_match_nul &&
	    pkp->dtpk_nmatch == &dtrace_match_nul)
		pkp->dtpk_fmatch = &dtrace_match_nonzero;
}
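
/*
 * For example (an illustration that does not appear in the original
 * source): the description "syscall::re*:entry" compiles to a key with
 * dtpk_pmatch = dtrace_match_string ("syscall"), dtpk_mmatch =
 * dtrace_match_nul (empty module field), dtpk_fmatch = dtrace_match_glob
 * ("re*"), and dtpk_nmatch = dtrace_match_string ("entry").
 */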
/*
 * DTrace Provider-to-Framework API Functions
 *
 * These functions implement much of the Provider-to-Framework API, as
 * described in <sys/dtrace.h>.  The parts of the API not in this section are
 * the functions in the API for probe management (found below), and
 * dtrace_probe() itself (found above).
 */

/*
 * Register the calling provider with the DTrace framework.  This should
 * generally be called by DTrace providers in their attach(9E) entry point.
 */
int
dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
    cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
{
	dtrace_provider_t *provider;

	if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "arguments", name ? name : "<NULL>");
		return (EINVAL);
	}

	if (name[0] == '\0' || dtrace_badname(name)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider name", name);
		return (EINVAL);
	}

	if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
	    pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
	    pops->dtps_destroy == NULL ||
	    ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider ops", name);
		return (EINVAL);
	}

	if (dtrace_badattr(&pap->dtpa_provider) ||
	    dtrace_badattr(&pap->dtpa_mod) ||
	    dtrace_badattr(&pap->dtpa_func) ||
	    dtrace_badattr(&pap->dtpa_name) ||
	    dtrace_badattr(&pap->dtpa_args)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider attributes", name);
		return (EINVAL);
	}

	if (priv & ~DTRACE_PRIV_ALL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "privilege attributes", name);
		return (EINVAL);
	}

	if ((priv & DTRACE_PRIV_KERNEL) &&
	    (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
	    pops->dtps_mode == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': need "
		    "dtps_mode() op for given privilege attributes", name);
		return (EINVAL);
	}

	provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
	provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(provider->dtpv_name, name);

	provider->dtpv_attr = *pap;
	provider->dtpv_priv.dtpp_flags = priv;
	if (cr != NULL) {
		provider->dtpv_priv.dtpp_uid = crgetuid(cr);
		provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
	}
	provider->dtpv_pops = *pops;

	if (pops->dtps_provide == NULL) {
		ASSERT(pops->dtps_provide_module != NULL);
		provider->dtpv_pops.dtps_provide =
		    (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop;
	}

	if (pops->dtps_provide_module == NULL) {
		ASSERT(pops->dtps_provide != NULL);
		provider->dtpv_pops.dtps_provide_module =
		    (void (*)(void *, struct modctl *))dtrace_nullop;
	}

	if (pops->dtps_suspend == NULL) {
		ASSERT(pops->dtps_resume == NULL);
		provider->dtpv_pops.dtps_suspend =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
		provider->dtpv_pops.dtps_resume =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
	}

	provider->dtpv_arg = arg;
	*idp = (dtrace_provider_id_t)provider;

	if (pops == &dtrace_provider_ops) {
		ASSERT(MUTEX_HELD(&dtrace_provider_lock));
		ASSERT(MUTEX_HELD(&dtrace_lock));
		ASSERT(dtrace_anon.dta_enabling == NULL);

		/*
		 * We make sure that the DTrace provider is at the head of
		 * the provider chain.
		 */
		provider->dtpv_next = dtrace_provider;
		dtrace_provider = provider;
		return (0);
	}

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	/*
	 * If there is at least one provider registered, we'll add this
	 * provider after the first provider.
	 */
	if (dtrace_provider != NULL) {
		provider->dtpv_next = dtrace_provider->dtpv_next;
		dtrace_provider->dtpv_next = provider;
	} else {
		dtrace_provider = provider;
	}

	if (dtrace_retained != NULL) {
		dtrace_enabling_provide(provider);

		/*
		 * Now we need to call dtrace_enabling_matchall() -- which
		 * will acquire cpu_lock and dtrace_lock.  We therefore need
		 * to drop all of our locks before calling into it...
		 */
		mutex_exit(&dtrace_lock);
		mutex_exit(&dtrace_provider_lock);
		dtrace_enabling_matchall();

		return (0);
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);

	return (0);
}
/*
 * Unregister the specified provider from the DTrace framework.  This should
 * generally be called by DTrace providers in their detach(9E) entry point.
 */
int
dtrace_unregister(dtrace_provider_id_t id)
{
	dtrace_provider_t *old = (dtrace_provider_t *)id;
	dtrace_provider_t *prev = NULL;
	int i, self = 0, noreap = 0;
	dtrace_probe_t *probe, *first = NULL;

	if (old->dtpv_pops.dtps_enable ==
	    (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {
		/*
		 * If DTrace itself is the provider, we're called with locks
		 * already held.
		 */
		ASSERT(old == dtrace_provider);
		ASSERT(dtrace_devi != NULL);
		ASSERT(MUTEX_HELD(&dtrace_provider_lock));
		ASSERT(MUTEX_HELD(&dtrace_lock));
		self = 1;

		if (dtrace_provider->dtpv_next != NULL) {
			/*
			 * There's another provider here; return failure.
			 */
			return (EBUSY);
		}
	} else {
		mutex_enter(&dtrace_provider_lock);
		mutex_enter(&mod_lock);
		mutex_enter(&dtrace_lock);
	}

	/*
	 * If anyone has /dev/dtrace open, or if there are anonymous enabled
	 * probes, we refuse to let providers slither away, unless this
	 * provider has already been explicitly invalidated.
	 */
	if (!old->dtpv_defunct &&
	    (dtrace_opens || (dtrace_anon.dta_state != NULL &&
	    dtrace_anon.dta_state->dts_necbs > 0))) {
		if (!self) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_provider_lock);
		}
		return (EBUSY);
	}

	/*
	 * Attempt to destroy the probes associated with this provider.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != old)
			continue;

		if (probe->dtpr_ecb == NULL)
			continue;

		/*
		 * If we are trying to unregister a defunct provider, and the
		 * provider was made defunct within the interval dictated by
		 * dtrace_unregister_defunct_reap, we'll (asynchronously)
		 * attempt to reap our enablings.  To denote that the provider
		 * should reattempt to unregister itself at some point in the
		 * future, we will return a differentiable error code (EAGAIN
		 * instead of EBUSY) in this case.
		 */
		if (dtrace_gethrtime() - old->dtpv_defunct >
		    dtrace_unregister_defunct_reap)
			noreap = 1;

		if (!self) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_provider_lock);
		}

		if (noreap)
			return (EBUSY);

		(void) taskq_dispatch(dtrace_taskq,
		    (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP);

		return (EAGAIN);
	}

	/*
	 * All of the probes for this provider are disabled; we can safely
	 * remove all of them from their hash chains and from the probe array.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != old)
			continue;

		dtrace_probes[i] = NULL;

		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		if (first == NULL) {
			first = probe;
			probe->dtpr_nextmod = NULL;
		} else {
			probe->dtpr_nextmod = first;
			first = probe;
		}
	}

	/*
	 * The provider's probes have been removed from the hash chains and
	 * from the probe array.  Now issue a dtrace_sync() to be sure that
	 * everyone has cleared out from any probe array processing.
	 */
	dtrace_sync();

	for (probe = first; probe != NULL; probe = first) {
		first = probe->dtpr_nextmod;

		old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
		vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1);
		kmem_free(probe, sizeof (dtrace_probe_t));
	}

	if ((prev = dtrace_provider) == old) {
		ASSERT(self || dtrace_devi == NULL);
		ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL);
		dtrace_provider = old->dtpv_next;
	} else {
		while (prev != NULL && prev->dtpv_next != old)
			prev = prev->dtpv_next;

		if (prev == NULL) {
			panic("attempt to unregister non-existent "
			    "dtrace provider %p\n", (void *)id);
		}

		prev->dtpv_next = old->dtpv_next;
	}

	if (!self) {
		mutex_exit(&dtrace_lock);
		mutex_exit(&mod_lock);
		mutex_exit(&dtrace_provider_lock);
	}

	kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1);
	kmem_free(old, sizeof (dtrace_provider_t));

	return (0);
}
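
/*
 * Example (illustrative sketch): a provider's detach(9E) routine would
 * typically fail its detach when unregistration is refused; foo_id is the
 * hypothetical id from the registration sketch above.
 *
 *	if (dtrace_unregister(foo_id) != 0)
 *		return (DDI_FAILURE);
 *
 * An EAGAIN return (defunct provider, reap dispatched) likewise means
 * the detach should be retried later.
 */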
/*
 * Invalidate the specified provider.  All subsequent probe lookups for the
 * specified provider will fail, but its probes will not be removed.
 */
void
dtrace_invalidate(dtrace_provider_id_t id)
{
	dtrace_provider_t *pvp = (dtrace_provider_t *)id;

	ASSERT(pvp->dtpv_pops.dtps_enable !=
	    (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	pvp->dtpv_defunct = dtrace_gethrtime();

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);
}
/*
 * Indicate whether or not DTrace has attached.
 */
int
dtrace_attached(void)
{
	/*
	 * dtrace_provider will be non-NULL iff the DTrace driver has
	 * attached.  (It's non-NULL because DTrace is always itself a
	 * provider.)
	 */
	return (dtrace_provider != NULL);
}
/*
 * Remove all the unenabled probes for the given provider.  This function is
 * not unlike dtrace_unregister(), except that it doesn't remove the provider
 * -- just as many of its associated probes as it can.
 */
int
dtrace_condense(dtrace_provider_id_t id)
{
	dtrace_provider_t *prov = (dtrace_provider_t *)id;
	int i;
	dtrace_probe_t *probe;

	/*
	 * Make sure this isn't the dtrace provider itself.
	 */
	ASSERT(prov->dtpv_pops.dtps_enable !=
	    (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	/*
	 * Attempt to destroy the probes associated with this provider.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != prov)
			continue;

		if (probe->dtpr_ecb != NULL)
			continue;

		dtrace_probes[i] = NULL;

		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
		kmem_free(probe, sizeof (dtrace_probe_t));
		vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1);
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);

	return (0);
}
/*
 * DTrace Probe Management Functions
 *
 * The functions in this section perform the DTrace probe management,
 * including functions to create probes, look-up probes, and call into the
 * providers to request that probes be provided.  Some of these functions are
 * in the Provider-to-Framework API; these functions can be identified by the
 * fact that they are not declared "static".
 */
/*
 * Create a probe with the specified module name, function name, and name.
 */
dtrace_id_t
dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
    const char *func, const char *name, int aframes, void *arg)
{
	dtrace_probe_t *probe, **probes;
	dtrace_provider_t *provider = (dtrace_provider_t *)prov;
	dtrace_id_t id;

	if (provider == dtrace_provider) {
		ASSERT(MUTEX_HELD(&dtrace_lock));
	} else {
		mutex_enter(&dtrace_lock);
	}

	id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
	    VM_BESTFIT | VM_SLEEP);
	probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP);

	probe->dtpr_id = id;
	probe->dtpr_gen = dtrace_probegen++;
	probe->dtpr_mod = dtrace_strdup(mod);
	probe->dtpr_func = dtrace_strdup(func);
	probe->dtpr_name = dtrace_strdup(name);
	probe->dtpr_arg = arg;
	probe->dtpr_aframes = aframes;
	probe->dtpr_provider = provider;

	dtrace_hash_add(dtrace_bymod, probe);
	dtrace_hash_add(dtrace_byfunc, probe);
	dtrace_hash_add(dtrace_byname, probe);

	if (id - 1 >= dtrace_nprobes) {
		size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
		size_t nsize = osize << 1;

		if (nsize == 0) {
			ASSERT(osize == 0);
			ASSERT(dtrace_probes == NULL);
			nsize = sizeof (dtrace_probe_t *);
		}

		probes = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_probes == NULL) {
			ASSERT(osize == 0);
			dtrace_probes = probes;
			dtrace_nprobes = 1;
		} else {
			dtrace_probe_t **oprobes = dtrace_probes;

			bcopy(oprobes, probes, osize);
			dtrace_membar_producer();
			dtrace_probes = probes;

			dtrace_sync();

			/*
			 * All CPUs are now seeing the new probes array; we can
			 * safely free the old array.
			 */
			kmem_free(oprobes, osize);
			dtrace_nprobes <<= 1;
		}

		ASSERT(id - 1 < dtrace_nprobes);
	}

	ASSERT(dtrace_probes[id - 1] == NULL);
	dtrace_probes[id - 1] = probe;

	if (provider != dtrace_provider)
		mutex_exit(&dtrace_lock);

	return (id);
}
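
/*
 * Example (illustrative sketch): a provider creates its probes from its
 * dtps_provide() op, passing the opaque per-probe argument it wants handed
 * back in dtps_enable() and friends; the "foo" names are hypothetical.
 *
 *	foo->foo_probe = dtrace_probe_create(foo_id, "foomod", "foofunc",
 *	    "entry", 0, foo);
 */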
static dtrace_probe_t *
dtrace_probe_lookup_id(dtrace_id_t id)
{
	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (id == 0 || id > dtrace_nprobes)
		return (NULL);

	return (dtrace_probes[id - 1]);
}
static int
dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
{
	*((dtrace_id_t *)arg) = probe->dtpr_id;

	return (DTRACE_MATCH_DONE);
}
/*
 * Look up a probe based on provider and one or more of module name, function
 * name and probe name.
 */
dtrace_id_t
dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
    const char *func, const char *name)
{
	dtrace_probekey_t pkey;
	dtrace_id_t id;
	int match;

	pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
	pkey.dtpk_pmatch = &dtrace_match_string;
	pkey.dtpk_mod = mod;
	pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_func = func;
	pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_name = name;
	pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_id = DTRACE_IDNONE;

	mutex_enter(&dtrace_lock);
	match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
	    dtrace_probe_lookup_match, &id);
	mutex_exit(&dtrace_lock);

	ASSERT(match == 1 || match == 0);
	return (match ? id : 0);
}
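
/*
 * Example (illustrative sketch): because dtrace_probe_lookup() returns 0
 * when nothing matches, providers commonly use it as a duplicate-probe
 * guard before calling dtrace_probe_create(); the names below are
 * hypothetical.
 *
 *	if (dtrace_probe_lookup(foo_id, "foomod", "foofunc", "entry") != 0)
 *		return;
 */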
/*
 * Returns the probe argument associated with the specified probe.
 */
void *
dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
{
	dtrace_probe_t *probe;
	void *rval = NULL;

	mutex_enter(&dtrace_lock);

	if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
	    probe->dtpr_provider == (dtrace_provider_t *)id)
		rval = probe->dtpr_arg;

	mutex_exit(&dtrace_lock);

	return (rval);
}
/*
 * Copy a probe into a probe description.
 */
static void
dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
{
	bzero(pdp, sizeof (dtrace_probedesc_t));
	pdp->dtpd_id = prp->dtpr_id;

	(void) strncpy(pdp->dtpd_provider,
	    prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1);

	(void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1);
	(void) strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 1);
	(void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1);
}
/*
 * Called to indicate that a probe -- or probes -- should be provided by a
 * specified provider.  If the specified description is NULL, the provider will
 * be told to provide all of its probes.  (This is done whenever a new
 * consumer comes along, or whenever a retained enabling is to be matched.) If
 * the specified description is non-NULL, the provider is given the
 * opportunity to dynamically provide the specified probe, allowing providers
 * to support the creation of probes on-the-fly.  (So-called _autocreated_
 * probes.)  If the provider is NULL, the operations will be applied to all
 * providers; if the provider is non-NULL the operations will only be applied
 * to the specified provider.  The dtrace_provider_lock must be held, and the
 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
 * will need to grab the dtrace_lock when it reenters the framework through
 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
 */
static void
dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
{
	struct modctl *ctl;
	int all = 0;

	ASSERT(MUTEX_HELD(&dtrace_provider_lock));

	if (prv == NULL) {
		all = 1;
		prv = dtrace_provider;
	}

	do {
		/*
		 * First, call the blanket provide operation.
		 */
		prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);

		/*
		 * Now call the per-module provide operation.  We will grab
		 * mod_lock to prevent the list from being modified.  Note
		 * that this also prevents the mod_busy bits from changing.
		 * (mod_busy can only be changed with mod_lock held.)
		 */
		mutex_enter(&mod_lock);

		ctl = &modules;
		do {
			if (ctl->mod_busy || ctl->mod_mp == NULL)
				continue;

			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

		} while ((ctl = ctl->mod_next) != &modules);

		mutex_exit(&mod_lock);
	} while (all && (prv = prv->dtpv_next) != NULL);
}
/*
 * Iterate over each probe, and call the Framework-to-Provider API function
 * denoted by offs.
 */
static void
dtrace_probe_foreach(uintptr_t offs)
{
	dtrace_provider_t *prov;
	void (*func)(void *, dtrace_id_t, void *);
	dtrace_probe_t *probe;
	dtrace_icookie_t cookie;
	int i;

	/*
	 * We disable interrupts to walk through the probe array.  This is
	 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
	 * won't see stale data.
	 */
	cookie = dtrace_interrupt_disable();

	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_ecb == NULL) {
			/*
			 * This probe isn't enabled -- don't call the function.
			 */
			continue;
		}

		prov = probe->dtpr_provider;
		func = *((void(**)(void *, dtrace_id_t, void *))
		    ((uintptr_t)&prov->dtpv_pops + offs));

		func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
	}

	dtrace_interrupt_enable(cookie);
}
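
/*
 * Example (illustrative sketch): callers select the provider op by its byte
 * offset within the dtrace_pops_t, e.g. to suspend every enabled probe:
 *
 *	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
 */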
static int
dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
{
	dtrace_probekey_t pkey;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	dtrace_ecb_create_cache = NULL;

	if (desc == NULL) {
		/*
		 * If we're passed a NULL description, we're being asked to
		 * create an ECB with a NULL probe.
		 */
		(void) dtrace_ecb_create_enable(NULL, enab);
		return (0);
	}

	dtrace_probekey(desc, &pkey);
	dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
	    &priv, &uid, &zoneid);

	return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
	    enab));
}
/*
 * DTrace Helper Provider Functions
 */
static void
dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
{
	attr->dtat_name = DOF_ATTR_NAME(dofattr);
	attr->dtat_data = DOF_ATTR_DATA(dofattr);
	attr->dtat_class = DOF_ATTR_CLASS(dofattr);
}

static void
dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
    const dof_provider_t *dofprov, char *strtab)
{
	hprov->dthpv_provname = strtab + dofprov->dofpv_name;
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
	    dofprov->dofpv_provattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
	    dofprov->dofpv_modattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
	    dofprov->dofpv_funcattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
	    dofprov->dofpv_nameattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
	    dofprov->dofpv_argsattr);
}
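
/*
 * For reference (a note, not original to this spot): a dof_attr_t packs the
 * name, data and class stability components into a single 32-bit word (see
 * the DOF_ATTR* macros in <sys/dtrace.h>), so the conversion above is just
 * a field-by-field unpacking of that word into a dtrace_attribute_t.
 */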
static void
dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
	dof_provider_t *provider;
	dof_probe_t *probe;
	uint32_t *off, *enoff;
	uint8_t *arg;
	char *strtab;
	uint_t i, nprobes;
	dtrace_helper_provdesc_t dhpv;
	dtrace_helper_probedesc_t dhpb;
	dtrace_meta_t *meta = dtrace_meta_pid;
	dtrace_mops_t *mops = &meta->dtm_mops;
	void *parg;

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_strtab * dof->dofh_secsize);
	prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_probes * dof->dofh_secsize);
	arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_prargs * dof->dofh_secsize);
	off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_proffs * dof->dofh_secsize);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
	off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset);
	arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
	enoff = NULL;

	/*
	 * See dtrace_helper_provider_validate().
	 */
	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    provider->dofpv_prenoffs != DOF_SECT_NONE) {
		enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
		    provider->dofpv_prenoffs * dof->dofh_secsize);
		enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset);
	}

	nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;

	/*
	 * Create the provider.
	 */
	dtrace_dofprov2hprov(&dhpv, provider, strtab);

	if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL)
		return;

	meta->dtm_count++;

	/*
	 * Create the probes.
	 */
	for (i = 0; i < nprobes; i++) {
		probe = (dof_probe_t *)(uintptr_t)(daddr +
		    prb_sec->dofs_offset + i * prb_sec->dofs_entsize);

		dhpb.dthpb_mod = dhp->dofhp_mod;
		dhpb.dthpb_func = strtab + probe->dofpr_func;
		dhpb.dthpb_name = strtab + probe->dofpr_name;
		dhpb.dthpb_base = probe->dofpr_addr;
		dhpb.dthpb_offs = off + probe->dofpr_offidx;
		dhpb.dthpb_noffs = probe->dofpr_noffs;
		if (enoff != NULL) {
			dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx;
			dhpb.dthpb_nenoffs = probe->dofpr_nenoffs;
		} else {
			dhpb.dthpb_enoffs = NULL;
			dhpb.dthpb_nenoffs = 0;
		}
		dhpb.dthpb_args = arg + probe->dofpr_argidx;
		dhpb.dthpb_nargc = probe->dofpr_nargc;
		dhpb.dthpb_xargc = probe->dofpr_xargc;
		dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv;
		dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv;

		mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb);
	}
}
static void
dtrace_helper_provide(dof_helper_t *dhp, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	int i;

	ASSERT(MUTEX_HELD(&dtrace_meta_lock));

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
		    dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_PROVIDER)
			continue;

		dtrace_helper_provide_one(dhp, sec, pid);
	}

	/*
	 * We may have just created probes, so we must now rematch against
	 * any retained enablings.  Note that this call will acquire both
	 * cpu_lock and dtrace_lock; the fact that we are holding
	 * dtrace_meta_lock now is what defines the ordering with respect to
	 * these three locks.
	 */
	dtrace_enabling_matchall();
}
static void
dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	dof_sec_t *str_sec;
	dof_provider_t *provider;
	char *strtab;
	dtrace_helper_provdesc_t dhpv;
	dtrace_meta_t *meta = dtrace_meta_pid;
	dtrace_mops_t *mops = &meta->dtm_mops;

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_strtab * dof->dofh_secsize);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);

	/*
	 * Create the provider.
	 */
	dtrace_dofprov2hprov(&dhpv, provider, strtab);

	mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid);

	meta->dtm_count--;
}
static void
dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	int i;

	ASSERT(MUTEX_HELD(&dtrace_meta_lock));

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
		    dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_PROVIDER)
			continue;

		dtrace_helper_provider_remove_one(dhp, sec, pid);
	}
}
/*
 * DTrace Meta Provider-to-Framework API Functions
 *
 * These functions implement the Meta Provider-to-Framework API, as described
 * in <sys/dtrace.h>.
 */
int
dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg,
    dtrace_meta_provider_id_t *idp)
{
	dtrace_meta_t *meta;
	dtrace_helpers_t *help, *next;
	int i;

	*idp = DTRACE_METAPROVNONE;

	/*
	 * We strictly don't need the name, but we hold onto it for
	 * debuggability. All hail error queues!
	 */
	if (name == NULL) {
		cmn_err(CE_WARN, "failed to register meta-provider: "
		    "invalid name");
		return (EINVAL);
	}

	if (mops == NULL ||
	    mops->dtms_create_probe == NULL ||
	    mops->dtms_provide_pid == NULL ||
	    mops->dtms_remove_pid == NULL) {
		cmn_err(CE_WARN, "failed to register meta-register %s: "
		    "invalid ops", name);
		return (EINVAL);
	}

	meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP);
	meta->dtm_mops = *mops;
	meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(meta->dtm_name, name);
	meta->dtm_arg = arg;

	mutex_enter(&dtrace_meta_lock);
	mutex_enter(&dtrace_lock);

	if (dtrace_meta_pid != NULL) {
		mutex_exit(&dtrace_lock);
		mutex_exit(&dtrace_meta_lock);
		cmn_err(CE_WARN, "failed to register meta-register %s: "
		    "user-land meta-provider exists", name);
		kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1);
		kmem_free(meta, sizeof (dtrace_meta_t));
		return (EINVAL);
	}

	dtrace_meta_pid = meta;
	*idp = (dtrace_meta_provider_id_t)meta;

	/*
	 * If there are providers and probes ready to go, pass them
	 * off to the new meta provider now.
	 */
	help = dtrace_deferred_pid;
	dtrace_deferred_pid = NULL;

	mutex_exit(&dtrace_lock);

	while (help != NULL) {
		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
			    help->dthps_pid);
		}

		next = help->dthps_next;
		help->dthps_next = NULL;
		help->dthps_prev = NULL;
		help->dthps_deferred = 0;
		help = next;
	}

	mutex_exit(&dtrace_meta_lock);

	return (0);
}
int
dtrace_meta_unregister(dtrace_meta_provider_id_t id)
{
	dtrace_meta_t **pp, *old = (dtrace_meta_t *)id;

	mutex_enter(&dtrace_meta_lock);
	mutex_enter(&dtrace_lock);

	if (old == dtrace_meta_pid) {
		pp = &dtrace_meta_pid;
	} else {
		panic("attempt to unregister non-existent "
		    "dtrace meta-provider %p\n", (void *)old);
	}

	if (old->dtm_count != 0) {
		mutex_exit(&dtrace_lock);
		mutex_exit(&dtrace_meta_lock);
		return (EBUSY);
	}

	*pp = NULL;

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_meta_lock);

	kmem_free(old->dtm_name, strlen(old->dtm_name) + 1);
	kmem_free(old, sizeof (dtrace_meta_t));

	return (0);
}
/*
 * DTrace DIF Object Functions
 */
static int
dtrace_difo_err(uint_t pc, const char *format, ...)
{
	if (dtrace_err_verbose) {
		va_list alist;

		(void) uprintf("dtrace DIF object error: [%u]: ", pc);
		va_start(alist, format);
		(void) vuprintf(format, alist);
		va_end(alist);
	}

#ifdef DTRACE_ERRDEBUG
	dtrace_errdebug(format);
#endif
	return (1);
}
/*
 * Validate a DTrace DIF object by checking the IR instructions.  The following
 * rules are currently enforced by dtrace_difo_validate():
 *
 * 1. Each instruction must have a valid opcode
 * 2. Each register, string, variable, or subroutine reference must be valid
 * 3. No instruction can modify register %r0 (must be zero)
 * 4. All instruction reserved bits must be set to zero
 * 5. The last instruction must be a "ret" instruction
 * 6. All branch targets must reference a valid instruction _after_ the branch
 */
static int
dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
    cred_t *cr)
{
	int err = 0, i;
	int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
	int kcheckload;
	uint_t pc;

	kcheckload = cr == NULL ||
	    (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0;

	dp->dtdo_destructive = 0;

	for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
		dif_instr_t instr = dp->dtdo_buf[pc];

		uint_t r1 = DIF_INSTR_R1(instr);
		uint_t r2 = DIF_INSTR_R2(instr);
		uint_t rd = DIF_INSTR_RD(instr);
		uint_t rs = DIF_INSTR_RS(instr);
		uint_t label = DIF_INSTR_LABEL(instr);
		uint_t v = DIF_INSTR_VAR(instr);
		uint_t subr = DIF_INSTR_SUBR(instr);
		uint_t type = DIF_INSTR_TYPE(instr);
		uint_t op = DIF_INSTR_OP(instr);

		switch (op) {
		case DIF_OP_OR:
		case DIF_OP_XOR:
		case DIF_OP_AND:
		case DIF_OP_SLL:
		case DIF_OP_SRL:
		case DIF_OP_SRA:
		case DIF_OP_SUB:
		case DIF_OP_ADD:
		case DIF_OP_MUL:
		case DIF_OP_SDIV:
		case DIF_OP_UDIV:
		case DIF_OP_SREM:
		case DIF_OP_UREM:
		case DIF_OP_COPYS:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 >= nregs)
				err += efunc(pc, "invalid register %u\n", r2);
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_NOT:
		case DIF_OP_MOV:
		case DIF_OP_ALLOCS:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_LDSB:
		case DIF_OP_LDSH:
		case DIF_OP_LDSW:
		case DIF_OP_LDUB:
		case DIF_OP_LDUH:
		case DIF_OP_LDUW:
		case DIF_OP_LDX:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			if (kcheckload)
				dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op +
				    DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd);
			break;

		case DIF_OP_RLDSB:
		case DIF_OP_RLDSH:
		case DIF_OP_RLDSW:
		case DIF_OP_RLDUB:
		case DIF_OP_RLDUH:
		case DIF_OP_RLDUW:
		case DIF_OP_RLDX:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_ULDSB:
		case DIF_OP_ULDSH:
		case DIF_OP_ULDSW:
		case DIF_OP_ULDUB:
		case DIF_OP_ULDUH:
		case DIF_OP_ULDUW:
		case DIF_OP_ULDX:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_STB:
		case DIF_OP_STH:
		case DIF_OP_STW:
		case DIF_OP_STX:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to 0 address\n");
			break;

		case DIF_OP_CMP:
		case DIF_OP_SCMP:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 >= nregs)
				err += efunc(pc, "invalid register %u\n", r2);
			if (rd != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			break;

		case DIF_OP_TST:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0 || rd != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			break;

		case DIF_OP_BA:
		case DIF_OP_BE:
		case DIF_OP_BNE:
		case DIF_OP_BG:
		case DIF_OP_BGU:
		case DIF_OP_BGE:
		case DIF_OP_BGEU:
		case DIF_OP_BL:
		case DIF_OP_BLU:
		case DIF_OP_BLE:
		case DIF_OP_BLEU:
			if (label >= dp->dtdo_len) {
				err += efunc(pc, "invalid branch target %u\n",
				    label);
			}
			if (label <= pc) {
				err += efunc(pc, "backward branch to %u\n",
				    label);
			}
			break;

		case DIF_OP_RET:
			if (r1 != 0 || r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			break;

		case DIF_OP_NOP:
		case DIF_OP_POPTS:
		case DIF_OP_FLUSHTS:
			if (r1 != 0 || r2 != 0 || rd != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			break;

		case DIF_OP_SETX:
			if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) {
				err += efunc(pc, "invalid integer ref %u\n",
				    DIF_INSTR_INTEGER(instr));
			}
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_SETS:
			if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) {
				err += efunc(pc, "invalid string ref %u\n",
				    DIF_INSTR_STRING(instr));
			}
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_LDGA:
		case DIF_OP_LDTA:
			if (r1 > DIF_VAR_ARRAY_MAX)
				err += efunc(pc, "invalid array %u\n", r1);
			if (r2 >= nregs)
				err += efunc(pc, "invalid register %u\n", r2);
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_LDGS:
		case DIF_OP_LDGAA:
		case DIF_OP_LDTAA:
		case DIF_OP_LDTS:
		case DIF_OP_LDLS:
			if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX)
				err += efunc(pc, "invalid variable %u\n", v);
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_STGS:
		case DIF_OP_STGAA:
		case DIF_OP_STTAA:
		case DIF_OP_STTS:
		case DIF_OP_STLS:
			if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX)
				err += efunc(pc, "invalid variable %u\n", v);
			if (rs >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			break;

		case DIF_OP_CALL:
			if (subr > DIF_SUBR_MAX)
				err += efunc(pc, "invalid subr %u\n", subr);
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");

			if (subr == DIF_SUBR_COPYOUT ||
			    subr == DIF_SUBR_COPYOUTSTR) {
				dp->dtdo_destructive = 1;
			}
			break;

		case DIF_OP_PUSHTR:
			if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
				err += efunc(pc, "invalid ref type %u\n", type);
			if (r2 >= nregs)
				err += efunc(pc, "invalid register %u\n", r2);
			if (rs >= nregs)
				err += efunc(pc, "invalid register %u\n", rs);
			break;

		case DIF_OP_PUSHTV:
			if (type != DIF_TYPE_CTF)
				err += efunc(pc, "invalid val type %u\n", type);
			if (r2 >= nregs)
				err += efunc(pc, "invalid register %u\n", r2);
			if (rs >= nregs)
				err += efunc(pc, "invalid register %u\n", rs);
			break;

		default:
			err += efunc(pc, "invalid opcode %u\n",
			    DIF_INSTR_OP(instr));
		}
	}

	if (dp->dtdo_len != 0 &&
	    DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) {
		err += efunc(dp->dtdo_len - 1,
		    "expected 'ret' as last DIF instruction\n");
	}

	if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) {
		/*
		 * If we're not returning by reference, the size must be either
		 * 0 or the size of one of the base types.
		 */
		switch (dp->dtdo_rtype.dtdt_size) {
		case 0:
		case sizeof (uint8_t):
		case sizeof (uint16_t):
		case sizeof (uint32_t):
		case sizeof (uint64_t):
			break;

		default:
			err += efunc(dp->dtdo_len - 1, "bad return size\n");
		}
	}

	for (i = 0; i < dp->dtdo_varlen && err == 0; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL;
		dtrace_diftype_t *vt, *et;
		uint_t id, ndx;

		if (v->dtdv_scope != DIFV_SCOPE_GLOBAL &&
		    v->dtdv_scope != DIFV_SCOPE_THREAD &&
		    v->dtdv_scope != DIFV_SCOPE_LOCAL) {
			err += efunc(i, "unrecognized variable scope %d\n",
			    v->dtdv_scope);
			break;
		}

		if (v->dtdv_kind != DIFV_KIND_ARRAY &&
		    v->dtdv_kind != DIFV_KIND_SCALAR) {
			err += efunc(i, "unrecognized variable type %d\n",
			    v->dtdv_kind);
			break;
		}

		if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) {
			err += efunc(i, "%d exceeds variable id limit\n", id);
			break;
		}

		if (id < DIF_VAR_OTHER_UBASE)
			continue;

		/*
		 * For user-defined variables, we need to check that this
		 * definition is identical to any previous definition that we
		 * encountered.
		 */
		ndx = id - DIF_VAR_OTHER_UBASE;

		switch (v->dtdv_scope) {
		case DIFV_SCOPE_GLOBAL:
			if (ndx < vstate->dtvs_nglobals) {
				dtrace_statvar_t *svar;

				if ((svar = vstate->dtvs_globals[ndx]) != NULL)
					existing = &svar->dtsv_var;
			}

			break;

		case DIFV_SCOPE_THREAD:
			if (ndx < vstate->dtvs_ntlocals)
				existing = &vstate->dtvs_tlocals[ndx];
			break;

		case DIFV_SCOPE_LOCAL:
			if (ndx < vstate->dtvs_nlocals) {
				dtrace_statvar_t *svar;

				if ((svar = vstate->dtvs_locals[ndx]) != NULL)
					existing = &svar->dtsv_var;
			}

			break;
		}

		vt = &v->dtdv_type;

		if (vt->dtdt_flags & DIF_TF_BYREF) {
			if (vt->dtdt_size == 0) {
				err += efunc(i, "zero-sized variable\n");
				break;
			}

			if (v->dtdv_scope == DIFV_SCOPE_GLOBAL &&
			    vt->dtdt_size > dtrace_global_maxsize) {
				err += efunc(i, "oversized by-ref global\n");
				break;
			}
		}

		if (existing == NULL || existing->dtdv_id == 0)
			continue;

		ASSERT(existing->dtdv_id == v->dtdv_id);
		ASSERT(existing->dtdv_scope == v->dtdv_scope);

		if (existing->dtdv_kind != v->dtdv_kind)
			err += efunc(i, "%d changed variable kind\n", id);

		et = &existing->dtdv_type;

		if (vt->dtdt_flags != et->dtdt_flags) {
			err += efunc(i, "%d changed variable type flags\n", id);
			break;
		}

		if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) {
			err += efunc(i, "%d changed variable type size\n", id);
			break;
		}
	}

	return (err);
}
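
/*
 * Worked example (illustrative): with nregs == 8, an instruction encoding
 * "add %r1, %r2, %r0" fails rule 3 above -- rd is 0, so the arithmetic-op
 * case charges one error via efunc(pc, "cannot write to %r0\n") and
 * validation ultimately returns non-zero.
 */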
/*
 * Validate a DTrace DIF object that it is to be used as a helper.  Helpers
 * are much more constrained than normal DIFOs.  Specifically, they may
 * not:
 *
 * 1. Make calls to subroutines other than copyin(), copyinstr() or
 *    miscellaneous string routines
 * 2. Access DTrace variables other than the args[] array, and the
 *    curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
 * 3. Have thread-local variables.
 * 4. Have dynamic variables.
 */
static int
dtrace_difo_validate_helper(dtrace_difo_t *dp)
{
	int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
	int err = 0;
	uint_t pc;

	for (pc = 0; pc < dp->dtdo_len; pc++) {
		dif_instr_t instr = dp->dtdo_buf[pc];

		uint_t v = DIF_INSTR_VAR(instr);
		uint_t subr = DIF_INSTR_SUBR(instr);
		uint_t op = DIF_INSTR_OP(instr);

		switch (op) {
		case DIF_OP_OR:
		case DIF_OP_XOR:
		case DIF_OP_AND:
		case DIF_OP_SLL:
		case DIF_OP_SRL:
		case DIF_OP_SRA:
		case DIF_OP_SUB:
		case DIF_OP_ADD:
		case DIF_OP_MUL:
		case DIF_OP_SDIV:
		case DIF_OP_UDIV:
		case DIF_OP_SREM:
		case DIF_OP_UREM:
		case DIF_OP_COPYS:
		case DIF_OP_NOT:
		case DIF_OP_MOV:
		case DIF_OP_RLDSB:
		case DIF_OP_RLDSH:
		case DIF_OP_RLDSW:
		case DIF_OP_RLDUB:
		case DIF_OP_RLDUH:
		case DIF_OP_RLDUW:
		case DIF_OP_RLDX:
		case DIF_OP_ULDSB:
		case DIF_OP_ULDSH:
		case DIF_OP_ULDSW:
		case DIF_OP_ULDUB:
		case DIF_OP_ULDUH:
		case DIF_OP_ULDUW:
		case DIF_OP_ULDX:
		case DIF_OP_STB:
		case DIF_OP_STH:
		case DIF_OP_STW:
		case DIF_OP_STX:
		case DIF_OP_ALLOCS:
		case DIF_OP_CMP:
		case DIF_OP_SCMP:
		case DIF_OP_TST:
		case DIF_OP_BA:
		case DIF_OP_BE:
		case DIF_OP_BNE:
		case DIF_OP_BG:
		case DIF_OP_BGU:
		case DIF_OP_BGE:
		case DIF_OP_BGEU:
		case DIF_OP_BL:
		case DIF_OP_BLU:
		case DIF_OP_BLE:
		case DIF_OP_BLEU:
		case DIF_OP_RET:
		case DIF_OP_NOP:
		case DIF_OP_POPTS:
		case DIF_OP_FLUSHTS:
		case DIF_OP_SETX:
		case DIF_OP_SETS:
		case DIF_OP_LDGA:
		case DIF_OP_LDLS:
		case DIF_OP_STGS:
		case DIF_OP_STLS:
		case DIF_OP_PUSHTR:
		case DIF_OP_PUSHTV:
			break;

		case DIF_OP_LDGS:
			if (v >= DIF_VAR_OTHER_UBASE)
				break;

			if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9)
				break;

			if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID ||
			    v == DIF_VAR_PPID || v == DIF_VAR_TID ||
			    v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME ||
			    v == DIF_VAR_UID || v == DIF_VAR_GID)
				break;

			err += efunc(pc, "illegal variable %u\n", v);
			break;

		case DIF_OP_LDTA:
		case DIF_OP_LDTS:
		case DIF_OP_LDGAA:
		case DIF_OP_LDTAA:
			err += efunc(pc, "illegal dynamic variable load\n");
			break;

		case DIF_OP_STTS:
		case DIF_OP_STGAA:
		case DIF_OP_STTAA:
			err += efunc(pc, "illegal dynamic variable store\n");
			break;

		case DIF_OP_CALL:
			if (subr == DIF_SUBR_ALLOCA ||
			    subr == DIF_SUBR_BCOPY ||
			    subr == DIF_SUBR_COPYIN ||
			    subr == DIF_SUBR_COPYINTO ||
			    subr == DIF_SUBR_COPYINSTR ||
			    subr == DIF_SUBR_INDEX ||
			    subr == DIF_SUBR_INET_NTOA ||
			    subr == DIF_SUBR_INET_NTOA6 ||
			    subr == DIF_SUBR_INET_NTOP ||
			    subr == DIF_SUBR_LLTOSTR ||
			    subr == DIF_SUBR_RINDEX ||
			    subr == DIF_SUBR_STRCHR ||
			    subr == DIF_SUBR_STRJOIN ||
			    subr == DIF_SUBR_STRRCHR ||
			    subr == DIF_SUBR_STRSTR ||
			    subr == DIF_SUBR_HTONS ||
			    subr == DIF_SUBR_HTONL ||
			    subr == DIF_SUBR_HTONLL ||
			    subr == DIF_SUBR_NTOHS ||
			    subr == DIF_SUBR_NTOHL ||
			    subr == DIF_SUBR_NTOHLL)
				break;

			err += efunc(pc, "invalid subr %u\n", subr);
			break;

		default:
			err += efunc(pc, "invalid opcode %u\n",
			    DIF_INSTR_OP(instr));
		}
	}

	return (err);
}
/*
 * Returns 1 if the expression in the DIF object can be cached on a per-thread
 * basis; 0 if not.
 */
static int
dtrace_difo_cacheable(dtrace_difo_t *dp)
{
	int i;

	if (dp == NULL)
		return (0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_scope != DIFV_SCOPE_GLOBAL)
			continue;

		switch (v->dtdv_id) {
		case DIF_VAR_CURTHREAD:
		case DIF_VAR_PID:
		case DIF_VAR_TID:
		case DIF_VAR_EXECNAME:
		case DIF_VAR_ZONENAME:
			break;

		default:
			return (0);
		}
	}

	/*
	 * This DIF object may be cacheable.  Now we need to look for any
	 * array loading instructions, any memory loading instructions, or
	 * any stores to thread-local variables.
	 */
	for (i = 0; i < dp->dtdo_len; i++) {
		uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]);

		if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) ||
		    (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) ||
		    (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) ||
		    op == DIF_OP_LDGA || op == DIF_OP_STTS)
			return (0);
	}

	return (1);
}
static void
dtrace_difo_hold(dtrace_difo_t *dp)
{
	int i;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	dp->dtdo_refcnt++;
	ASSERT(dp->dtdo_refcnt != 0);

	/*
	 * We need to check this DIF object for references to the variable
	 * DIF_VAR_VTIMESTAMP.
	 */
	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
			continue;

		if (dtrace_vtime_references++ == 0)
			dtrace_vtime_enable();
	}
}
/*
 * This routine calculates the dynamic variable chunksize for a given DIF
 * object.  The calculation is not fool-proof, and can probably be tricked by
 * malicious DIF -- but it works for all compiler-generated DIF.  Because this
 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
 * if a dynamic variable size exceeds the chunksize.
 */
static void
dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	uint64_t sval;
	dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
	const dif_instr_t *text = dp->dtdo_buf;
	uint_t pc, srd = 0;
	uint_t ttop = 0;
	size_t size, ksize;
	uint_t id, i;

	for (pc = 0; pc < dp->dtdo_len; pc++) {
		dif_instr_t instr = text[pc];
		uint_t op = DIF_INSTR_OP(instr);
		uint_t rd = DIF_INSTR_RD(instr);
		uint_t r1 = DIF_INSTR_R1(instr);
		uint_t nkeys = 0;
		uchar_t scope;

		dtrace_key_t *key = tupregs;

		switch (op) {
		case DIF_OP_SETX:
			sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)];
			srd = rd;
			continue;

		case DIF_OP_STTS:
			key = &tupregs[DIF_DTR_NREGS];
			key[0].dttk_size = 0;
			key[1].dttk_size = 0;
			nkeys = 2;
			scope = DIFV_SCOPE_THREAD;
			break;

		case DIF_OP_STGAA:
		case DIF_OP_STTAA:
			nkeys = ttop;

			if (DIF_INSTR_OP(instr) == DIF_OP_STTAA)
				key[nkeys++].dttk_size = 0;

			key[nkeys++].dttk_size = 0;

			if (op == DIF_OP_STTAA) {
				scope = DIFV_SCOPE_THREAD;
			} else {
				scope = DIFV_SCOPE_GLOBAL;
			}

			break;

		case DIF_OP_PUSHTR:
			if (ttop == DIF_DTR_NREGS)
				return;

			if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) {
				/*
				 * If the register for the size of the "pushtr"
				 * is %r0 (or the value is 0) and the type is
				 * a string, we'll use the system-wide default
				 * string size.
				 */
				tupregs[ttop++].dttk_size =
				    dtrace_strsize_default;
			} else {
				if (srd == 0)
					return;

				tupregs[ttop++].dttk_size = sval;
			}

			break;

		case DIF_OP_PUSHTV:
			if (ttop == DIF_DTR_NREGS)
				return;

			tupregs[ttop++].dttk_size = 0;
			break;

		case DIF_OP_FLUSHTS:
			ttop = 0;
			break;

		case DIF_OP_POPTS:
			if (ttop != 0)
				ttop--;
			break;
		}

		sval = 0;
		srd = 0;

		if (nkeys == 0)
			continue;

		/*
		 * We have a dynamic variable allocation; calculate its size.
		 */
		for (ksize = 0, i = 0; i < nkeys; i++)
			ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));

		size = sizeof (dtrace_dynvar_t);
		size += sizeof (dtrace_key_t) * (nkeys - 1);
		size += ksize;

		/*
		 * Now we need to determine the size of the stored data.
		 */
		id = DIF_INSTR_VAR(instr);

		for (i = 0; i < dp->dtdo_varlen; i++) {
			dtrace_difv_t *v = &dp->dtdo_vartab[i];

			if (v->dtdv_id == id && v->dtdv_scope == scope) {
				size += v->dtdv_type.dtdt_size;
				break;
			}
		}

		if (i == dp->dtdo_varlen)
			return;

		/*
		 * We have the size.  If this is larger than the chunk size
		 * for our dynamic variable state, reset the chunk size.
		 */
		size = P2ROUNDUP(size, sizeof (uint64_t));

		if (size > vstate->dtvs_dynvars.dtds_chunksize)
			vstate->dtvs_dynvars.dtds_chunksize = size;
	}
}
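
/*
 * Worked example (illustrative): for a thread-local store such as
 * self->x = 1 (a DIF_OP_STTS with two implicit keys), nkeys is 2 and both
 * key sizes are 0, so the allocation is sizeof (dtrace_dynvar_t) plus one
 * additional dtrace_key_t plus the stored type size, rounded up to a
 * uint64_t boundary; if that exceeds the current dtds_chunksize, the
 * chunksize grows to match.
 */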
static void
dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	int i, oldsvars, osz, nsz, otlocals, ntlocals;
	uint_t id;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];
		dtrace_statvar_t *svar, ***svarp;
		size_t dsize = 0;
		uint8_t scope = v->dtdv_scope;
		int *np;

		if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
			continue;

		id -= DIF_VAR_OTHER_UBASE;

		switch (scope) {
		case DIFV_SCOPE_THREAD:
			while (id >= (otlocals = vstate->dtvs_ntlocals)) {
				dtrace_difv_t *tlocals;

				if ((ntlocals = (otlocals << 1)) == 0)
					ntlocals = 1;

				osz = otlocals * sizeof (dtrace_difv_t);
				nsz = ntlocals * sizeof (dtrace_difv_t);

				tlocals = kmem_zalloc(nsz, KM_SLEEP);

				if (osz != 0) {
					bcopy(vstate->dtvs_tlocals,
					    tlocals, osz);
					kmem_free(vstate->dtvs_tlocals, osz);
				}

				vstate->dtvs_tlocals = tlocals;
				vstate->dtvs_ntlocals = ntlocals;
			}

			vstate->dtvs_tlocals[id] = *v;
			continue;

		case DIFV_SCOPE_LOCAL:
			np = &vstate->dtvs_nlocals;
			svarp = &vstate->dtvs_locals;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
				dsize = NCPU * (v->dtdv_type.dtdt_size +
				    sizeof (uint64_t));
			else
				dsize = NCPU * sizeof (uint64_t);

			break;

		case DIFV_SCOPE_GLOBAL:
			np = &vstate->dtvs_nglobals;
			svarp = &vstate->dtvs_globals;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
				dsize = v->dtdv_type.dtdt_size +
				    sizeof (uint64_t);

			break;

		default:
			ASSERT(0);
		}

		while (id >= (oldsvars = *np)) {
			dtrace_statvar_t **statics;
			int newsvars, oldsize, newsize;

			if ((newsvars = (oldsvars << 1)) == 0)
				newsvars = 1;

			oldsize = oldsvars * sizeof (dtrace_statvar_t *);
			newsize = newsvars * sizeof (dtrace_statvar_t *);

			statics = kmem_zalloc(newsize, KM_SLEEP);

			if (oldsize != 0) {
				bcopy(*svarp, statics, oldsize);
				kmem_free(*svarp, oldsize);
			}

			*svarp = statics;
			*np = newsvars;
		}

		if ((svar = (*svarp)[id]) == NULL) {
			svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP);
			svar->dtsv_var = *v;

			if ((svar->dtsv_size = dsize) != 0) {
				svar->dtsv_data = (uint64_t)(uintptr_t)
				    kmem_zalloc(dsize, KM_SLEEP);
			}

			(*svarp)[id] = svar;
		}

		svar->dtsv_refcnt++;
	}

	dtrace_difo_chunksize(dp, vstate);
	dtrace_difo_hold(dp);
}
static dtrace_difo_t *
dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	dtrace_difo_t *new;
	size_t sz;

	ASSERT(dp->dtdo_buf != NULL);
	ASSERT(dp->dtdo_refcnt != 0);

	new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);

	ASSERT(dp->dtdo_buf != NULL);
	sz = dp->dtdo_len * sizeof (dif_instr_t);
	new->dtdo_buf = kmem_alloc(sz, KM_SLEEP);
	bcopy(dp->dtdo_buf, new->dtdo_buf, sz);
	new->dtdo_len = dp->dtdo_len;

	if (dp->dtdo_strtab != NULL) {
		ASSERT(dp->dtdo_strlen != 0);
		new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP);
		bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen);
		new->dtdo_strlen = dp->dtdo_strlen;
	}

	if (dp->dtdo_inttab != NULL) {
		ASSERT(dp->dtdo_intlen != 0);
		sz = dp->dtdo_intlen * sizeof (uint64_t);
		new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP);
		bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz);
		new->dtdo_intlen = dp->dtdo_intlen;
	}

	if (dp->dtdo_vartab != NULL) {
		ASSERT(dp->dtdo_varlen != 0);
		sz = dp->dtdo_varlen * sizeof (dtrace_difv_t);
		new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP);
		bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz);
		new->dtdo_varlen = dp->dtdo_varlen;
	}

	dtrace_difo_init(new, vstate);
	return (new);
}
static void
dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	int i;

	ASSERT(dp->dtdo_refcnt == 0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];
		dtrace_statvar_t *svar, **svarp;
		uint_t id;
		uint8_t scope = v->dtdv_scope;
		int *np;

		switch (scope) {
		case DIFV_SCOPE_THREAD:
			continue;

		case DIFV_SCOPE_LOCAL:
			np = &vstate->dtvs_nlocals;
			svarp = vstate->dtvs_locals;
			break;

		case DIFV_SCOPE_GLOBAL:
			np = &vstate->dtvs_nglobals;
			svarp = vstate->dtvs_globals;
			break;

		default:
			ASSERT(0);
		}

		if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
			continue;

		id -= DIF_VAR_OTHER_UBASE;
		ASSERT(id < *np);

		svar = svarp[id];
		ASSERT(svar != NULL);
		ASSERT(svar->dtsv_refcnt > 0);

		if (--svar->dtsv_refcnt > 0)
			continue;

		if (svar->dtsv_size != 0) {
			ASSERT(svar->dtsv_data != NULL);
			kmem_free((void *)(uintptr_t)svar->dtsv_data,
			    svar->dtsv_size);
		}

		svarp[id] = NULL;
		kmem_free(svar, sizeof (dtrace_statvar_t));
	}

	kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
	kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
	kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
	kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));

	kmem_free(dp, sizeof (dtrace_difo_t));
}
static void
dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	int i;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(dp->dtdo_refcnt != 0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
			continue;

		ASSERT(dtrace_vtime_references > 0);
		if (--dtrace_vtime_references == 0)
			dtrace_vtime_disable();
	}

	if (--dp->dtdo_refcnt == 0)
		dtrace_difo_destroy(dp, vstate);
}
/*
 * DTrace Format Functions
 */
static uint16_t
dtrace_format_add(dtrace_state_t *state, char *str)
{
	char *fmt, **new;
	uint16_t ndx, len = strlen(str) + 1;

	fmt = kmem_zalloc(len, KM_SLEEP);
	bcopy(str, fmt, len);

	for (ndx = 0; ndx < state->dts_nformats; ndx++) {
		if (state->dts_formats[ndx] == NULL) {
			state->dts_formats[ndx] = fmt;
			return (ndx + 1);
		}
	}

	if (state->dts_nformats == USHRT_MAX) {
		/*
		 * This is only likely if a denial-of-service attack is being
		 * attempted.  As such, it's okay to fail silently here.
		 */
		kmem_free(fmt, len);
		return (0);
	}

	/*
	 * For simplicity, we always resize the formats array to be exactly the
	 * number of formats.
	 */
	ndx = state->dts_nformats++;
	new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP);

	if (state->dts_formats != NULL) {
		ASSERT(ndx != 0);
		bcopy(state->dts_formats, new, ndx * sizeof (char *));
		kmem_free(state->dts_formats, ndx * sizeof (char *));
	}

	state->dts_formats = new;
	state->dts_formats[ndx] = fmt;

	return (ndx + 1);
}
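
/*
 * Example (illustrative sketch): the printf()-family actions store their
 * format string at enabling time and record only the 1-based format id:
 *
 *	format = dtrace_format_add(state, (char *)(uintptr_t)arg);
 *
 * A return of 0 means "no format"; this is why dtrace_format_remove()
 * below indexes dts_formats[format - 1].
 */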
static void
dtrace_format_remove(dtrace_state_t *state, uint16_t format)
{
	char *fmt;

	ASSERT(state->dts_formats != NULL);
	ASSERT(format <= state->dts_nformats);
	ASSERT(state->dts_formats[format - 1] != NULL);

	fmt = state->dts_formats[format - 1];
	kmem_free(fmt, strlen(fmt) + 1);
	state->dts_formats[format - 1] = NULL;
}
static void
dtrace_format_destroy(dtrace_state_t *state)
{
	int i;

	if (state->dts_nformats == 0) {
		ASSERT(state->dts_formats == NULL);
		return;
	}

	ASSERT(state->dts_formats != NULL);

	for (i = 0; i < state->dts_nformats; i++) {
		char *fmt = state->dts_formats[i];

		if (fmt == NULL)
			continue;

		kmem_free(fmt, strlen(fmt) + 1);
	}

	kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *));
	state->dts_nformats = 0;
	state->dts_formats = NULL;
}
/*
 * DTrace Predicate Functions
 */
static dtrace_predicate_t *
dtrace_predicate_create(dtrace_difo_t *dp)
{
	dtrace_predicate_t *pred;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(dp->dtdo_refcnt != 0);

	pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP);
	pred->dtp_difo = dp;
	pred->dtp_refcnt = 1;

	if (!dtrace_difo_cacheable(dp))
		return (pred);

	if (dtrace_predcache_id == DTRACE_CACHEIDNONE) {
		/*
		 * This is only theoretically possible -- we have had 2^32
		 * cacheable predicates on this machine.  We cannot allow any
		 * more predicates to become cacheable:  as unlikely as it is,
		 * there may be a thread caching a (now stale) predicate cache
		 * ID. (N.B.: the temptation is being successfully resisted to
		 * have this cmn_err() "Holy shit -- we executed this code!")
		 */
		return (pred);
	}

	pred->dtp_cacheid = dtrace_predcache_id++;

	return (pred);
}
static void
dtrace_predicate_hold(dtrace_predicate_t *pred)
{
	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0);
	ASSERT(pred->dtp_refcnt > 0);

	pred->dtp_refcnt++;
}
static void
dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate)
{
	dtrace_difo_t *dp = pred->dtp_difo;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(dp != NULL && dp->dtdo_refcnt != 0);
	ASSERT(pred->dtp_refcnt > 0);

	if (--pred->dtp_refcnt == 0) {
		dtrace_difo_release(pred->dtp_difo, vstate);
		kmem_free(pred, sizeof (dtrace_predicate_t));
	}
}
/*
 * DTrace Action Description Functions
 */
static dtrace_actdesc_t *
dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple,
    uint64_t uarg, uint64_t arg)
{
	dtrace_actdesc_t *act;

	ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL &&
	    arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA));

	act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP);
	act->dtad_kind = kind;
	act->dtad_ntuple = ntuple;
	act->dtad_uarg = uarg;
	act->dtad_arg = arg;
	act->dtad_refcnt = 1;

	return (act);
}
static void
dtrace_actdesc_hold(dtrace_actdesc_t *act)
{
	ASSERT(act->dtad_refcnt >= 1);
	act->dtad_refcnt++;
}
static void
dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate)
{
	dtrace_actkind_t kind = act->dtad_kind;
	dtrace_difo_t *dp;

	ASSERT(act->dtad_refcnt >= 1);

	if (--act->dtad_refcnt != 0)
		return;

	if ((dp = act->dtad_difo) != NULL)
		dtrace_difo_release(dp, vstate);

	if (DTRACEACT_ISPRINTFLIKE(kind)) {
		char *str = (char *)(uintptr_t)act->dtad_arg;

		ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
		    (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));

		if (str != NULL)
			kmem_free(str, strlen(str) + 1);
	}

	kmem_free(act, sizeof (dtrace_actdesc_t));
}
/*
 * DTrace ECB Functions
 */
static dtrace_ecb_t *
dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
{
	dtrace_ecb_t *ecb;
	dtrace_epid_t epid;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP);
	ecb->dte_predicate = NULL;
	ecb->dte_probe = probe;

	/*
	 * The default size is the size of the default action: recording
	 * the epid.
	 */
	ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);
	ecb->dte_alignment = sizeof (dtrace_epid_t);

	epid = state->dts_epid++;

	if (epid - 1 >= state->dts_necbs) {
		dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs;
		int necbs = state->dts_necbs << 1;

		ASSERT(epid == state->dts_necbs + 1);

		if (necbs == 0) {
			ASSERT(oecbs == NULL);
			necbs = 1;
		}

		ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP);

		if (oecbs != NULL)
			bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs));

		dtrace_membar_producer();
		state->dts_ecbs = ecbs;

		if (oecbs != NULL) {
			/*
			 * If this state is active, we must dtrace_sync()
			 * before we can free the old dts_ecbs array:  we're
			 * coming in hot, and there may be active ring
			 * buffer processing (which indexes into the dts_ecbs
			 * array) on another CPU.
			 */
			if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
				dtrace_sync();

			kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs));
		}

		dtrace_membar_producer();
		state->dts_necbs = necbs;
	}

	ecb->dte_state = state;

	ASSERT(state->dts_ecbs[epid - 1] == NULL);
	dtrace_membar_producer();
	state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb;

	return (ecb);
}
static int
dtrace_ecb_enable(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(ecb->dte_next == NULL);

	if (probe == NULL) {
		/*
		 * This is the NULL probe -- there's nothing to do.
		 */
		return (0);
	}

	if (probe->dtpr_ecb == NULL) {
		dtrace_provider_t *prov = probe->dtpr_provider;

		/*
		 * We're the first ECB on this probe.
		 */
		probe->dtpr_ecb = probe->dtpr_ecb_last = ecb;

		if (ecb->dte_predicate != NULL)
			probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;

		return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg));
	} else {
		/*
		 * This probe is already active.  Swing the last pointer to
		 * point to the new ECB, and issue a dtrace_sync() to assure
		 * that all CPUs have seen the change.
		 */
		ASSERT(probe->dtpr_ecb_last != NULL);
		probe->dtpr_ecb_last->dte_next = ecb;
		probe->dtpr_ecb_last = ecb;
		probe->dtpr_predcache = 0;

		dtrace_sync();
		return (0);
	}
}
static void
dtrace_ecb_resize(dtrace_ecb_t *ecb)
{
	uint32_t maxalign = sizeof (dtrace_epid_t);
	uint32_t align = sizeof (uint8_t), offs, diff;
	dtrace_action_t *act;
	int wastuple = 0;
	uint32_t aggbase = UINT32_MAX;
	dtrace_state_t *state = ecb->dte_state;

	/*
	 * If we record anything, we always record the epid.  (And we always
	 * record it first.)
	 */
	offs = sizeof (dtrace_epid_t);
	ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);

	for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
		dtrace_recdesc_t *rec = &act->dta_rec;

		if ((align = rec->dtrd_alignment) > maxalign)
			maxalign = align;

		if (!wastuple && act->dta_intuple) {
			/*
			 * This is the first record in a tuple.  Align the
			 * offset to be at offset 4 in an 8-byte aligned
			 * block.
			 */
			diff = offs + sizeof (dtrace_aggid_t);

			if (diff = (diff & (sizeof (uint64_t) - 1)))
				offs += sizeof (uint64_t) - diff;

			aggbase = offs - sizeof (dtrace_aggid_t);
			ASSERT(!(aggbase & (sizeof (uint64_t) - 1)));
		}

		if (rec->dtrd_size != 0 && (diff = (offs & (align - 1)))) {
			/*
			 * The current offset is not properly aligned; align it.
			 */
			offs += align - diff;
		}

		rec->dtrd_offset = offs;

		if (offs + rec->dtrd_size > ecb->dte_needed) {
			ecb->dte_needed = offs + rec->dtrd_size;

			if (ecb->dte_needed > state->dts_needed)
				state->dts_needed = ecb->dte_needed;
		}

		if (DTRACEACT_ISAGG(act->dta_kind)) {
			dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
			dtrace_action_t *first = agg->dtag_first, *prev;

			ASSERT(rec->dtrd_size != 0 && first != NULL);
			ASSERT(wastuple);
			ASSERT(aggbase != UINT32_MAX);

			agg->dtag_base = aggbase;

			while ((prev = first->dta_prev) != NULL &&
			    DTRACEACT_ISAGG(prev->dta_kind)) {
				agg = (dtrace_aggregation_t *)prev;
				first = agg->dtag_first;
			}

			if (prev != NULL) {
				offs = prev->dta_rec.dtrd_offset +
				    prev->dta_rec.dtrd_size;
			} else {
				offs = sizeof (dtrace_epid_t);
			}
			wastuple = 0;
		} else {
			if (!act->dta_intuple)
				ecb->dte_size = offs + rec->dtrd_size;

			offs += rec->dtrd_size;
		}

		wastuple = act->dta_intuple;
	}

	if ((act = ecb->dte_action) != NULL &&
	    !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
	    ecb->dte_size == sizeof (dtrace_epid_t)) {
		/*
		 * If the size is still sizeof (dtrace_epid_t), then all
		 * actions store no data; set the size to 0.
		 */
		ecb->dte_alignment = maxalign;
		ecb->dte_size = 0;

		/*
		 * If the needed space is still sizeof (dtrace_epid_t), then
		 * all actions need no additional space; set the needed
		 * size to 0.
		 */
		if (ecb->dte_needed == sizeof (dtrace_epid_t))
			ecb->dte_needed = 0;

		return;
	}

	/*
	 * Set our alignment, and make sure that the dte_size and dte_needed
	 * are aligned to the size of an EPID.
	 */
	ecb->dte_alignment = maxalign;
	ecb->dte_size = (ecb->dte_size + (sizeof (dtrace_epid_t) - 1)) &
	    ~(sizeof (dtrace_epid_t) - 1);
	ecb->dte_needed = (ecb->dte_needed + (sizeof (dtrace_epid_t) - 1)) &
	    ~(sizeof (dtrace_epid_t) - 1);
	ASSERT(ecb->dte_size <= ecb->dte_needed);
}
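
/*
 * Worked example (illustrative): if offs is 12 and the next record needs
 * 8-byte alignment, diff = offs & 7 = 4, so offs advances by 8 - 4 to 16
 * before the record offset is assigned.
 */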
static dtrace_action_t *
dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
{
	dtrace_aggregation_t *agg;
	size_t size = sizeof (uint64_t);
	int ntuple = desc->dtad_ntuple;
	dtrace_action_t *act;
	dtrace_recdesc_t *frec;
	dtrace_aggid_t aggid;
	dtrace_state_t *state = ecb->dte_state;

	agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP);
	agg->dtag_ecb = ecb;

	ASSERT(DTRACEACT_ISAGG(desc->dtad_kind));

	switch (desc->dtad_kind) {
	case DTRACEAGG_MIN:
		agg->dtag_initial = INT64_MAX;
		agg->dtag_aggregate = dtrace_aggregate_min;
		break;

	case DTRACEAGG_MAX:
		agg->dtag_initial = INT64_MIN;
		agg->dtag_aggregate = dtrace_aggregate_max;
		break;

	case DTRACEAGG_COUNT:
		agg->dtag_aggregate = dtrace_aggregate_count;
		break;

	case DTRACEAGG_QUANTIZE:
		agg->dtag_aggregate = dtrace_aggregate_quantize;
		size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) *
		    sizeof (uint64_t);
		break;

	case DTRACEAGG_LQUANTIZE: {
		uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg);
		uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg);

		agg->dtag_initial = desc->dtad_arg;
		agg->dtag_aggregate = dtrace_aggregate_lquantize;

		if (step == 0 || levels == 0)
			goto err;

		size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t);
		break;
	}

	case DTRACEAGG_LLQUANTIZE: {
		uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg);
		uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg);
		uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg);
		uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg);
		int64_t v;

		agg->dtag_initial = desc->dtad_arg;
		agg->dtag_aggregate = dtrace_aggregate_llquantize;

		if (factor < 2 || low >= high || nsteps < factor)
			goto err;

		/*
		 * Now check that the number of steps evenly divides a power
		 * of the factor.  (This assures both integer bucket size and
		 * linearity within each magnitude.)
		 */
		for (v = factor; v < nsteps; v *= factor)
			continue;

		if ((v % nsteps) || (nsteps % factor))
			goto err;

		size = (dtrace_aggregate_llquantize_bucket(factor,
		    low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t);
		break;
	}

	case DTRACEAGG_AVG:
		agg->dtag_aggregate = dtrace_aggregate_avg;
		size = sizeof (uint64_t) * 2;
		break;

	case DTRACEAGG_STDDEV:
		agg->dtag_aggregate = dtrace_aggregate_stddev;
		size = sizeof (uint64_t) * 4;
		break;

	case DTRACEAGG_SUM:
		agg->dtag_aggregate = dtrace_aggregate_sum;
		break;

	default:
		goto err;
	}

	agg->dtag_action.dta_rec.dtrd_size = size;

	if (ntuple == 0)
		goto err;

	/*
	 * We must make sure that we have enough actions for the n-tuple.
	 */
	for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) {
		if (DTRACEACT_ISAGG(act->dta_kind))
			break;

		if (--ntuple == 0) {
			/*
			 * This is the action with which our n-tuple begins.
			 */
			agg->dtag_first = act;
			goto success;
		}
	}

	/*
	 * This n-tuple is short by ntuple elements.  Return failure.
	 */
	ASSERT(ntuple != 0);
err:
	kmem_free(agg, sizeof (dtrace_aggregation_t));
	return (NULL);

success:
	/*
	 * If the last action in the tuple has a size of zero, it's actually
	 * an expression argument for the aggregating action.
	 */
	ASSERT(ecb->dte_action_last != NULL);
	act = ecb->dte_action_last;

	if (act->dta_kind == DTRACEACT_DIFEXPR) {
		ASSERT(act->dta_difo != NULL);

		if (act->dta_difo->dtdo_rtype.dtdt_size == 0)
			agg->dtag_hasarg = 1;
	}

	/*
	 * We need to allocate an id for this aggregation.
	 */
	aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1,
	    VM_BESTFIT | VM_SLEEP);

	if (aggid - 1 >= state->dts_naggregations) {
		dtrace_aggregation_t **oaggs = state->dts_aggregations;
		dtrace_aggregation_t **aggs;
		int naggs = state->dts_naggregations << 1;
		int onaggs = state->dts_naggregations;

		ASSERT(aggid == state->dts_naggregations + 1);

		if (naggs == 0) {
			ASSERT(oaggs == NULL);
			naggs = 1;
		}

		aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP);

		if (oaggs != NULL) {
			bcopy(oaggs, aggs, onaggs * sizeof (*aggs));
			kmem_free(oaggs, onaggs * sizeof (*aggs));
		}

		state->dts_aggregations = aggs;
		state->dts_naggregations = naggs;
	}

	ASSERT(state->dts_aggregations[aggid - 1] == NULL);
	state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg;

	frec = &agg->dtag_first->dta_rec;
	if (frec->dtrd_alignment < sizeof (dtrace_aggid_t))
		frec->dtrd_alignment = sizeof (dtrace_aggid_t);

	for (act = agg->dtag_first; act != NULL; act = act->dta_next) {
		ASSERT(!act->dta_intuple);
		act->dta_intuple = 1;
	}

	return (&agg->dtag_action);
}
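
/*
 * Worked example (illustrative): for llquantize parameters factor 10,
 * low magnitude 0, high magnitude 6, 20 steps per magnitude, the loop
 * above leaves v = 100 (the first power of 10 >= 20); since
 * 100 % 20 == 0 and 20 % 10 == 0 the parameters are accepted, and the
 * record size then follows from
 * dtrace_aggregate_llquantize_bucket(10, 0, 6, 20, INT64_MAX) + 2 buckets.
 */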
static void
dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act)
{
	dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
	dtrace_state_t *state = ecb->dte_state;
	dtrace_aggid_t aggid = agg->dtag_id;

	ASSERT(DTRACEACT_ISAGG(act->dta_kind));
	vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1);

	ASSERT(state->dts_aggregations[aggid - 1] == agg);
	state->dts_aggregations[aggid - 1] = NULL;

	kmem_free(agg, sizeof (dtrace_aggregation_t));
}
static int
dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
{
    dtrace_action_t *action, *last;
    dtrace_difo_t *dp = desc->dtad_difo;
    uint32_t size = 0, align = sizeof (uint8_t), mask;
    uint16_t format = 0;
    dtrace_recdesc_t *rec;
    dtrace_state_t *state = ecb->dte_state;
    dtrace_optval_t *opt = state->dts_options, nframes, strsize;
    uint64_t arg = desc->dtad_arg;

    ASSERT(MUTEX_HELD(&dtrace_lock));
    ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1);

    if (DTRACEACT_ISAGG(desc->dtad_kind)) {
        /*
         * If this is an aggregating action, there must be neither
         * a speculate nor a commit on the action chain.
         */
        dtrace_action_t *act;

        for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
            if (act->dta_kind == DTRACEACT_COMMIT)
                return (EINVAL);

            if (act->dta_kind == DTRACEACT_SPECULATE)
                return (EINVAL);
        }

        action = dtrace_ecb_aggregation_create(ecb, desc);

        if (action == NULL)
            return (EINVAL);
    } else {
        if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) ||
            (desc->dtad_kind == DTRACEACT_DIFEXPR &&
            dp != NULL && dp->dtdo_destructive)) {
            state->dts_destructive = 1;
        }

        switch (desc->dtad_kind) {
        case DTRACEACT_PRINTF:
        case DTRACEACT_PRINTA:
        case DTRACEACT_SYSTEM:
        case DTRACEACT_FREOPEN:
            /*
             * We know that our arg is a string -- turn it into a
             * format.
             */
            if (arg == NULL) {
                ASSERT(desc->dtad_kind == DTRACEACT_PRINTA);
                format = 0;
            } else {
                ASSERT(arg != NULL);
                ASSERT(arg > KERNELBASE);
                format = dtrace_format_add(state,
                    (char *)(uintptr_t)arg);
            }

            /*FALLTHROUGH*/
        case DTRACEACT_LIBACT:
        case DTRACEACT_DIFEXPR:
            if (dp == NULL)
                return (EINVAL);

            if ((size = dp->dtdo_rtype.dtdt_size) != 0)
                break;

            if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
                if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
                    return (EINVAL);

                size = opt[DTRACEOPT_STRSIZE];
            }

            break;

        case DTRACEACT_STACK:
            if ((nframes = arg) == 0) {
                nframes = opt[DTRACEOPT_STACKFRAMES];
                ASSERT(nframes > 0);
                arg = nframes;
            }

            size = nframes * sizeof (pc_t);
            break;

        case DTRACEACT_JSTACK:
            if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0)
                strsize = opt[DTRACEOPT_JSTACKSTRSIZE];

            if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0)
                nframes = opt[DTRACEOPT_JSTACKFRAMES];

            arg = DTRACE_USTACK_ARG(nframes, strsize);

            /*FALLTHROUGH*/
        case DTRACEACT_USTACK:
            if (desc->dtad_kind != DTRACEACT_JSTACK &&
                (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) {
                strsize = DTRACE_USTACK_STRSIZE(arg);
                nframes = opt[DTRACEOPT_USTACKFRAMES];
                ASSERT(nframes > 0);
                arg = DTRACE_USTACK_ARG(nframes, strsize);
            }

            /*
             * Save a slot for the pid.
             */
            size = (nframes + 1) * sizeof (uint64_t);
            size += DTRACE_USTACK_STRSIZE(arg);
            size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t)));

            break;

        case DTRACEACT_SYM:
        case DTRACEACT_MOD:
            if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) !=
                sizeof (uint64_t)) ||
                (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
                return (EINVAL);
            break;

        case DTRACEACT_USYM:
        case DTRACEACT_UMOD:
        case DTRACEACT_UADDR:
            if (dp == NULL ||
                (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) ||
                (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
                return (EINVAL);

            /*
             * We have a slot for the pid, plus a slot for the
             * argument.  To keep things simple (aligned with
             * bitness-neutral sizing), we store each as a 64-bit
             * quantity.
             */
            size = 2 * sizeof (uint64_t);
            break;

        case DTRACEACT_STOP:
        case DTRACEACT_BREAKPOINT:
        case DTRACEACT_PANIC:
            break;

        case DTRACEACT_CHILL:
        case DTRACEACT_DISCARD:
        case DTRACEACT_RAISE:
            if (dp == NULL)
                return (EINVAL);
            break;

        case DTRACEACT_EXIT:
            if (dp == NULL ||
                (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) ||
                (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
                return (EINVAL);
            break;

        case DTRACEACT_SPECULATE:
            if (ecb->dte_size > sizeof (dtrace_epid_t))
                return (EINVAL);

            if (dp == NULL)
                return (EINVAL);

            state->dts_speculates = 1;
            break;

        case DTRACEACT_COMMIT: {
            dtrace_action_t *act = ecb->dte_action;

            for (; act != NULL; act = act->dta_next) {
                if (act->dta_kind == DTRACEACT_COMMIT)
                    return (EINVAL);
            }

            if (dp == NULL)
                return (EINVAL);
            break;
        }

        default:
            return (EINVAL);
        }

        if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) {
            /*
             * If this is a data-storing action or a speculate,
             * we must be sure that there isn't a commit on the
             * action chain.
             */
            dtrace_action_t *act = ecb->dte_action;

            for (; act != NULL; act = act->dta_next) {
                if (act->dta_kind == DTRACEACT_COMMIT)
                    return (EINVAL);
            }
        }

        action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP);
        action->dta_rec.dtrd_size = size;
    }

    action->dta_refcnt = 1;
    rec = &action->dta_rec;
    size = rec->dtrd_size;

    for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
        if (!(size & mask)) {
            align = mask + 1;
            break;
        }
    }

    action->dta_kind = desc->dtad_kind;

    if ((action->dta_difo = dp) != NULL)
        dtrace_difo_hold(dp);

    rec->dtrd_action = action->dta_kind;
    rec->dtrd_arg = arg;
    rec->dtrd_uarg = desc->dtad_uarg;
    rec->dtrd_alignment = (uint16_t)align;
    rec->dtrd_format = format;

    if ((last = ecb->dte_action_last) != NULL) {
        ASSERT(ecb->dte_action != NULL);
        action->dta_prev = last;
        last->dta_next = action;
    } else {
        ASSERT(ecb->dte_action == NULL);
        ecb->dte_action = action;
    }

    ecb->dte_action_last = action;

    return (0);
}
static void
dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
{
    dtrace_action_t *act = ecb->dte_action, *next;
    dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;
    dtrace_difo_t *dp;
    uint16_t format;

    if (act != NULL && act->dta_refcnt > 1) {
        ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
        act->dta_refcnt--;
    } else {
        for (; act != NULL; act = next) {
            next = act->dta_next;
            ASSERT(next != NULL || act == ecb->dte_action_last);
            ASSERT(act->dta_refcnt == 1);

            if ((format = act->dta_rec.dtrd_format) != 0)
                dtrace_format_remove(ecb->dte_state, format);

            if ((dp = act->dta_difo) != NULL)
                dtrace_difo_release(dp, vstate);

            if (DTRACEACT_ISAGG(act->dta_kind)) {
                dtrace_ecb_aggregation_destroy(ecb, act);
            } else {
                kmem_free(act, sizeof (dtrace_action_t));
            }
        }
    }

    ecb->dte_action = NULL;
    ecb->dte_action_last = NULL;
    ecb->dte_size = sizeof (dtrace_epid_t);
}
static void
dtrace_ecb_disable(dtrace_ecb_t *ecb)
{
    /*
     * We disable the ECB by removing it from its probe.
     */
    dtrace_ecb_t *pecb, *prev = NULL;
    dtrace_probe_t *probe = ecb->dte_probe;

    ASSERT(MUTEX_HELD(&dtrace_lock));

    if (probe == NULL) {
        /*
         * This is the NULL probe; there is nothing to disable.
         */
        return;
    }

    for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
        if (pecb == ecb)
            break;
        prev = pecb;
    }

    ASSERT(pecb != NULL);

    if (prev == NULL) {
        probe->dtpr_ecb = ecb->dte_next;
    } else {
        prev->dte_next = ecb->dte_next;
    }

    if (ecb == probe->dtpr_ecb_last) {
        ASSERT(ecb->dte_next == NULL);
        probe->dtpr_ecb_last = prev;
    }

    /*
     * The ECB has been disconnected from the probe; now sync to assure
     * that all CPUs have seen the change before returning.
     */
    dtrace_sync();

    if (probe->dtpr_ecb == NULL) {
        /*
         * That was the last ECB on the probe; clear the predicate
         * cache ID for the probe, disable it and sync one more time
         * to assure that we'll never hit it again.
         */
        dtrace_provider_t *prov = probe->dtpr_provider;

        ASSERT(ecb->dte_next == NULL);
        ASSERT(probe->dtpr_ecb_last == NULL);
        probe->dtpr_predcache = DTRACE_CACHEIDNONE;
        prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
            probe->dtpr_id, probe->dtpr_arg);
        dtrace_sync();
    } else {
        /*
         * There is at least one ECB remaining on the probe.  If there
         * is _exactly_ one, set the probe's predicate cache ID to be
         * the predicate cache ID of the remaining ECB.
         */
        ASSERT(probe->dtpr_ecb_last != NULL);
        ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);

        if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
            dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;

            ASSERT(probe->dtpr_ecb->dte_next == NULL);

            if (p != NULL)
                probe->dtpr_predcache = p->dtp_cacheid;
        }

        ecb->dte_next = NULL;
    }
}
static void
dtrace_ecb_destroy(dtrace_ecb_t *ecb)
{
    dtrace_state_t *state = ecb->dte_state;
    dtrace_vstate_t *vstate = &state->dts_vstate;
    dtrace_predicate_t *pred;
    dtrace_epid_t epid = ecb->dte_epid;

    ASSERT(MUTEX_HELD(&dtrace_lock));
    ASSERT(ecb->dte_next == NULL);
    ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);

    if ((pred = ecb->dte_predicate) != NULL)
        dtrace_predicate_release(pred, vstate);

    dtrace_ecb_action_remove(ecb);

    ASSERT(state->dts_ecbs[epid - 1] == ecb);
    state->dts_ecbs[epid - 1] = NULL;

    kmem_free(ecb, sizeof (dtrace_ecb_t));
}
static dtrace_ecb_t *
dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
    dtrace_enabling_t *enab)
{
    dtrace_ecb_t *ecb;
    dtrace_predicate_t *pred;
    dtrace_actdesc_t *act;
    dtrace_provider_t *prov;
    dtrace_ecbdesc_t *desc = enab->dten_current;

    ASSERT(MUTEX_HELD(&dtrace_lock));
    ASSERT(state != NULL);

    ecb = dtrace_ecb_add(state, probe);
    ecb->dte_uarg = desc->dted_uarg;

    if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
        dtrace_predicate_hold(pred);
        ecb->dte_predicate = pred;
    }

    if (probe != NULL) {
        /*
         * If the provider shows more leg than the consumer is old
         * enough to see, we need to enable the appropriate implicit
         * predicate bits to prevent the ecb from activating at
         * revealing times.
         *
         * Providers specifying DTRACE_PRIV_USER at register time
         * are stating that they need the /proc-style privilege
         * model to be enforced, and this is what DTRACE_COND_OWNER
         * and DTRACE_COND_ZONEOWNER will then do at probe time.
         */
        prov = probe->dtpr_provider;
        if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
            (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
            ecb->dte_cond |= DTRACE_COND_OWNER;

        if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
            (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
            ecb->dte_cond |= DTRACE_COND_ZONEOWNER;

        /*
         * If the provider shows us kernel innards and the user
         * is lacking sufficient privilege, enable the
         * DTRACE_COND_USERMODE implicit predicate.
         */
        if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
            (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
            ecb->dte_cond |= DTRACE_COND_USERMODE;
    }

    if (dtrace_ecb_create_cache != NULL) {
        /*
         * If we have a cached ecb, we'll use its action list instead
         * of creating our own (saving both time and space).
         */
        dtrace_ecb_t *cached = dtrace_ecb_create_cache;
        dtrace_action_t *act = cached->dte_action;

        if (act != NULL) {
            ASSERT(act->dta_refcnt > 0);
            act->dta_refcnt++;
            ecb->dte_action = act;
            ecb->dte_action_last = cached->dte_action_last;
            ecb->dte_needed = cached->dte_needed;
            ecb->dte_size = cached->dte_size;
            ecb->dte_alignment = cached->dte_alignment;
        }

        return (ecb);
    }

    for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
        if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
            dtrace_ecb_destroy(ecb);
            return (NULL);
        }
    }

    dtrace_ecb_resize(ecb);

    return (dtrace_ecb_create_cache = ecb);
}
static int
dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
{
    dtrace_ecb_t *ecb;
    dtrace_enabling_t *enab = arg;
    dtrace_state_t *state = enab->dten_vstate->dtvs_state;

    ASSERT(state != NULL);

    if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) {
        /*
         * This probe was created in a generation for which this
         * enabling has previously created ECBs; we don't want to
         * enable it again, so just kick out.
         */
        return (DTRACE_MATCH_NEXT);
    }

    if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
        return (DTRACE_MATCH_DONE);

    if (dtrace_ecb_enable(ecb) < 0)
        return (DTRACE_MATCH_FAIL);

    return (DTRACE_MATCH_NEXT);
}
static dtrace_ecb_t *
dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
{
    dtrace_ecb_t *ecb;

    ASSERT(MUTEX_HELD(&dtrace_lock));

    if (id == 0 || id > state->dts_necbs)
        return (NULL);

    ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
    ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);

    return (state->dts_ecbs[id - 1]);
}
static dtrace_aggregation_t *
dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
{
    dtrace_aggregation_t *agg;

    ASSERT(MUTEX_HELD(&dtrace_lock));

    if (id == 0 || id > state->dts_naggregations)
        return (NULL);

    ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
    ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
        agg->dtag_id == id);

    return (state->dts_aggregations[id - 1]);
}
/*
 * DTrace Buffer Functions
 *
 * The following functions manipulate DTrace buffers.  Most of these functions
 * are called in the context of establishing or processing consumer state;
 * exceptions are explicitly noted.
 */

/*
 * Note:  called from cross call context.  This function switches the two
 * buffers on a given CPU.  The atomicity of this operation is assured by
 * disabling interrupts while the actual switch takes place; the disabling of
 * interrupts serializes the execution with any execution of dtrace_probe() on
 * the same CPU.
 */
static void
dtrace_buffer_switch(dtrace_buffer_t *buf)
{
    caddr_t tomax = buf->dtb_tomax;
    caddr_t xamot = buf->dtb_xamot;
    dtrace_icookie_t cookie;
    hrtime_t now = dtrace_gethrtime();

    ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
    ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));

    cookie = dtrace_interrupt_disable();
    buf->dtb_tomax = xamot;
    buf->dtb_xamot = tomax;
    buf->dtb_xamot_drops = buf->dtb_drops;
    buf->dtb_xamot_offset = buf->dtb_offset;
    buf->dtb_xamot_errors = buf->dtb_errors;
    buf->dtb_xamot_flags = buf->dtb_flags;
    buf->dtb_offset = 0;
    buf->dtb_drops = 0;
    buf->dtb_errors = 0;
    buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
    buf->dtb_interval = now - buf->dtb_switched;
    buf->dtb_switched = now;
    dtrace_interrupt_enable(cookie);
}
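
/*
 * For example (illustrative reading of the code above):  if the active
 * buffer ("tomax") has accumulated pending records and the inactive buffer
 * ("xamot") has just been consumed at user-level, a switch makes the
 * drained xamot buffer the new active buffer, and exposes the pending data
 * -- along with the saved drop, error and offset counts -- to the consumer
 * via the dtb_xamot_* fields.
 */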
/*
 * Note:  called from cross call context.  This function activates a buffer
 * on a CPU.  As with dtrace_buffer_switch(), the atomicity of the operation
 * is guaranteed by the disabling of interrupts.
 */
static void
dtrace_buffer_activate(dtrace_state_t *state)
{
    dtrace_buffer_t *buf;
    dtrace_icookie_t cookie = dtrace_interrupt_disable();

    buf = &state->dts_buffer[CPU->cpu_id];

    if (buf->dtb_tomax != NULL) {
        /*
         * We might like to assert that the buffer is marked inactive,
         * but this isn't necessarily true:  the buffer for the CPU
         * that processes the BEGIN probe has its buffer activated
         * manually.  In this case, we take the (harmless) action of
         * re-clearing the INACTIVE bit.
         */
        buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
    }

    dtrace_interrupt_enable(cookie);
}
static int
dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
    processorid_t cpu, int *factor)
{
    cpu_t *cp;
    dtrace_buffer_t *buf;
    int allocated = 0, desired = 0;

    ASSERT(MUTEX_HELD(&cpu_lock));
    ASSERT(MUTEX_HELD(&dtrace_lock));

    *factor = 1;

    if (size > dtrace_nonroot_maxsize &&
        !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
        return (EFBIG);

    cp = cpu_list;

    do {
        if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
            continue;

        buf = &bufs[cp->cpu_id];

        /*
         * If there is already a buffer allocated for this CPU, it
         * is only possible that this is a DR event.  In this case,
         * the buffer size must match our specified size.
         */
        if (buf->dtb_tomax != NULL) {
            ASSERT(buf->dtb_size == size);
            continue;
        }

        ASSERT(buf->dtb_xamot == NULL);

        if ((buf->dtb_tomax = kmem_zalloc(size,
            KM_NOSLEEP | KM_NORMALPRI)) == NULL)
            goto err;

        buf->dtb_size = size;
        buf->dtb_flags = flags;
        buf->dtb_offset = 0;
        buf->dtb_drops = 0;

        if (flags & DTRACEBUF_NOSWITCH)
            continue;

        if ((buf->dtb_xamot = kmem_zalloc(size,
            KM_NOSLEEP | KM_NORMALPRI)) == NULL)
            goto err;
    } while ((cp = cp->cpu_next) != cpu_list);

    return (0);

err:
    /*
     * Error allocating memory, so free the buffers that were
     * allocated before the failed allocation.
     */
    cp = cpu_list;

    do {
        if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
            continue;

        buf = &bufs[cp->cpu_id];
        desired += 2;

        if (buf->dtb_xamot != NULL) {
            ASSERT(buf->dtb_tomax != NULL);
            ASSERT(buf->dtb_size == size);
            kmem_free(buf->dtb_xamot, size);
            allocated++;
        }

        if (buf->dtb_tomax != NULL) {
            ASSERT(buf->dtb_size == size);
            kmem_free(buf->dtb_tomax, size);
            allocated++;
        }

        buf->dtb_tomax = NULL;
        buf->dtb_xamot = NULL;
        buf->dtb_size = 0;
    } while ((cp = cp->cpu_next) != cpu_list);

    *factor = desired / (allocated > 0 ? allocated : 1);

    return (ENOMEM);
}
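
/*
 * Illustrative note on *factor (derived from the code above, not a
 * statement of every caller's behavior):  on failure, *factor is the ratio
 * of buffers desired to buffers actually obtained.  For example, if 32
 * eligible CPUs each needed two buffers (desired = 64) but only 16 buffers
 * existed when the allocation failed, *factor is 64 / 16 = 4; a caller may
 * use this to scale down its requested size before retrying.
 */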
/*
 * Note:  called from probe context.  This function just increments the drop
 * count on a buffer.  It has been made a function to allow for the
 * possibility of understanding the source of mysterious drop counts.  (A
 * problem for which one may be particularly disappointed that DTrace cannot
 * be used to understand DTrace.)
 */
static void
dtrace_buffer_drop(dtrace_buffer_t *buf)
{
    buf->dtb_drops++;
}
/*
 * Note:  called from probe context.  This function is called to reserve space
 * in a buffer.  If mstate is non-NULL, sets the scratch base and size in the
 * mstate.  Returns the new offset in the buffer, or a negative value if an
 * error has occurred.
 */
static intptr_t
dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
    dtrace_state_t *state, dtrace_mstate_t *mstate)
{
    intptr_t offs = buf->dtb_offset, soffs;
    intptr_t woffs;
    caddr_t tomax;
    size_t total;

    if (buf->dtb_flags & DTRACEBUF_INACTIVE)
        return (-1);

    if ((tomax = buf->dtb_tomax) == NULL) {
        dtrace_buffer_drop(buf);
        return (-1);
    }

    if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) {
        while (offs & (align - 1)) {
            /*
             * Assert that our alignment is off by a number which
             * is itself sizeof (uint32_t) aligned.
             */
            ASSERT(!((align - (offs & (align - 1))) &
                (sizeof (uint32_t) - 1)));
            DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
            offs += sizeof (uint32_t);
        }

        if ((soffs = offs + needed) > buf->dtb_size) {
            dtrace_buffer_drop(buf);
            return (-1);
        }

        if (mstate == NULL)
            return (offs);

        mstate->dtms_scratch_base = (uintptr_t)tomax + soffs;
        mstate->dtms_scratch_size = buf->dtb_size - soffs;
        mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;

        return (offs);
    }

    if (buf->dtb_flags & DTRACEBUF_FILL) {
        if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN &&
            (buf->dtb_flags & DTRACEBUF_FULL))
            return (-1);

        goto out;
    }

    total = needed + (offs & (align - 1));

    /*
     * For a ring buffer, life is quite a bit more complicated.  Before
     * we can store any padding, we need to adjust our wrapping offset.
     * (If we've never before wrapped or we're not about to, no adjustment
     * is required.)
     */
    if ((buf->dtb_flags & DTRACEBUF_WRAPPED) ||
        offs + total > buf->dtb_size) {
        woffs = buf->dtb_xamot_offset;

        if (offs + total > buf->dtb_size) {
            /*
             * We can't fit in the end of the buffer.  First, a
             * sanity check that we can fit in the buffer at all.
             */
            if (total > buf->dtb_size) {
                dtrace_buffer_drop(buf);
                return (-1);
            }

            /*
             * We're going to be storing at the top of the buffer,
             * so now we need to deal with the wrapped offset.  We
             * only reset our wrapped offset to 0 if it is
             * currently greater than the current offset.  If it
             * is less than the current offset, it is because a
             * previous allocation induced a wrap -- but the
             * allocation didn't subsequently take the space due
             * to an error or false predicate evaluation.  In this
             * case, we'll just leave the wrapped offset alone: if
             * the wrapped offset hasn't been advanced far enough
             * for this allocation, it will be adjusted in the
             * lower loop.
             */
            if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
                if (woffs >= offs)
                    woffs = 0;
            } else {
                woffs = 0;
            }

            /*
             * Now we know that we're going to be storing to the
             * top of the buffer and that there is room for us
             * there.  We need to clear the buffer from the current
             * offset to the end (there may be old gunk there).
             */
            while (offs < buf->dtb_size)
                tomax[offs++] = 0;

            /*
             * We need to set our offset to zero.  And because we
             * are wrapping, we need to set the bit indicating as
             * much.  We can also adjust our needed space back
             * down to the space required by the ECB -- we know
             * that the top of the buffer is aligned.
             */
            offs = 0;
            total = needed;
            buf->dtb_flags |= DTRACEBUF_WRAPPED;
        } else {
            /*
             * There is room for us in the buffer, so we simply
             * need to check the wrapped offset.
             */
            if (woffs < offs) {
                /*
                 * The wrapped offset is less than the offset.
                 * This can happen if we allocated buffer space
                 * that induced a wrap, but then we didn't
                 * subsequently take the space due to an error
                 * or false predicate evaluation.  This is
                 * okay; we know that _this_ allocation isn't
                 * going to induce a wrap.  We still can't
                 * reset the wrapped offset to be zero,
                 * however: the space may have been trashed in
                 * the previous failed probe attempt.  But at
                 * least the wrapped offset doesn't need to
                 * be adjusted at all...
                 */
                goto out;
            }
        }

        while (offs + total > woffs) {
            dtrace_epid_t epid = *(uint32_t *)(tomax + woffs);
            size_t size;

            if (epid == DTRACE_EPIDNONE) {
                size = sizeof (uint32_t);
            } else {
                ASSERT(epid <= state->dts_necbs);
                ASSERT(state->dts_ecbs[epid - 1] != NULL);

                size = state->dts_ecbs[epid - 1]->dte_size;
            }

            ASSERT(woffs + size <= buf->dtb_size);
            ASSERT(size != 0);

            if (woffs + size == buf->dtb_size) {
                /*
                 * We've reached the end of the buffer; we want
                 * to set the wrapped offset to 0 and break
                 * out.  However, if the offs is 0, then we're
                 * in a strange edge-condition:  the amount of
                 * space that we want to reserve plus the size
                 * of the record that we're overwriting is
                 * greater than the size of the buffer.  This
                 * is problematic because if we reserve the
                 * space but subsequently don't consume it (due
                 * to a failed predicate or error) the wrapped
                 * offset will be 0 -- yet the EPID at offset 0
                 * will not be committed.  This situation is
                 * relatively easy to deal with:  if we're in
                 * this case, the buffer is indistinguishable
                 * from one that hasn't wrapped; we need only
                 * finish the job by clearing the wrapped bit,
                 * explicitly setting the offset to be 0, and
                 * zero'ing out the old data in the buffer.
                 */
                if (offs == 0) {
                    buf->dtb_flags &= ~DTRACEBUF_WRAPPED;
                    buf->dtb_offset = 0;
                    woffs = 0;

                    while (woffs < buf->dtb_size)
                        tomax[woffs++] = 0;
                }

                woffs = 0;
                break;
            }

            woffs += size;
        }

        /*
         * We have a wrapped offset.  It may be that the wrapped offset
         * has become zero -- that's okay.
         */
        buf->dtb_xamot_offset = woffs;
    }

out:
    /*
     * Now we can plow the buffer with any necessary padding.
     */
    while (offs & (align - 1)) {
        /*
         * Assert that our alignment is off by a number which
         * is itself sizeof (uint32_t) aligned.
         */
        ASSERT(!((align - (offs & (align - 1))) &
            (sizeof (uint32_t) - 1)));
        DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
        offs += sizeof (uint32_t);
    }

    if (buf->dtb_flags & DTRACEBUF_FILL) {
        if (offs + needed > buf->dtb_size - state->dts_reserve) {
            buf->dtb_flags |= DTRACEBUF_FULL;
            return (-1);
        }
    }

    if (mstate == NULL)
        return (offs);

    /*
     * For ring buffers and fill buffers, the scratch space is always
     * the inactive buffer.
     */
    mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot;
    mstate->dtms_scratch_size = buf->dtb_size;
    mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;

    return (offs);
}
static void
dtrace_buffer_polish(dtrace_buffer_t *buf)
{
    ASSERT(buf->dtb_flags & DTRACEBUF_RING);
    ASSERT(MUTEX_HELD(&dtrace_lock));

    if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
        return;

    /*
     * We need to polish the ring buffer.  There are three cases:
     *
     * - The first (and presumably most common) is that there is no gap
     *   between the buffer offset and the wrapped offset.  In this case,
     *   there is nothing in the buffer that isn't valid data; we can
     *   mark the buffer as polished and return.
     *
     * - The second (less common than the first but still more common
     *   than the third) is that there is a gap between the buffer offset
     *   and the wrapped offset, and the wrapped offset is larger than the
     *   buffer offset.  This can happen because of an alignment issue, or
     *   can happen because of a call to dtrace_buffer_reserve() that
     *   didn't subsequently consume the buffer space.  In this case,
     *   we need to zero the data from the buffer offset to the wrapped
     *   offset.
     *
     * - The third (and least common) is that there is a gap between the
     *   buffer offset and the wrapped offset, but the wrapped offset is
     *   _less_ than the buffer offset.  This can only happen because a
     *   call to dtrace_buffer_reserve() induced a wrap, but the space
     *   was not subsequently consumed.  In this case, we need to zero the
     *   space from the offset to the end of the buffer _and_ from the
     *   top of the buffer to the wrapped offset.
     */
    if (buf->dtb_offset < buf->dtb_xamot_offset) {
        bzero(buf->dtb_tomax + buf->dtb_offset,
            buf->dtb_xamot_offset - buf->dtb_offset);
    }

    if (buf->dtb_offset > buf->dtb_xamot_offset) {
        bzero(buf->dtb_tomax + buf->dtb_offset,
            buf->dtb_size - buf->dtb_offset);
        bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
    }
}
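
/*
 * Illustrative sketch of the three cases above (o = dtb_offset,
 * w = dtb_xamot_offset, '#' = valid data, '.' = zeroed by polishing):
 *
 *   case 1:  |########o==w##########|   no gap; nothing to zero
 *   case 2:  |########o.....w#######|   zero [o, w)
 *   case 3:  |....w#####o...........|   zero [o, size) and [0, w)
 */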
/*
 * This routine determines if data generated at the specified time has likely
 * been entirely consumed at user-level.  This routine is called to determine
 * if an ECB on a defunct probe (but for an active enabling) can be safely
 * disabled and destroyed.
 */
static int
dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when)
{
    int i;

    for (i = 0; i < NCPU; i++) {
        dtrace_buffer_t *buf = &bufs[i];

        if (buf->dtb_size == 0)
            continue;

        if (buf->dtb_flags & DTRACEBUF_RING)
            return (0);

        if (!buf->dtb_switched && buf->dtb_offset != 0)
            return (0);

        if (buf->dtb_switched - buf->dtb_interval < when)
            return (0);
    }

    return (1);
}
static void
dtrace_buffer_free(dtrace_buffer_t *bufs)
{
    int i;

    for (i = 0; i < NCPU; i++) {
        dtrace_buffer_t *buf = &bufs[i];

        if (buf->dtb_tomax == NULL) {
            ASSERT(buf->dtb_xamot == NULL);
            ASSERT(buf->dtb_size == 0);
            continue;
        }

        if (buf->dtb_xamot != NULL) {
            ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
            kmem_free(buf->dtb_xamot, buf->dtb_size);
        }

        kmem_free(buf->dtb_tomax, buf->dtb_size);
        buf->dtb_size = 0;
        buf->dtb_tomax = NULL;
        buf->dtb_xamot = NULL;
    }
}
/*
 * DTrace Enabling Functions
 */
static dtrace_enabling_t *
dtrace_enabling_create(dtrace_vstate_t *vstate)
{
    dtrace_enabling_t *enab;

    enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
    enab->dten_vstate = vstate;

    return (enab);
}
static void
dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
{
    dtrace_ecbdesc_t **ndesc;
    size_t osize, nsize;

    /*
     * We can't add to enablings after we've enabled them, or after we've
     * retained them.
     */
    ASSERT(enab->dten_probegen == 0);
    ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);

    if (enab->dten_ndesc < enab->dten_maxdesc) {
        enab->dten_desc[enab->dten_ndesc++] = ecb;
        return;
    }

    osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);

    if (enab->dten_maxdesc == 0) {
        enab->dten_maxdesc = 1;
    } else {
        enab->dten_maxdesc <<= 1;
    }

    ASSERT(enab->dten_ndesc < enab->dten_maxdesc);

    nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
    ndesc = kmem_zalloc(nsize, KM_SLEEP);
    bcopy(enab->dten_desc, ndesc, osize);
    kmem_free(enab->dten_desc, osize);

    enab->dten_desc = ndesc;
    enab->dten_desc[enab->dten_ndesc++] = ecb;
}
static void
dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb,
    dtrace_probedesc_t *pd)
{
    dtrace_ecbdesc_t *new;
    dtrace_predicate_t *pred;
    dtrace_actdesc_t *act;

    /*
     * We're going to create a new ECB description that matches the
     * specified ECB in every way, but has the specified probe description.
     */
    new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);

    if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL)
        dtrace_predicate_hold(pred);

    for (act = ecb->dted_action; act != NULL; act = act->dtad_next)
        dtrace_actdesc_hold(act);

    new->dted_action = ecb->dted_action;
    new->dted_pred = ecb->dted_pred;
    new->dted_probe = *pd;
    new->dted_uarg = ecb->dted_uarg;

    dtrace_enabling_add(enab, new);
}
static void
dtrace_enabling_dump(dtrace_enabling_t *enab)
{
    int i;

    for (i = 0; i < enab->dten_ndesc; i++) {
        dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe;

        cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i,
            desc->dtpd_provider, desc->dtpd_mod,
            desc->dtpd_func, desc->dtpd_name);
    }
}
static void
dtrace_enabling_destroy(dtrace_enabling_t *enab)
{
    int i;
    dtrace_ecbdesc_t *ep;
    dtrace_vstate_t *vstate = enab->dten_vstate;

    ASSERT(MUTEX_HELD(&dtrace_lock));

    for (i = 0; i < enab->dten_ndesc; i++) {
        dtrace_actdesc_t *act, *next;
        dtrace_predicate_t *pred;

        ep = enab->dten_desc[i];

        if ((pred = ep->dted_pred.dtpdd_predicate) != NULL)
            dtrace_predicate_release(pred, vstate);

        for (act = ep->dted_action; act != NULL; act = next) {
            next = act->dtad_next;
            dtrace_actdesc_release(act, vstate);
        }

        kmem_free(ep, sizeof (dtrace_ecbdesc_t));
    }

    kmem_free(enab->dten_desc,
        enab->dten_maxdesc * sizeof (dtrace_enabling_t *));

    /*
     * If this was a retained enabling, decrement the dts_nretained count
     * and take it off of the dtrace_retained list.
     */
    if (enab->dten_prev != NULL || enab->dten_next != NULL ||
        dtrace_retained == enab) {
        ASSERT(enab->dten_vstate->dtvs_state != NULL);
        ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
        enab->dten_vstate->dtvs_state->dts_nretained--;
        dtrace_retained_gen++;
    }

    if (enab->dten_prev == NULL) {
        if (dtrace_retained == enab) {
            dtrace_retained = enab->dten_next;

            if (dtrace_retained != NULL)
                dtrace_retained->dten_prev = NULL;
        }
    } else {
        ASSERT(enab != dtrace_retained);
        ASSERT(dtrace_retained != NULL);
        enab->dten_prev->dten_next = enab->dten_next;
    }

    if (enab->dten_next != NULL) {
        ASSERT(dtrace_retained != NULL);
        enab->dten_next->dten_prev = enab->dten_prev;
    }

    kmem_free(enab, sizeof (dtrace_enabling_t));
}
static int
dtrace_enabling_retain(dtrace_enabling_t *enab)
{
    dtrace_state_t *state;

    ASSERT(MUTEX_HELD(&dtrace_lock));
    ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
    ASSERT(enab->dten_vstate != NULL);

    state = enab->dten_vstate->dtvs_state;
    ASSERT(state != NULL);

    /*
     * We only allow each state to retain dtrace_retain_max enablings.
     */
    if (state->dts_nretained >= dtrace_retain_max)
        return (ENOSPC);

    state->dts_nretained++;
    dtrace_retained_gen++;

    if (dtrace_retained == NULL) {
        dtrace_retained = enab;
        return (0);
    }

    enab->dten_next = dtrace_retained;
    dtrace_retained->dten_prev = enab;
    dtrace_retained = enab;

    return (0);
}
static int
dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match,
    dtrace_probedesc_t *create)
{
    dtrace_enabling_t *new, *enab;
    int found = 0, err = ENOENT;

    ASSERT(MUTEX_HELD(&dtrace_lock));
    ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN);
    ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN);
    ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN);
    ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN);

    new = dtrace_enabling_create(&state->dts_vstate);

    /*
     * Iterate over all retained enablings, looking for enablings that
     * match the specified state.
     */
    for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
        int i;

        /*
         * dtvs_state can only be NULL for helper enablings -- and
         * helper enablings can't be retained.
         */
        ASSERT(enab->dten_vstate->dtvs_state != NULL);

        if (enab->dten_vstate->dtvs_state != state)
            continue;

        /*
         * Now iterate over each probe description; we're looking for
         * an exact match to the specified probe description.
         */
        for (i = 0; i < enab->dten_ndesc; i++) {
            dtrace_ecbdesc_t *ep = enab->dten_desc[i];
            dtrace_probedesc_t *pd = &ep->dted_probe;

            if (strcmp(pd->dtpd_provider, match->dtpd_provider))
                continue;

            if (strcmp(pd->dtpd_mod, match->dtpd_mod))
                continue;

            if (strcmp(pd->dtpd_func, match->dtpd_func))
                continue;

            if (strcmp(pd->dtpd_name, match->dtpd_name))
                continue;

            /*
             * We have a winning probe!  Add it to our growing
             * enabling.
             */
            found = 1;
            dtrace_enabling_addlike(new, ep, create);
        }
    }

    if (!found || (err = dtrace_enabling_retain(new)) != 0) {
        dtrace_enabling_destroy(new);
        return (err);
    }

    return (0);
}
static void
dtrace_enabling_retract(dtrace_state_t *state)
{
    dtrace_enabling_t *enab, *next;

    ASSERT(MUTEX_HELD(&dtrace_lock));

    /*
     * Iterate over all retained enablings, destroy the enablings retained
     * for the specified state.
     */
    for (enab = dtrace_retained; enab != NULL; enab = next) {
        next = enab->dten_next;

        /*
         * dtvs_state can only be NULL for helper enablings -- and
         * helper enablings can't be retained.
         */
        ASSERT(enab->dten_vstate->dtvs_state != NULL);

        if (enab->dten_vstate->dtvs_state == state) {
            ASSERT(state->dts_nretained > 0);
            dtrace_enabling_destroy(enab);
        }
    }

    ASSERT(state->dts_nretained == 0);
}
static int
dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
{
    int i;
    int total_matched = 0, matched = 0;

    ASSERT(MUTEX_HELD(&cpu_lock));
    ASSERT(MUTEX_HELD(&dtrace_lock));

    for (i = 0; i < enab->dten_ndesc; i++) {
        dtrace_ecbdesc_t *ep = enab->dten_desc[i];

        enab->dten_current = ep;
        enab->dten_error = 0;

        /*
         * If a provider failed to enable a probe then get out and
         * let the consumer know we failed.
         */
        if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0)
            return (EBUSY);

        total_matched += matched;

        if (enab->dten_error != 0) {
            /*
             * If we get an error half-way through enabling the
             * probes, we kick out -- perhaps with some number of
             * them enabled.  Leaving enabled probes enabled may
             * be slightly confusing for user-level, but we expect
             * that no one will attempt to actually drive on in
             * the face of such errors.  If this is an anonymous
             * enabling (indicated with a NULL nmatched pointer),
             * we cmn_err() a message.  We aren't expecting to
             * get such an error -- insofar as it can exist at
             * all, it would be a result of corrupted DOF in the
             * driver properties.
             */
            if (nmatched == NULL) {
                cmn_err(CE_WARN, "dtrace_enabling_match() "
                    "error on %p: %d", (void *)ep,
                    enab->dten_error);
            }

            return (enab->dten_error);
        }
    }

    enab->dten_probegen = dtrace_probegen;
    if (nmatched != NULL)
        *nmatched = total_matched;

    return (0);
}
static void
dtrace_enabling_matchall(void)
{
    dtrace_enabling_t *enab;

    mutex_enter(&cpu_lock);
    mutex_enter(&dtrace_lock);

    /*
     * Iterate over all retained enablings to see if any probes match
     * against them.  We only perform this operation on enablings for which
     * we have sufficient permissions by virtue of being in the global zone
     * or in the same zone as the DTrace client.  Because we can be called
     * after dtrace_detach() has been called, we cannot assert that there
     * are retained enablings.  We can safely load from dtrace_retained,
     * however:  the taskq_destroy() at the end of dtrace_detach() will
     * block pending our completion.
     */
    for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
        dtrace_cred_t *dcr = &enab->dten_vstate->dtvs_state->dts_cred;
        cred_t *cr = dcr->dcr_cred;
        zoneid_t zone = cr != NULL ? crgetzoneid(cr) : 0;

        if ((dcr->dcr_visible & DTRACE_CRV_ALLZONE) || (cr != NULL &&
            (zone == GLOBAL_ZONEID || getzoneid() == zone)))
            (void) dtrace_enabling_match(enab, NULL);
    }

    mutex_exit(&dtrace_lock);
    mutex_exit(&cpu_lock);
}
/*
 * If an enabling is to be enabled without having matched probes (that is, if
 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
 * enabling must be _primed_ by creating an ECB for every ECB description.
 * This must be done to assure that we know the number of speculations, the
 * number of aggregations, the minimum buffer size needed, etc. before we
 * transition out of DTRACE_ACTIVITY_INACTIVE.  To do this without actually
 * enabling any probes, we create ECBs for every ECB description, but with a
 * NULL probe -- which is exactly what this function does.
 */
static void
dtrace_enabling_prime(dtrace_state_t *state)
{
    dtrace_enabling_t *enab;
    int i;

    for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
        ASSERT(enab->dten_vstate->dtvs_state != NULL);

        if (enab->dten_vstate->dtvs_state != state)
            continue;

        /*
         * We don't want to prime an enabling more than once, lest
         * we allow a malicious user to induce resource exhaustion.
         * (The ECBs that result from priming an enabling aren't
         * leaked -- but they also aren't deallocated until the
         * consumer state is destroyed.)
         */
        if (enab->dten_primed)
            continue;

        for (i = 0; i < enab->dten_ndesc; i++) {
            enab->dten_current = enab->dten_desc[i];
            (void) dtrace_probe_enable(NULL, enab);
        }

        enab->dten_primed = 1;
    }
}
/*
 * Called to indicate that probes should be provided due to retained
 * enablings.  This is implemented in terms of dtrace_probe_provide(), but it
 * must take an initial lap through the enabling calling the dtps_provide()
 * entry point explicitly to allow for autocreated probes.
 */
static void
dtrace_enabling_provide(dtrace_provider_t *prv)
{
    int i, all = 0;
    dtrace_probedesc_t desc;
    dtrace_genid_t gen;

    ASSERT(MUTEX_HELD(&dtrace_lock));
    ASSERT(MUTEX_HELD(&dtrace_provider_lock));

    if (prv == NULL) {
        all = 1;
        prv = dtrace_provider;
    }

    do {
        dtrace_enabling_t *enab;
        void *parg = prv->dtpv_arg;

retry:
        gen = dtrace_retained_gen;
        for (enab = dtrace_retained; enab != NULL;
            enab = enab->dten_next) {
            for (i = 0; i < enab->dten_ndesc; i++) {
                desc = enab->dten_desc[i]->dted_probe;
                mutex_exit(&dtrace_lock);
                prv->dtpv_pops.dtps_provide(parg, &desc);
                mutex_enter(&dtrace_lock);
                /*
                 * Process the retained enablings again if
                 * they have changed while we weren't holding
                 * dtrace_lock.
                 */
                if (gen != dtrace_retained_gen)
                    goto retry;
            }
        }
    } while (all && (prv = prv->dtpv_next) != NULL);

    mutex_exit(&dtrace_lock);
    dtrace_probe_provide(NULL, all ? NULL : prv);
    mutex_enter(&dtrace_lock);
}
/*
 * Called to reap ECBs that are attached to probes from defunct providers.
 */
static void
dtrace_enabling_reap(void)
{
    dtrace_provider_t *prov;
    dtrace_probe_t *probe;
    dtrace_ecb_t *ecb;
    hrtime_t when;
    int i;

    mutex_enter(&cpu_lock);
    mutex_enter(&dtrace_lock);

    for (i = 0; i < dtrace_nprobes; i++) {
        if ((probe = dtrace_probes[i]) == NULL)
            continue;

        if (probe->dtpr_ecb == NULL)
            continue;

        prov = probe->dtpr_provider;

        if ((when = prov->dtpv_defunct) == 0)
            continue;

        /*
         * We have ECBs on a defunct provider:  we want to reap these
         * ECBs to allow the provider to unregister.  The destruction
         * of these ECBs must be done carefully:  if we destroy the ECB
         * and the consumer later wishes to consume an EPID that
         * corresponds to the destroyed ECB (and if the EPID metadata
         * has not been previously consumed), the consumer will abort
         * processing on the unknown EPID.  To reduce (but not, sadly,
         * eliminate) the possibility of this, we will only destroy an
         * ECB for a defunct provider if, for the state that
         * corresponds to the ECB:
         *
         *  (a) There is no speculative tracing (which can effectively
         *      cache an EPID for an arbitrary amount of time).
         *
         *  (b) The principal buffers have been switched twice since the
         *      provider became defunct.
         *
         *  (c) The aggregation buffers are of zero size or have been
         *      switched twice since the provider became defunct.
         *
         * We use dts_speculates to determine (a) and call a function
         * (dtrace_buffer_consumed()) to determine (b) and (c).  Note
         * that as soon as we've been unable to destroy one of the ECBs
         * associated with the probe, we quit trying -- reaping is only
         * fruitful in as much as we can destroy all ECBs associated
         * with the defunct provider's probes.
         */
        while ((ecb = probe->dtpr_ecb) != NULL) {
            dtrace_state_t *state = ecb->dte_state;
            dtrace_buffer_t *buf = state->dts_buffer;
            dtrace_buffer_t *aggbuf = state->dts_aggbuffer;

            if (state->dts_speculates)
                break;

            if (!dtrace_buffer_consumed(buf, when))
                break;

            if (!dtrace_buffer_consumed(aggbuf, when))
                break;

            dtrace_ecb_disable(ecb);
            ASSERT(probe->dtpr_ecb != ecb);
            dtrace_ecb_destroy(ecb);
        }
    }

    mutex_exit(&dtrace_lock);
    mutex_exit(&cpu_lock);
}
/*
 * DTrace DOF Functions
 */
/*ARGSUSED*/
static void
dtrace_dof_error(dof_hdr_t *dof, const char *str)
{
    if (dtrace_err_verbose)
        cmn_err(CE_WARN, "failed to process DOF: %s", str);

#ifdef DTRACE_ERRDEBUG
    dtrace_errdebug(str);
#endif
}
/*
 * Create DOF out of a currently enabled state.  Right now, we only create
 * DOF containing the run-time options -- but this could be expanded to create
 * complete DOF representing the enabled state.
 */
static dof_hdr_t *
dtrace_dof_create(dtrace_state_t *state)
{
    dof_hdr_t *dof;
    dof_sec_t *sec;
    dof_optdesc_t *opt;
    int i, len = sizeof (dof_hdr_t) +
        roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
        sizeof (dof_optdesc_t) * DTRACEOPT_MAX;

    ASSERT(MUTEX_HELD(&dtrace_lock));

    dof = kmem_zalloc(len, KM_SLEEP);
    dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
    dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
    dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
    dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;

    dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
    dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
    dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
    dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
    dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
    dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;

    dof->dofh_flags = 0;
    dof->dofh_hdrsize = sizeof (dof_hdr_t);
    dof->dofh_secsize = sizeof (dof_sec_t);
    dof->dofh_secnum = 1;	/* only DOF_SECT_OPTDESC */
    dof->dofh_secoff = sizeof (dof_hdr_t);
    dof->dofh_loadsz = len;
    dof->dofh_filesz = len;

    /*
     * Fill in the option section header...
     */
    sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
    sec->dofs_type = DOF_SECT_OPTDESC;
    sec->dofs_align = sizeof (uint64_t);
    sec->dofs_flags = DOF_SECF_LOAD;
    sec->dofs_entsize = sizeof (dof_optdesc_t);

    opt = (dof_optdesc_t *)((uintptr_t)sec +
        roundup(sizeof (dof_sec_t), sizeof (uint64_t)));

    sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
    sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;

    for (i = 0; i < DTRACEOPT_MAX; i++) {
        opt[i].dofo_option = i;
        opt[i].dofo_strtab = DOF_SECIDX_NONE;
        opt[i].dofo_value = state->dts_options[i];
    }

    return (dof);
}
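
/*
 * The resulting DOF is laid out as follows (illustrative sketch of the
 * code above):
 *
 *   +----------------------+ offset 0
 *   | dof_hdr_t            |
 *   +----------------------+ sizeof (dof_hdr_t)
 *   | dof_sec_t (OPTDESC)  |  one section header, rounded up to
 *   | (+ pad to 8 bytes)   |  uint64_t alignment
 *   +----------------------+
 *   | dof_optdesc_t array  |  DTRACEOPT_MAX entries, one per option
 *   +----------------------+ len == dofh_loadsz == dofh_filesz
 */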
static dof_hdr_t *
dtrace_dof_copyin(uintptr_t uarg, int *errp)
{
    dof_hdr_t hdr, *dof;

    ASSERT(!MUTEX_HELD(&dtrace_lock));

    /*
     * First, we're going to copyin() the sizeof (dof_hdr_t).
     */
    if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) {
        dtrace_dof_error(NULL, "failed to copyin DOF header");
        *errp = EFAULT;
        return (NULL);
    }

    /*
     * Now we'll allocate the entire DOF and copy it in -- provided
     * that the length isn't outrageous.
     */
    if (hdr.dofh_loadsz >= dtrace_dof_maxsize) {
        dtrace_dof_error(&hdr, "load size exceeds maximum");
        *errp = E2BIG;
        return (NULL);
    }

    if (hdr.dofh_loadsz < sizeof (hdr)) {
        dtrace_dof_error(&hdr, "invalid load size");
        *errp = EINVAL;
        return (NULL);
    }

    dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP);

    if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 ||
        dof->dofh_loadsz != hdr.dofh_loadsz) {
        kmem_free(dof, hdr.dofh_loadsz);
        *errp = EFAULT;
        return (NULL);
    }

    return (dof);
}
static dof_hdr_t *
dtrace_dof_property(const char *name)
{
    uchar_t *buf;
    uint64_t loadsz;
    unsigned int len, i;
    dof_hdr_t *dof;

    /*
     * Unfortunately, arrays of values in .conf files are always (and
     * only) interpreted to be integer arrays.  We must read our DOF
     * as an integer array, and then squeeze it into a byte array.
     */
    if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0,
        (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS)
        return (NULL);

    for (i = 0; i < len; i++)
        buf[i] = (uchar_t)(((int *)buf)[i]);

    if (len < sizeof (dof_hdr_t)) {
        ddi_prop_free(buf);
        dtrace_dof_error(NULL, "truncated header");
        return (NULL);
    }

    if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) {
        ddi_prop_free(buf);
        dtrace_dof_error(NULL, "truncated DOF");
        return (NULL);
    }

    if (loadsz >= dtrace_dof_maxsize) {
        ddi_prop_free(buf);
        dtrace_dof_error(NULL, "oversized DOF");
        return (NULL);
    }

    dof = kmem_alloc(loadsz, KM_SLEEP);
    bcopy(buf, dof, loadsz);
    ddi_prop_free(buf);

    return (dof);
}
static void
dtrace_dof_destroy(dof_hdr_t *dof)
{
    kmem_free(dof, dof->dofh_loadsz);
}
/*
 * Return the dof_sec_t pointer corresponding to a given section index.  If the
 * index is not valid, dtrace_dof_error() is called and NULL is returned.  If
 * a type other than DOF_SECT_NONE is specified, the header is checked against
 * this type and NULL is returned if the types do not match.
 */
static dof_sec_t *
dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
{
    dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
        ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);

    if (i >= dof->dofh_secnum) {
        dtrace_dof_error(dof, "referenced section index is invalid");
        return (NULL);
    }

    if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
        dtrace_dof_error(dof, "referenced section is not loadable");
        return (NULL);
    }

    if (type != DOF_SECT_NONE && type != sec->dofs_type) {
        dtrace_dof_error(dof, "referenced section is the wrong type");
        return (NULL);
    }

    return (sec);
}
static dtrace_probedesc_t *
dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc)
{
    dof_probedesc_t *probe;
    dof_sec_t *strtab;
    uintptr_t daddr = (uintptr_t)dof;
    uintptr_t str;
    size_t size;

    if (sec->dofs_type != DOF_SECT_PROBEDESC) {
        dtrace_dof_error(dof, "invalid probe section");
        return (NULL);
    }

    if (sec->dofs_align != sizeof (dof_secidx_t)) {
        dtrace_dof_error(dof, "bad alignment in probe description");
        return (NULL);
    }

    if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) {
        dtrace_dof_error(dof, "truncated probe description");
        return (NULL);
    }

    probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset);
    strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab);

    if (strtab == NULL)
        return (NULL);

    str = daddr + strtab->dofs_offset;
    size = strtab->dofs_size;

    if (probe->dofp_provider >= strtab->dofs_size) {
        dtrace_dof_error(dof, "corrupt probe provider");
        return (NULL);
    }

    (void) strncpy(desc->dtpd_provider,
        (char *)(str + probe->dofp_provider),
        MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider));

    if (probe->dofp_mod >= strtab->dofs_size) {
        dtrace_dof_error(dof, "corrupt probe module");
        return (NULL);
    }

    (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod),
        MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod));

    if (probe->dofp_func >= strtab->dofs_size) {
        dtrace_dof_error(dof, "corrupt probe function");
        return (NULL);
    }

    (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func),
        MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func));

    if (probe->dofp_name >= strtab->dofs_size) {
        dtrace_dof_error(dof, "corrupt probe name");
        return (NULL);
    }

    (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name),
        MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name));

    return (desc);
}
static dtrace_difo_t *
dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
    dtrace_difo_t *dp;
    size_t ttl = 0;
    dof_difohdr_t *dofd;
    uintptr_t daddr = (uintptr_t)dof;
    size_t max = dtrace_difo_maxsize;
    int i, l, n;

    static const struct {
        int section;
        int bufoffs;
        int lenoffs;
        int entsize;
        int align;
        const char *msg;
    } difo[] = {
        { DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf),
        offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t),
        sizeof (dif_instr_t), "multiple DIF sections" },

        { DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab),
        offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t),
        sizeof (uint64_t), "multiple integer tables" },

        { DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab),
        offsetof(dtrace_difo_t, dtdo_strlen), 0,
        sizeof (char), "multiple string tables" },

        { DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab),
        offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t),
        sizeof (uint_t), "multiple variable tables" },

        { DOF_SECT_NONE, 0, 0, 0, 0, NULL }
    };

    if (sec->dofs_type != DOF_SECT_DIFOHDR) {
        dtrace_dof_error(dof, "invalid DIFO header section");
        return (NULL);
    }

    if (sec->dofs_align != sizeof (dof_secidx_t)) {
        dtrace_dof_error(dof, "bad alignment in DIFO header");
        return (NULL);
    }

    if (sec->dofs_size < sizeof (dof_difohdr_t) ||
        sec->dofs_size % sizeof (dof_secidx_t)) {
        dtrace_dof_error(dof, "bad size in DIFO header");
        return (NULL);
    }

    dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
    n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1;

    dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
    dp->dtdo_rtype = dofd->dofd_rtype;

    for (l = 0; l < n; l++) {
        dof_sec_t *subsec;
        void **bufp;
        uint32_t *lenp;

        if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE,
            dofd->dofd_links[l])) == NULL)
            goto err; /* invalid section link */

        if (ttl + subsec->dofs_size > max) {
            dtrace_dof_error(dof, "exceeds maximum size");
            goto err;
        }

        ttl += subsec->dofs_size;

        for (i = 0; difo[i].section != DOF_SECT_NONE; i++) {
            if (subsec->dofs_type != difo[i].section)
                continue;

            if (!(subsec->dofs_flags & DOF_SECF_LOAD)) {
                dtrace_dof_error(dof, "section not loaded");
                goto err;
            }

            if (subsec->dofs_align != difo[i].align) {
                dtrace_dof_error(dof, "bad alignment");
                goto err;
            }

            bufp = (void **)((uintptr_t)dp + difo[i].bufoffs);
            lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs);

            if (*bufp != NULL) {
                dtrace_dof_error(dof, difo[i].msg);
                goto err;
            }

            if (difo[i].entsize != subsec->dofs_entsize) {
                dtrace_dof_error(dof, "entry size mismatch");
                goto err;
            }

            if (subsec->dofs_entsize != 0 &&
                (subsec->dofs_size % subsec->dofs_entsize) != 0) {
                dtrace_dof_error(dof, "corrupt entry size");
                goto err;
            }

            *lenp = subsec->dofs_size;
            *bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP);
            bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset),
                *bufp, subsec->dofs_size);

            if (subsec->dofs_entsize != 0)
                *lenp /= subsec->dofs_entsize;

            break;
        }

        /*
         * If we encounter a loadable DIFO sub-section that is not
         * known to us, assume this is a broken program and fail.
         */
        if (difo[i].section == DOF_SECT_NONE &&
            (subsec->dofs_flags & DOF_SECF_LOAD)) {
            dtrace_dof_error(dof, "unrecognized DIFO subsection");
            goto err;
        }
    }

    if (dp->dtdo_buf == NULL) {
        /*
         * We can't have a DIF object without DIF text.
         */
        dtrace_dof_error(dof, "missing DIF text");
        goto err;
    }

    /*
     * Before we validate the DIF object, run through the variable table
     * looking for the strings -- if any of their sizes are unset, we'll set
     * their size to be the system-wide default string size.  Note that
     * this should _not_ happen if the "strsize" option has been set --
     * in this case, the compiler should have set the size to reflect the
     * setting of the option.
     */
    for (i = 0; i < dp->dtdo_varlen; i++) {
        dtrace_difv_t *v = &dp->dtdo_vartab[i];
        dtrace_diftype_t *t = &v->dtdv_type;

        if (v->dtdv_id < DIF_VAR_OTHER_UBASE)
            continue;

        if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0)
            t->dtdt_size = dtrace_strsize_default;
    }

    if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0)
        goto err;

    dtrace_difo_init(dp, vstate);
    return (dp);

err:
    kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
    kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
    kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
    kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));

    kmem_free(dp, sizeof (dtrace_difo_t));
    return (NULL);
}
static dtrace_predicate_t *
dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
    dtrace_difo_t *dp;

    if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL)
        return (NULL);

    return (dtrace_predicate_create(dp));
}
static dtrace_actdesc_t *
dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
    dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next;
    dof_actdesc_t *desc;
    dof_sec_t *difosec;
    size_t offs;
    uintptr_t daddr = (uintptr_t)dof;
    uint64_t arg;
    dtrace_actkind_t kind;

    if (sec->dofs_type != DOF_SECT_ACTDESC) {
        dtrace_dof_error(dof, "invalid action section");
        return (NULL);
    }

    if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) {
        dtrace_dof_error(dof, "truncated action description");
        return (NULL);
    }

    if (sec->dofs_align != sizeof (uint64_t)) {
        dtrace_dof_error(dof, "bad alignment in action description");
        return (NULL);
    }

    if (sec->dofs_size < sec->dofs_entsize) {
        dtrace_dof_error(dof, "section entry size exceeds total size");
        return (NULL);
    }

    if (sec->dofs_entsize != sizeof (dof_actdesc_t)) {
        dtrace_dof_error(dof, "bad entry size in action description");
        return (NULL);
    }

    if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) {
        dtrace_dof_error(dof, "actions exceed dtrace_actions_max");
        return (NULL);
    }

    for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) {
        desc = (dof_actdesc_t *)(daddr +
            (uintptr_t)sec->dofs_offset + offs);
        kind = (dtrace_actkind_t)desc->dofa_kind;

        if (DTRACEACT_ISPRINTFLIKE(kind) &&
            (kind != DTRACEACT_PRINTA ||
            desc->dofa_strtab != DOF_SECIDX_NONE)) {
            dof_sec_t *strtab;
            char *str, *fmt;
            uint64_t i;

            /*
             * printf()-like actions must have a format string.
             */
            if ((strtab = dtrace_dof_sect(dof,
                DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
                goto err;

            str = (char *)((uintptr_t)dof +
                (uintptr_t)strtab->dofs_offset);

            for (i = desc->dofa_arg; i < strtab->dofs_size; i++) {
                if (str[i] == '\0')
                    break;
            }

            if (i >= strtab->dofs_size) {
                dtrace_dof_error(dof, "bogus format string");
                goto err;
            }

            if (i == desc->dofa_arg) {
                dtrace_dof_error(dof, "empty format string");
                goto err;
            }

            i -= desc->dofa_arg;
            fmt = kmem_alloc(i + 1, KM_SLEEP);
            bcopy(&str[desc->dofa_arg], fmt, i + 1);
            arg = (uint64_t)(uintptr_t)fmt;
        } else {
            if (kind == DTRACEACT_PRINTA) {
                ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE);
                arg = 0;
            } else {
                arg = desc->dofa_arg;
            }
        }

        act = dtrace_actdesc_create(kind, desc->dofa_ntuple,
            desc->dofa_uarg, arg);

        if (last != NULL) {
            last->dtad_next = act;
        } else {
            first = act;
        }

        last = act;

        if (desc->dofa_difo == DOF_SECIDX_NONE)
            continue;

        if ((difosec = dtrace_dof_sect(dof,
            DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL)
            goto err;

        act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr);

        if (act->dtad_difo == NULL)
            goto err;
    }

    ASSERT(first != NULL);
    return (first);

err:
    for (act = first; act != NULL; act = next) {
        next = act->dtad_next;
        dtrace_actdesc_release(act, vstate);
    }

    return (NULL);
}
static dtrace_ecbdesc_t *
dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
    dtrace_ecbdesc_t *ep;
    dof_ecbdesc_t *ecb;
    dtrace_probedesc_t *desc;
    dtrace_predicate_t *pred = NULL;

    if (sec->dofs_size < sizeof (dof_ecbdesc_t)) {
        dtrace_dof_error(dof, "truncated ECB description");
        return (NULL);
    }

    if (sec->dofs_align != sizeof (uint64_t)) {
        dtrace_dof_error(dof, "bad alignment in ECB description");
        return (NULL);
    }

    ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset);
    sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes);

    if (sec == NULL)
        return (NULL);

    ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
    ep->dted_uarg = ecb->dofe_uarg;
    desc = &ep->dted_probe;

    if (dtrace_dof_probedesc(dof, sec, desc) == NULL)
        goto err;

    if (ecb->dofe_pred != DOF_SECIDX_NONE) {
        if ((sec = dtrace_dof_sect(dof,
            DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL)
            goto err;

        if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL)
            goto err;

        ep->dted_pred.dtpdd_predicate = pred;
    }

    if (ecb->dofe_actions != DOF_SECIDX_NONE) {
        if ((sec = dtrace_dof_sect(dof,
            DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL)
            goto err;

        ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr);

        if (ep->dted_action == NULL)
            goto err;
    }

    return (ep);

err:
    if (pred != NULL)
        dtrace_predicate_release(pred, vstate);
    kmem_free(ep, sizeof (dtrace_ecbdesc_t));
    return (NULL);
}
/*
 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the
 * specified DOF.  At present, this amounts to simply adding 'ubase' to the
 * site of any user SETX relocations to account for load object base address.
 * In the future, if we need other relocations, this function can be extended.
 */
static int
dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase)
{
	uintptr_t daddr = (uintptr_t)dof;
	dof_relohdr_t *dofr =
	    (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
	dof_sec_t *ss, *rs, *ts;
	dof_relodesc_t *r;
	uint_t i, n;

	if (sec->dofs_size < sizeof (dof_relohdr_t) ||
	    sec->dofs_align != sizeof (dof_secidx_t)) {
		dtrace_dof_error(dof, "invalid relocation header");
		return (-1);
	}

	ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab);
	rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec);
	ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec);

	if (ss == NULL || rs == NULL || ts == NULL)
		return (-1); /* dtrace_dof_error() has been called already */

	if (rs->dofs_entsize < sizeof (dof_relodesc_t) ||
	    rs->dofs_align != sizeof (uint64_t)) {
		dtrace_dof_error(dof, "invalid relocation section");
		return (-1);
	}

	r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset);
	n = rs->dofs_size / rs->dofs_entsize;

	for (i = 0; i < n; i++) {
		uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset;

		switch (r->dofr_type) {
		case DOF_RELO_NONE:
			break;
		case DOF_RELO_SETX:
			if (r->dofr_offset >= ts->dofs_size || r->dofr_offset +
			    sizeof (uint64_t) > ts->dofs_size) {
				dtrace_dof_error(dof, "bad relocation offset");
				return (-1);
			}

			if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) {
				dtrace_dof_error(dof, "misaligned setx relo");
				return (-1);
			}

			*(uint64_t *)taddr += ubase;
			break;
		default:
			dtrace_dof_error(dof, "invalid relocation type");
			return (-1);
		}

		r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize);
	}

	return (0);
}
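
/*
 * Editorial sketch (not part of the original source):  a SETX relocation is
 * nothing more than base-address patching -- the DOF records where a 64-bit
 * constant lives, and the consumer adds the load object's base address to
 * it.  A minimal user-level model of the same operation, with hypothetical
 * names, looks like this:
 *
 *	void
 *	apply_setx(char *image, uint64_t site_offset, uint64_t ubase)
 *	{
 *		uint64_t *slot = (uint64_t *)(image + site_offset);
 *		*slot += ubase;		// rebase the recorded address
 *	}
 */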
/*
 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
 * header:  it should be at the front of a memory region that is at least
 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
 * size.  It need not be validated in any other way.
 */
static int
dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
    dtrace_enabling_t **enabp, uint64_t ubase, int noprobes)
{
	uint64_t len = dof->dofh_loadsz, seclen;
	uintptr_t daddr = (uintptr_t)dof;
	dtrace_ecbdesc_t *ep;
	dtrace_enabling_t *enab;
	uint_t i;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t));

	/*
	 * Check the DOF header identification bytes.  In addition to checking
	 * valid settings, we also verify that unused bits/bytes are zeroed so
	 * we can use them later without fear of regressing existing binaries.
	 */
	if (bcmp(&dof->dofh_ident[DOF_ID_MAG0],
	    DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) {
		dtrace_dof_error(dof, "DOF magic string mismatch");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 &&
	    dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) {
		dtrace_dof_error(dof, "DOF has invalid data model");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) {
		dtrace_dof_error(dof, "DOF encoding mismatch");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) {
		dtrace_dof_error(dof, "DOF version mismatch");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) {
		dtrace_dof_error(dof, "DOF uses unsupported instruction set");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) {
		dtrace_dof_error(dof, "DOF uses too many integer registers");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) {
		dtrace_dof_error(dof, "DOF uses too many tuple registers");
		return (-1);
	}

	for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) {
		if (dof->dofh_ident[i] != 0) {
			dtrace_dof_error(dof, "DOF has invalid ident byte set");
			return (-1);
		}
	}

	if (dof->dofh_flags & ~DOF_FL_VALID) {
		dtrace_dof_error(dof, "DOF has invalid flag bits set");
		return (-1);
	}

	if (dof->dofh_secsize == 0) {
		dtrace_dof_error(dof, "zero section header size");
		return (-1);
	}

	/*
	 * Check that the section headers don't exceed the amount of DOF
	 * data.  Note that we cast the section size and number of sections
	 * to uint64_t's to prevent possible overflow in the multiplication.
	 */
	seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize;

	if (dof->dofh_secoff > len || seclen > len ||
	    dof->dofh_secoff + seclen > len) {
		dtrace_dof_error(dof, "truncated section headers");
		return (-1);
	}

	if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) {
		dtrace_dof_error(dof, "misaligned section headers");
		return (-1);
	}

	if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) {
		dtrace_dof_error(dof, "misaligned section size");
		return (-1);
	}

	/*
	 * Take an initial pass through the section headers to be sure that
	 * the headers don't have stray offsets.  If the 'noprobes' flag is
	 * set, do not permit sections relating to providers, probes, or args.
	 */
	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (noprobes) {
			switch (sec->dofs_type) {
			case DOF_SECT_PROVIDER:
			case DOF_SECT_PROBES:
			case DOF_SECT_PRARGS:
			case DOF_SECT_PROFFS:
				dtrace_dof_error(dof, "illegal sections "
				    "for enabling");
				return (-1);
			}
		}

		if (DOF_SEC_ISLOADABLE(sec->dofs_type) &&
		    !(sec->dofs_flags & DOF_SECF_LOAD)) {
			dtrace_dof_error(dof, "loadable section with load "
			    "flag unset");
			return (-1);
		}

		if (!(sec->dofs_flags & DOF_SECF_LOAD))
			continue; /* just ignore non-loadable sections */

		if (sec->dofs_align & (sec->dofs_align - 1)) {
			dtrace_dof_error(dof, "bad section alignment");
			return (-1);
		}

		if (sec->dofs_offset & (sec->dofs_align - 1)) {
			dtrace_dof_error(dof, "misaligned section");
			return (-1);
		}

		if (sec->dofs_offset > len || sec->dofs_size > len ||
		    sec->dofs_offset + sec->dofs_size > len) {
			dtrace_dof_error(dof, "corrupt section header");
			return (-1);
		}

		if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr +
		    sec->dofs_offset + sec->dofs_size - 1) != '\0') {
			dtrace_dof_error(dof, "non-terminating string table");
			return (-1);
		}
	}

	/*
	 * Take a second pass through the sections and locate and perform any
	 * relocations that are present.  We do this after the first pass to
	 * be sure that all sections have had their headers validated.
	 */
	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (!(sec->dofs_flags & DOF_SECF_LOAD))
			continue; /* skip sections that are not loadable */

		switch (sec->dofs_type) {
		case DOF_SECT_URELHDR:
			if (dtrace_dof_relocate(dof, sec, ubase) != 0)
				return (-1);
			break;
		}
	}

	if ((enab = *enabp) == NULL)
		enab = *enabp = dtrace_enabling_create(vstate);

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_ECBDESC)
			continue;

		if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) {
			dtrace_enabling_destroy(enab);
			*enabp = NULL;
			return (-1);
		}

		dtrace_enabling_add(enab, ep);
	}

	return (0);
}
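
/*
 * Editorial note (sketch, not in the original source):  the bounds check in
 * dtrace_dof_slurp() multiplies dofh_secnum by dofh_secsize in 64-bit
 * arithmetic before comparing against the load size.  Had the two 32-bit
 * fields been multiplied natively, a crafted DOF could wrap the product and
 * slip an out-of-range section table past the check.  The pattern in
 * miniature:
 *
 *	int
 *	headers_fit(uint32_t num, uint32_t size, uint64_t off, uint64_t len)
 *	{
 *		uint64_t seclen = (uint64_t)num * (uint64_t)size;
 *
 *		return (off <= len && seclen <= len && off + seclen <= len);
 *	}
 */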
/*
 * Process DOF for any options.  This routine assumes that the DOF has been
 * at least processed by dtrace_dof_slurp().
 */
static int
dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state)
{
	int i, rval;
	uint32_t entsize;
	size_t offs;
	dof_optdesc_t *desc;

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_OPTDESC)
			continue;

		if (sec->dofs_align != sizeof (uint64_t)) {
			dtrace_dof_error(dof, "bad alignment in "
			    "option description");
			return (EINVAL);
		}

		if ((entsize = sec->dofs_entsize) == 0) {
			dtrace_dof_error(dof, "zeroed option entry size");
			return (EINVAL);
		}

		if (entsize < sizeof (dof_optdesc_t)) {
			dtrace_dof_error(dof, "bad option entry size");
			return (EINVAL);
		}

		for (offs = 0; offs < sec->dofs_size; offs += entsize) {
			desc = (dof_optdesc_t *)((uintptr_t)dof +
			    (uintptr_t)sec->dofs_offset + offs);

			if (desc->dofo_strtab != DOF_SECIDX_NONE) {
				dtrace_dof_error(dof, "non-zero option string");
				return (EINVAL);
			}

			if (desc->dofo_value == DTRACEOPT_UNSET) {
				dtrace_dof_error(dof, "unset option");
				return (EINVAL);
			}

			if ((rval = dtrace_state_option(state,
			    desc->dofo_option, desc->dofo_value)) != 0) {
				dtrace_dof_error(dof, "rejected option");
				return (rval);
			}
		}
	}

	return (0);
}
/*
 * DTrace Consumer State Functions
 */
int
dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
{
	size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize;
	void *base;
	uintptr_t limit;
	dtrace_dynvar_t *dvar, *next, *start;
	int i;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL);

	bzero(dstate, sizeof (dtrace_dstate_t));

	if ((dstate->dtds_chunksize = chunksize) == 0)
		dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE;

	if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
		size = min;

	if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL)
		return (ENOMEM);

	dstate->dtds_size = size;
	dstate->dtds_base = base;
	dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP);
	bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t));

	hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t));

	if (hashsize != 1 && (hashsize & 1))
		hashsize--;

	dstate->dtds_hashsize = hashsize;
	dstate->dtds_hash = dstate->dtds_base;

	/*
	 * Set all of our hash buckets to point to the single sink, and (if
	 * it hasn't already been set), set the sink's hash value to be the
	 * sink sentinel value.  The sink is needed for dynamic variable
	 * lookups to know that they have iterated over an entire, valid hash
	 * chain.
	 */
	for (i = 0; i < hashsize; i++)
		dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink;

	if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK)
		dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK;

	/*
	 * Determine number of active CPUs.  Divide free list evenly among
	 * active CPUs.
	 */
	start = (dtrace_dynvar_t *)
	    ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t));
	limit = (uintptr_t)base + size;

	maxper = (limit - (uintptr_t)start) / NCPU;
	maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize;

	for (i = 0; i < NCPU; i++) {
		dstate->dtds_percpu[i].dtdsc_free = dvar = start;

		/*
		 * If we don't even have enough chunks to make it once through
		 * NCPUs, we're just going to allocate everything to the first
		 * CPU.  And if we're on the last CPU, we're going to allocate
		 * whatever is left over.  In either case, we set the limit to
		 * be the limit of the dynamic variable space.
		 */
		if (maxper == 0 || i == NCPU - 1) {
			limit = (uintptr_t)base + size;
			start = NULL;
		} else {
			limit = (uintptr_t)start + maxper;
			start = (dtrace_dynvar_t *)limit;
		}

		ASSERT(limit <= (uintptr_t)base + size);

		for (;;) {
			next = (dtrace_dynvar_t *)((uintptr_t)dvar +
			    dstate->dtds_chunksize);

			if ((uintptr_t)next + dstate->dtds_chunksize >= limit)
				break;

			dvar->dtdv_next = next;
			dvar = next;
		}

		if (maxper == 0)
			break;
	}

	return (0);
}
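
/*
 * Editorial sketch (hypothetical names, not part of this file):  the loop
 * above carves one allocation into per-CPU free lists of fixed-size chunks
 * by threading a next pointer through the region.  Reduced to its
 * essentials:
 *
 *	chunk_t *
 *	carve(uintptr_t start, uintptr_t limit, size_t chunksize)
 *	{
 *		chunk_t *head = (chunk_t *)start;
 *
 *		while (start + 2 * chunksize < limit) {
 *			((chunk_t *)start)->next =
 *			    (chunk_t *)(start + chunksize);
 *			start += chunksize;
 *		}
 *		return (head);	// head of the newly-threaded free list
 *	}
 */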
void
dtrace_dstate_fini(dtrace_dstate_t *dstate)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	if (dstate->dtds_base == NULL)
		return;

	kmem_free(dstate->dtds_base, dstate->dtds_size);
	kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu);
}
static void
dtrace_vstate_fini(dtrace_vstate_t *vstate)
{
	/*
	 * Logical XOR, where are you?
	 */
	ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));

	if (vstate->dtvs_nglobals > 0) {
		kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
		    sizeof (dtrace_statvar_t *));
	}

	if (vstate->dtvs_ntlocals > 0) {
		kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
		    sizeof (dtrace_difv_t));
	}

	ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));

	if (vstate->dtvs_nlocals > 0) {
		kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
		    sizeof (dtrace_statvar_t *));
	}
}
static void
dtrace_state_clean(dtrace_state_t *state)
{
	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
		return;

	dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
	dtrace_speculation_clean(state);
}
static void
dtrace_state_deadman(dtrace_state_t *state)
{
	hrtime_t now;

	dtrace_sync();

	now = dtrace_gethrtime();

	if (state != dtrace_anon.dta_state &&
	    now - state->dts_laststatus >= dtrace_deadman_user)
		return;

	/*
	 * We must be sure that dts_alive never appears to be less than the
	 * value upon entry to dtrace_state_deadman(), and because we lack a
	 * dtrace_cas64(), we cannot store to it atomically.  We thus instead
	 * store INT64_MAX to it, followed by a memory barrier, followed by
	 * the new value.  This assures that dts_alive never appears to be
	 * less than its true value, regardless of the order in which the
	 * stores to the underlying storage are issued.
	 */
	state->dts_alive = INT64_MAX;
	dtrace_membar_producer();
	state->dts_alive = now;
}
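
/*
 * Editorial sketch (not part of the original source):  the
 * INT64_MAX/membar/now sequence above is a general technique for updating
 * a 64-bit value that readers may load at any time, on a machine without a
 * 64-bit compare-and-swap:  first publish a value that can only err high,
 * fence, then store the real value.  In outline, with hypothetical names:
 *
 *	void
 *	publish_monotonic(volatile int64_t *p, int64_t now)
 *	{
 *		*p = INT64_MAX;			// never appears too small
 *		dtrace_membar_producer();	// order the two stores
 *		*p = now;
 *	}
 *
 * A reader that races with the update can observe the old value, INT64_MAX,
 * or the new value -- but never something less than the true liveness time.
 */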
static dtrace_state_t *
dtrace_state_create(dev_t *devp, cred_t *cr)
{
	minor_t minor;
	major_t major;
	char c[30];
	dtrace_state_t *state;
	dtrace_optval_t *opt;
	int bufsize = NCPU * sizeof (dtrace_buffer_t), i;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(MUTEX_HELD(&cpu_lock));

	minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
	    VM_BESTFIT | VM_SLEEP);

	if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) {
		vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
		return (NULL);
	}

	state = ddi_get_soft_state(dtrace_softstate, minor);
	state->dts_epid = DTRACE_EPIDNONE + 1;

	(void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor);
	state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);

	if (devp != NULL) {
		major = getemajor(*devp);
	} else {
		major = ddi_driver_major(dtrace_devi);
	}

	state->dts_dev = makedevice(major, minor);

	if (devp != NULL)
		*devp = state->dts_dev;

	/*
	 * We allocate NCPU buffers.  On the one hand, this can be quite
	 * a bit of memory per instance (nearly 36K on a Starcat).  On the
	 * other hand, it saves an additional memory reference in the probe
	 * path.
	 */
	state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP);
	state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP);
	state->dts_cleaner = CYCLIC_NONE;
	state->dts_deadman = CYCLIC_NONE;
	state->dts_vstate.dtvs_state = state;

	for (i = 0; i < DTRACEOPT_MAX; i++)
		state->dts_options[i] = DTRACEOPT_UNSET;

	/*
	 * Set the default options.
	 */
	opt = state->dts_options;
	opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH;
	opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO;
	opt[DTRACEOPT_NSPEC] = dtrace_nspec_default;
	opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default;
	opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL;
	opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default;
	opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default;
	opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default;
	opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default;
	opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default;
	opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default;
	opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default;
	opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default;
	opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default;

	state->dts_activity = DTRACE_ACTIVITY_INACTIVE;

	/*
	 * Depending on the user credentials, we set flag bits which alter probe
	 * visibility or the amount of destructiveness allowed.  In the case of
	 * actual anonymous tracing, or the possession of all privileges, all of
	 * the normal checks are bypassed.
	 */
	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
		state->dts_cred.dcr_action = DTRACE_CRA_ALL;
	} else {
		/*
		 * Set up the credentials for this instantiation.  We take a
		 * hold on the credential to prevent it from disappearing on
		 * us; this in turn prevents the zone_t referenced by this
		 * credential from disappearing.  This means that we can
		 * examine the credential and the zone from probe context.
		 */
		crhold(cr);
		state->dts_cred.dcr_cred = cr;

		/*
		 * CRA_PROC means "we have *some* privilege for dtrace" and
		 * unlocks the use of variables like pid, zonename, etc.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
		    PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
		}

		/*
		 * dtrace_user allows use of syscall and profile providers.
		 * If the user also has proc_owner and/or proc_zone, we
		 * extend the scope to include additional visibility and
		 * destructive power.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLPROC;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
			}

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLZONE;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
			}

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
			}
		}

		/*
		 * Holding the dtrace_kernel privilege also implies that
		 * the user has the dtrace_user privilege from a visibility
		 * perspective.  But without further privileges, some
		 * destructive actions are not available.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
			/*
			 * Make all probes in all zones visible.  However,
			 * this doesn't mean that all actions become available
			 * in all zones.
			 */
			state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
			    DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;

			state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
			    DTRACE_CRA_PROC;

			/*
			 * Holding proc_owner means that destructive actions
			 * for *this* zone are allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			/*
			 * Holding proc_zone means that destructive actions
			 * for this user/group ID in all zones is allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
			}
		}

		/*
		 * Holding the dtrace_proc privilege gives control over fasttrap
		 * and pid providers.  We need to grant wider destructive
		 * privileges in the event that the user has proc_owner and/or
		 * proc_zone.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
		}
	}

	return (state);
}
static int
dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
{
	dtrace_optval_t *opt = state->dts_options, size;
	processorid_t cpu = DTRACE_CPUALL;
	int flags = 0, rval, factor, divisor = 1;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(which < DTRACEOPT_MAX);
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE ||
	    (state == dtrace_anon.dta_state &&
	    state->dts_activity == DTRACE_ACTIVITY_ACTIVE));

	if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0)
		return (0);

	if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET)
		cpu = opt[DTRACEOPT_CPU];

	if (which == DTRACEOPT_SPECSIZE)
		flags |= DTRACEBUF_NOSWITCH;

	if (which == DTRACEOPT_BUFSIZE) {
		if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING)
			flags |= DTRACEBUF_RING;

		if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL)
			flags |= DTRACEBUF_FILL;

		if (state != dtrace_anon.dta_state ||
		    state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
			flags |= DTRACEBUF_INACTIVE;
	}

	for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) {
		/*
		 * The size must be 8-byte aligned.  If the size is not 8-byte
		 * aligned, drop it down by the difference.
		 */
		if (size & (sizeof (uint64_t) - 1))
			size -= size & (sizeof (uint64_t) - 1);

		if (size < state->dts_reserve) {
			/*
			 * Buffers always must be large enough to accommodate
			 * their prereserved space.  We return E2BIG instead
			 * of ENOMEM in this case to allow for user-level
			 * software to differentiate the cases.
			 */
			return (E2BIG);
		}

		rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor);

		if (rval != ENOMEM) {
			opt[which] = size;
			return (rval);
		}

		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
			return (rval);

		for (divisor = 2; divisor < factor; divisor <<= 1)
			continue;
	}

	return (ENOMEM);
}
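
/*
 * Editorial sketch (not part of the original source):  the allocation loop
 * above is a back-off strategy -- try the requested size and, on ENOMEM,
 * divide by the smallest power of two the allocator reports would help
 * (the 'factor' out-parameter), until the buffer either fits or shrinks
 * below the 8-byte floor.  The shape of the loop, with hypothetical names:
 *
 *	for (size = requested; size >= sizeof (uint64_t); size /= divisor) {
 *		if (try_alloc(size, &factor) == 0)
 *			return (0);		// success at this size
 *		for (divisor = 2; divisor < factor; divisor <<= 1)
 *			continue;		// back off geometrically
 *	}
 *	return (ENOMEM);
 */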
static int
dtrace_state_buffers(dtrace_state_t *state)
{
	dtrace_speculation_t *spec = state->dts_speculations;
	int rval, i;

	if ((rval = dtrace_state_buffer(state, state->dts_buffer,
	    DTRACEOPT_BUFSIZE)) != 0)
		return (rval);

	if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer,
	    DTRACEOPT_AGGSIZE)) != 0)
		return (rval);

	for (i = 0; i < state->dts_nspeculations; i++) {
		if ((rval = dtrace_state_buffer(state,
		    spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0)
			return (rval);
	}

	return (0);
}
static void
dtrace_state_prereserve(dtrace_state_t *state)
{
	dtrace_ecb_t *ecb;
	dtrace_probe_t *probe;

	state->dts_reserve = 0;

	if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)
		return;

	/*
	 * If our buffer policy is a "fill" buffer policy, we need to set the
	 * prereserved space to be the space required by the END probes.
	 */
	probe = dtrace_probes[dtrace_probeid_end - 1];
	ASSERT(probe != NULL);

	for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
		if (ecb->dte_state != state)
			continue;

		state->dts_reserve += ecb->dte_needed + ecb->dte_alignment;
	}
}
static int
dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_optval_t *opt = state->dts_options, sz, nspec;
	dtrace_speculation_t *spec;
	dtrace_buffer_t *buf;
	cyc_handler_t hdlr;
	cyc_time_t when;
	int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t);
	dtrace_icookie_t cookie;

	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_lock);

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
		rval = EBUSY;
		goto out;
	}

	/*
	 * Before we can perform any checks, we must prime all of the
	 * retained enablings that correspond to this state.
	 */
	dtrace_enabling_prime(state);

	if (state->dts_destructive && !state->dts_cred.dcr_destructive) {
		rval = EACCES;
		goto out;
	}

	dtrace_state_prereserve(state);

	/*
	 * Now we try to allocate our speculations.  We do not automatically
	 * resize the number of speculations; if this fails, we will fail the
	 * operation.
	 */
	nspec = opt[DTRACEOPT_NSPEC];
	ASSERT(nspec != DTRACEOPT_UNSET);

	if (nspec > INT_MAX) {
		rval = ENOMEM;
		goto out;
	}

	spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t),
	    KM_NOSLEEP | KM_NORMALPRI);

	if (spec == NULL) {
		rval = ENOMEM;
		goto out;
	}

	state->dts_speculations = spec;
	state->dts_nspeculations = (int)nspec;

	for (i = 0; i < nspec; i++) {
		if ((buf = kmem_zalloc(bufsize,
		    KM_NOSLEEP | KM_NORMALPRI)) == NULL) {
			rval = ENOMEM;
			goto err;
		}

		spec[i].dtsp_buffer = buf;
	}

	if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) {
		if (dtrace_anon.dta_state == NULL) {
			rval = ENOENT;
			goto out;
		}

		if (state->dts_necbs != 0) {
			rval = EALREADY;
			goto out;
		}

		state->dts_anon = dtrace_anon_grab();
		ASSERT(state->dts_anon != NULL);
		state = state->dts_anon;

		/*
		 * We want "grabanon" to be set in the grabbed state, so we'll
		 * copy that option value from the grabbing state into the
		 * grabbed state.
		 */
		state->dts_options[DTRACEOPT_GRABANON] =
		    opt[DTRACEOPT_GRABANON];

		*cpu = dtrace_anon.dta_beganon;

		/*
		 * If the anonymous state is active (as it almost certainly
		 * is if the anonymous enabling ultimately matched anything),
		 * we don't allow any further option processing -- but we
		 * don't return failure.
		 */
		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
			goto out;
	}

	if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET &&
	    opt[DTRACEOPT_AGGSIZE] != 0) {
		if (state->dts_aggregations == NULL) {
			/*
			 * We're not going to create an aggregation buffer
			 * because we don't have any ECBs that contain
			 * aggregations -- set this option to 0.
			 */
			opt[DTRACEOPT_AGGSIZE] = 0;
		} else {
			/*
			 * If we have an aggregation buffer, we must also have
			 * a buffer to use as scratch.
			 */
			if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET ||
			    opt[DTRACEOPT_BUFSIZE] < state->dts_needed) {
				opt[DTRACEOPT_BUFSIZE] = state->dts_needed;
			}
		}
	}

	if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET &&
	    opt[DTRACEOPT_SPECSIZE] != 0) {
		if (!state->dts_speculates) {
			/*
			 * We're not going to create speculation buffers
			 * because we don't have any ECBs that actually
			 * speculate -- set the speculation size to 0.
			 */
			opt[DTRACEOPT_SPECSIZE] = 0;
		}
	}

	/*
	 * The bare minimum size for any buffer that we're actually going to
	 * do anything to is sizeof (uint64_t).
	 */
	sz = sizeof (uint64_t);

	if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) ||
	    (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) ||
	    (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) {
		/*
		 * A buffer size has been explicitly set to 0 (or to a size
		 * that will be adjusted to 0) and we need the space -- we
		 * need to return failure.  We return ENOSPC to differentiate
		 * it from failing to allocate a buffer due to failure to meet
		 * the reserve (for which we return E2BIG).
		 */
		rval = ENOSPC;
		goto out;
	}

	if ((rval = dtrace_state_buffers(state)) != 0)
		goto err;

	if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET)
		sz = dtrace_dstate_defsize;

	do {
		rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz);

		if (rval == 0)
			break;

		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
			goto err;
	} while (sz >>= 1);

	opt[DTRACEOPT_DYNVARSIZE] = sz;

	if (rval != 0)
		goto err;

	if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max)
		opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max;

	if (opt[DTRACEOPT_CLEANRATE] == 0)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;

	if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min;

	if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;

	hdlr.cyh_func = (cyc_func_t)dtrace_state_clean;
	hdlr.cyh_arg = state;
	hdlr.cyh_level = CY_LOW_LEVEL;

	when.cyt_when = 0;
	when.cyt_interval = opt[DTRACEOPT_CLEANRATE];

	state->dts_cleaner = cyclic_add(&hdlr, &when);

	hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
	hdlr.cyh_arg = state;
	hdlr.cyh_level = CY_LOW_LEVEL;

	when.cyt_when = 0;
	when.cyt_interval = dtrace_deadman_interval;

	state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
	state->dts_deadman = cyclic_add(&hdlr, &when);

	state->dts_activity = DTRACE_ACTIVITY_WARMUP;

	/*
	 * Now it's time to actually fire the BEGIN probe.  We need to disable
	 * interrupts here both to record the CPU on which we fired the BEGIN
	 * probe (the data from this CPU will be processed first at user
	 * level) and to manually activate the buffer for this CPU.
	 */
	cookie = dtrace_interrupt_disable();
	*cpu = CPU->cpu_id;
	ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
	state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;

	dtrace_probe(dtrace_probeid_begin,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	/*
	 * We may have had an exit action from a BEGIN probe; only change our
	 * state to ACTIVE if we're still in WARMUP.
	 */
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
	    state->dts_activity == DTRACE_ACTIVITY_DRAINING);

	if (state->dts_activity == DTRACE_ACTIVITY_WARMUP)
		state->dts_activity = DTRACE_ACTIVITY_ACTIVE;

	/*
	 * Regardless of whether or not now we're in ACTIVE or DRAINING, we
	 * want each CPU to transition its principal buffer out of the
	 * INACTIVE state.  Doing this assures that no CPU will suddenly begin
	 * processing an ECB halfway down a probe's ECB chain; all CPUs will
	 * atomically transition from processing none of a state's ECBs to
	 * processing all of them.
	 */
	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_buffer_activate, state);
	goto out;

err:
	dtrace_buffer_free(state->dts_buffer);
	dtrace_buffer_free(state->dts_aggbuffer);

	if ((nspec = state->dts_nspeculations) == 0) {
		ASSERT(state->dts_speculations == NULL);
		goto out;
	}

	spec = state->dts_speculations;
	ASSERT(spec != NULL);

	for (i = 0; i < state->dts_nspeculations; i++) {
		if ((buf = spec[i].dtsp_buffer) == NULL)
			break;

		dtrace_buffer_free(buf);
		kmem_free(buf, bufsize);
	}

	kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
	state->dts_nspeculations = 0;
	state->dts_speculations = NULL;

out:
	mutex_exit(&dtrace_lock);
	mutex_exit(&cpu_lock);

	return (rval);
}
static int
dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_icookie_t cookie;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
	    state->dts_activity != DTRACE_ACTIVITY_DRAINING)
		return (EINVAL);

	/*
	 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
	 * to be sure that every CPU has seen it.  See below for the details
	 * on why this is done.
	 */
	state->dts_activity = DTRACE_ACTIVITY_DRAINING;
	dtrace_sync();

	/*
	 * By this point, it is impossible for any CPU to be still processing
	 * with DTRACE_ACTIVITY_ACTIVE.  We can thus set our activity to
	 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
	 * other CPU in dtrace_buffer_reserve().  This allows dtrace_probe()
	 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
	 * iff we're in the END probe.
	 */
	state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
	dtrace_sync();
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);

	/*
	 * Finally, we can release the reserve and call the END probe.  We
	 * disable interrupts across calling the END probe to allow us to
	 * return the CPU on which we actually called the END probe.  This
	 * allows user-land to be sure that this CPU's principal buffer is
	 * processed last.
	 */
	state->dts_reserve = 0;

	cookie = dtrace_interrupt_disable();
	*cpu = CPU->cpu_id;
	dtrace_probe(dtrace_probeid_end,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	state->dts_activity = DTRACE_ACTIVITY_STOPPED;
	dtrace_sync();

	return (0);
}
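
/*
 * Editorial note (sketch, not in the original source):  the stop path above
 * is a multi-phase quiesce.  Each activity transition (ACTIVE -> DRAINING ->
 * COOLDOWN -> STOPPED) is followed by dtrace_sync(), so that by the time the
 * END probe fires, no CPU can still be mid-way through this state's ECB
 * chain.  The pattern in miniature:
 *
 *	state->dts_activity = DTRACE_ACTIVITY_DRAINING;
 *	dtrace_sync();		// all CPUs now observe DRAINING
 *	state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
 *	...			// safe: nothing races in buffer_reserve()
 *
 * The same store-flag/sync/act sequence appears again in
 * dtrace_state_destroy() below, with DTRACE_ACTIVITY_KILLED.
 */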
static int
dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
    dtrace_optval_t val)
{
	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
		return (EBUSY);

	if (option >= DTRACEOPT_MAX)
		return (EINVAL);

	if (option != DTRACEOPT_CPU && val < 0)
		return (EINVAL);

	switch (option) {
	case DTRACEOPT_DESTRUCTIVE:
		if (dtrace_destructive_disallow)
			return (EACCES);

		state->dts_cred.dcr_destructive = 1;
		break;

	case DTRACEOPT_BUFSIZE:
	case DTRACEOPT_DYNVARSIZE:
	case DTRACEOPT_AGGSIZE:
	case DTRACEOPT_SPECSIZE:
	case DTRACEOPT_STRSIZE:
		if (val < 0)
			return (EINVAL);

		if (val >= LONG_MAX) {
			/*
			 * If this is an otherwise negative value, set it to
			 * the highest multiple of 128m less than LONG_MAX.
			 * Technically, we're adjusting the size without
			 * regard to the buffer resizing policy, but in fact,
			 * this has no effect -- if we set the buffer size to
			 * ~LONG_MAX and the buffer policy is ultimately set to
			 * be "manual", the buffer allocation is guaranteed to
			 * fail, if only because the allocation requires two
			 * buffers.  (We set the size to the highest
			 * multiple of 128m because it ensures that the size
			 * will remain a multiple of a megabyte when
			 * repeatedly halved -- all the way down to 15m.)
			 */
			val = LONG_MAX - (1 << 27) + 1;
		}
	}

	state->dts_options[option] = val;

	return (0);
}
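
/*
 * Editorial arithmetic check (not in the original source):  with a 64-bit
 * long, LONG_MAX is 2^63 - 1, so the clamp above evaluates to
 * 2^63 - 2^27 = 2^27 * (2^36 - 1) -- exactly the highest multiple of 128m
 * (2^27 bytes) representable in a long, as the comment above promises.
 */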
static void
dtrace_state_destroy(dtrace_state_t *state)
{
	dtrace_ecb_t *ecb;
	dtrace_vstate_t *vstate = &state->dts_vstate;
	minor_t minor = getminor(state->dts_dev);
	int i, bufsize = NCPU * sizeof (dtrace_buffer_t);
	dtrace_speculation_t *spec = state->dts_speculations;
	int nspec = state->dts_nspeculations;
	uint32_t match;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * First, retract any retained enablings for this state.
	 */
	dtrace_enabling_retract(state);
	ASSERT(state->dts_nretained == 0);

	if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE ||
	    state->dts_activity == DTRACE_ACTIVITY_DRAINING) {
		/*
		 * We have managed to come into dtrace_state_destroy() on a
		 * hot enabling -- almost certainly because of a disorderly
		 * shutdown of a consumer.  (That is, a consumer that is
		 * exiting without having called dtrace_stop().)  In this case,
		 * we're going to set our activity to be KILLED, and then
		 * issue a sync to be sure that everyone is out of probe
		 * context before we start blowing away ECBs.
		 */
		state->dts_activity = DTRACE_ACTIVITY_KILLED;
		dtrace_sync();
	}

	/*
	 * Release the credential hold we took in dtrace_state_create().
	 */
	if (state->dts_cred.dcr_cred != NULL)
		crfree(state->dts_cred.dcr_cred);

	/*
	 * Now we can safely disable and destroy any enabled probes.  Because
	 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
	 * (especially if they're all enabled), we take two passes through the
	 * ECBs:  in the first, we disable just DTRACE_PRIV_KERNEL probes, and
	 * in the second we disable whatever is left over.
	 */
	for (match = DTRACE_PRIV_KERNEL; ; match = 0) {
		for (i = 0; i < state->dts_necbs; i++) {
			if ((ecb = state->dts_ecbs[i]) == NULL)
				continue;

			if (match && ecb->dte_probe != NULL) {
				dtrace_probe_t *probe = ecb->dte_probe;
				dtrace_provider_t *prov = probe->dtpr_provider;

				if (!(prov->dtpv_priv.dtpp_flags & match))
					continue;
			}

			dtrace_ecb_disable(ecb);
			dtrace_ecb_destroy(ecb);
		}

		if (!match)
			break;
	}

	/*
	 * Before we free the buffers, perform one more sync to assure that
	 * every CPU is out of probe context.
	 */
	dtrace_sync();

	dtrace_buffer_free(state->dts_buffer);
	dtrace_buffer_free(state->dts_aggbuffer);

	for (i = 0; i < nspec; i++)
		dtrace_buffer_free(spec[i].dtsp_buffer);

	if (state->dts_cleaner != CYCLIC_NONE)
		cyclic_remove(state->dts_cleaner);

	if (state->dts_deadman != CYCLIC_NONE)
		cyclic_remove(state->dts_deadman);

	dtrace_dstate_fini(&vstate->dtvs_dynvars);
	dtrace_vstate_fini(vstate);
	kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *));

	if (state->dts_aggregations != NULL) {
		for (i = 0; i < state->dts_naggregations; i++)
			ASSERT(state->dts_aggregations[i] == NULL);

		ASSERT(state->dts_naggregations > 0);
		kmem_free(state->dts_aggregations,
		    state->dts_naggregations * sizeof (dtrace_aggregation_t *));
	}

	kmem_free(state->dts_buffer, bufsize);
	kmem_free(state->dts_aggbuffer, bufsize);

	for (i = 0; i < nspec; i++)
		kmem_free(spec[i].dtsp_buffer, bufsize);

	kmem_free(spec, nspec * sizeof (dtrace_speculation_t));

	dtrace_format_destroy(state);

	vmem_destroy(state->dts_aggid_arena);
	ddi_soft_state_free(dtrace_softstate, minor);
	vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
}
/*
 * DTrace Anonymous Enabling Functions
 */
static dtrace_state_t *
dtrace_anon_grab(void)
{
	dtrace_state_t *state;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if ((state = dtrace_anon.dta_state) == NULL) {
		ASSERT(dtrace_anon.dta_enabling == NULL);
		return (NULL);
	}

	ASSERT(dtrace_anon.dta_enabling != NULL);
	ASSERT(dtrace_retained != NULL);

	dtrace_enabling_destroy(dtrace_anon.dta_enabling);
	dtrace_anon.dta_enabling = NULL;
	dtrace_anon.dta_state = NULL;

	return (state);
}
static void
dtrace_anon_property(void)
{
	int i, rv;
	dtrace_state_t *state;
	dof_hdr_t *dof;
	char c[32];		/* enough for "dof-data-" + digits */

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; ; i++) {
		(void) snprintf(c, sizeof (c), "dof-data-%d", i);

		dtrace_err_verbose = 1;

		if ((dof = dtrace_dof_property(c)) == NULL) {
			dtrace_err_verbose = 0;
			break;
		}

		/*
		 * We want to create anonymous state, so we need to transition
		 * the kernel debugger to indicate that DTrace is active.  If
		 * this fails (e.g. because the debugger has modified text in
		 * some way), we won't continue with the processing.
		 */
		if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
			cmn_err(CE_NOTE, "kernel debugger active; anonymous "
			    "enabling ignored.");
			dtrace_dof_destroy(dof);
			break;
		}

		/*
		 * If we haven't allocated an anonymous state, we'll do so now.
		 */
		if ((state = dtrace_anon.dta_state) == NULL) {
			state = dtrace_state_create(NULL, NULL);
			dtrace_anon.dta_state = state;

			if (state == NULL) {
				/*
				 * This basically shouldn't happen:  the only
				 * failure mode from dtrace_state_create() is a
				 * failure of ddi_soft_state_zalloc() that
				 * itself should never happen.  Still, the
				 * interface allows for a failure mode, and
				 * we want to fail as gracefully as possible:
				 * we'll emit an error message and cease
				 * processing anonymous state in this case.
				 */
				cmn_err(CE_WARN, "failed to create "
				    "anonymous state");
				dtrace_dof_destroy(dof);
				break;
			}
		}

		rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(),
		    &dtrace_anon.dta_enabling, 0, B_TRUE);

		if (rv == 0)
			rv = dtrace_dof_options(dof, state);

		dtrace_err_verbose = 0;
		dtrace_dof_destroy(dof);

		if (rv != 0) {
			/*
			 * This is malformed DOF; chuck any anonymous state
			 * that we created.
			 */
			ASSERT(dtrace_anon.dta_enabling == NULL);
			dtrace_state_destroy(state);
			dtrace_anon.dta_state = NULL;
			break;
		}

		ASSERT(dtrace_anon.dta_enabling != NULL);
	}

	if (dtrace_anon.dta_enabling != NULL) {
		int rval;

		/*
		 * dtrace_enabling_retain() can only fail because we are
		 * trying to retain more enablings than are allowed -- but
		 * we only have one anonymous enabling, and we are guaranteed
		 * to be allowed at least one retained enabling; we assert
		 * that dtrace_enabling_retain() returns success.
		 */
		rval = dtrace_enabling_retain(dtrace_anon.dta_enabling);
		ASSERT(rval == 0);

		dtrace_enabling_dump(dtrace_anon.dta_enabling);
	}
}
/*
 * DTrace Helper Functions
 */
static void
dtrace_helper_trace(dtrace_helper_action_t *helper,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
{
	uint32_t size, next, nnext, i;
	dtrace_helptrace_t *ent;
	uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	if (!dtrace_helptrace_enabled)
		return;

	ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);

	/*
	 * What would a tracing framework be without its own tracing
	 * framework?  (Well, a hell of a lot simpler, for starters...)
	 */
	size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals *
	    sizeof (uint64_t) - sizeof (uint64_t);

	/*
	 * Iterate until we can allocate a slot in the trace buffer.
	 */
	do {
		next = dtrace_helptrace_next;

		if (next + size < dtrace_helptrace_bufsize) {
			nnext = next + size;
		} else {
			nnext = size;
		}
	} while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next);

	/*
	 * We have our slot; fill it in.
	 */
	if (nnext == size)
		next = 0;

	ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next];
	ent->dtht_helper = helper;
	ent->dtht_where = where;
	ent->dtht_nlocals = vstate->dtvs_nlocals;

	ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ?
	    mstate->dtms_fltoffs : -1;
	ent->dtht_fault = DTRACE_FLAGS2FLT(flags);
	ent->dtht_illval = cpu_core[CPU->cpu_id].cpuc_dtrace_illval;

	for (i = 0; i < vstate->dtvs_nlocals; i++) {
		dtrace_statvar_t *svar;

		if ((svar = vstate->dtvs_locals[i]) == NULL)
			continue;

		ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t));
		ent->dtht_locals[i] =
		    ((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id];
	}
}
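
/*
 * Editorial sketch (not part of the original source):  the do/while above is
 * a classic lock-free ring-buffer reservation.  Each would-be writer
 * snapshots the cursor, computes where its record would end (wrapping to the
 * start if it would run off the end), and publishes the new cursor with
 * compare-and-swap; losers simply retry against the fresh cursor:
 *
 *	do {
 *		next = cursor;					// snapshot
 *		nnext = (next + size < bufsize) ? next + size : size;
 *	} while (dtrace_cas32(&cursor, next, nnext) != next);
 *	// on success, bytes [next, nnext) -- or [0, size) after a
 *	// wrap -- belong exclusively to this writer
 */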
static uint64_t
dtrace_helper(int which, dtrace_mstate_t *mstate,
    dtrace_state_t *state, uint64_t arg0, uint64_t arg1)
{
	uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	uint64_t sarg0 = mstate->dtms_arg[0];
	uint64_t sarg1 = mstate->dtms_arg[1];
	uint64_t rval = 0;
	dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
	dtrace_helper_action_t *helper;
	dtrace_vstate_t *vstate;
	dtrace_difo_t *pred;
	int i, trace = dtrace_helptrace_enabled;

	ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);

	if (helpers == NULL)
		return (0);

	if ((helper = helpers->dthps_actions[which]) == NULL)
		return (0);

	vstate = &helpers->dthps_vstate;
	mstate->dtms_arg[0] = arg0;
	mstate->dtms_arg[1] = arg1;

	/*
	 * Now iterate over each helper.  If its predicate evaluates to 'true',
	 * we'll call the corresponding actions.  Note that the below calls
	 * to dtrace_dif_emulate() may set faults in machine state.  This is
	 * okay:  our caller (the outer dtrace_dif_emulate()) will simply plow
	 * the stored DIF offset with its own (which is the desired behavior).
	 * Also, note the calls to dtrace_dif_emulate() may allocate scratch
	 * from machine state; this is okay, too.
	 */
	for (; helper != NULL; helper = helper->dtha_next) {
		if ((pred = helper->dtha_predicate) != NULL) {
			if (trace)
				dtrace_helper_trace(helper, mstate, vstate, 0);

			if (!dtrace_dif_emulate(pred, mstate, vstate, state))
				goto next;

			if (*flags & CPU_DTRACE_FAULT)
				goto err;
		}

		for (i = 0; i < helper->dtha_nactions; i++) {
			if (trace)
				dtrace_helper_trace(helper,
				    mstate, vstate, i + 1);

			rval = dtrace_dif_emulate(helper->dtha_actions[i],
			    mstate, vstate, state);

			if (*flags & CPU_DTRACE_FAULT)
				goto err;
		}

next:
		if (trace)
			dtrace_helper_trace(helper, mstate, vstate,
			    DTRACE_HELPTRACE_NEXT);
	}

	if (trace)
		dtrace_helper_trace(helper, mstate, vstate,
		    DTRACE_HELPTRACE_DONE);

	/*
	 * Restore the arg0 that we saved upon entry.
	 */
	mstate->dtms_arg[0] = sarg0;
	mstate->dtms_arg[1] = sarg1;

	return (rval);

err:
	if (trace)
		dtrace_helper_trace(helper, mstate, vstate,
		    DTRACE_HELPTRACE_ERR);

	/*
	 * Restore the arg0 that we saved upon entry.
	 */
	mstate->dtms_arg[0] = sarg0;
	mstate->dtms_arg[1] = sarg1;

	return (0);
}
static void
dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
    dtrace_vstate_t *vstate)
{
	int i;

	if (helper->dtha_predicate != NULL)
		dtrace_difo_release(helper->dtha_predicate, vstate);

	for (i = 0; i < helper->dtha_nactions; i++) {
		ASSERT(helper->dtha_actions[i] != NULL);
		dtrace_difo_release(helper->dtha_actions[i], vstate);
	}

	kmem_free(helper->dtha_actions,
	    helper->dtha_nactions * sizeof (dtrace_difo_t *));
	kmem_free(helper, sizeof (dtrace_helper_action_t));
}
static int
dtrace_helper_destroygen(int gen)
{
	proc_t *p = curproc;
	dtrace_helpers_t *help = p->p_dtrace_helpers;
	dtrace_vstate_t *vstate;
	int i;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (help == NULL || gen > help->dthps_generation)
		return (EINVAL);

	vstate = &help->dthps_vstate;

	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		dtrace_helper_action_t *last = NULL, *h, *next;

		for (h = help->dthps_actions[i]; h != NULL; h = next) {
			next = h->dtha_next;

			if (h->dtha_generation == gen) {
				if (last != NULL) {
					last->dtha_next = next;
				} else {
					help->dthps_actions[i] = next;
				}

				dtrace_helper_action_destroy(h, vstate);
			} else {
				last = h;
			}
		}
	}

	/*
	 * Iterate until we've cleared out all helper providers with the
	 * given generation number.
	 */
	for (;;) {
		dtrace_helper_provider_t *prov;

		/*
		 * Look for a helper provider with the right generation.  We
		 * have to start back at the beginning of the list each time
		 * because we drop dtrace_lock.  It's unlikely that we'll make
		 * more than two passes.
		 */
		for (i = 0; i < help->dthps_nprovs; i++) {
			prov = help->dthps_provs[i];

			if (prov->dthp_generation == gen)
				break;
		}

		/*
		 * If there were no matches, we're done.
		 */
		if (i == help->dthps_nprovs)
			break;

		/*
		 * Move the last helper provider into this slot.
		 */
		help->dthps_nprovs--;
		help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
		help->dthps_provs[help->dthps_nprovs] = NULL;

		mutex_exit(&dtrace_lock);

		/*
		 * If we have a meta provider, remove this helper provider.
		 */
		mutex_enter(&dtrace_meta_lock);
		if (dtrace_meta_pid != NULL) {
			ASSERT(dtrace_deferred_pid == NULL);
			dtrace_helper_provider_remove(&prov->dthp_prov,
			    p->p_pid);
		}
		mutex_exit(&dtrace_meta_lock);

		dtrace_helper_provider_destroy(prov);

		mutex_enter(&dtrace_lock);
	}

	return (0);
}
*helper
)
13640 if ((dp
= helper
->dtha_predicate
) != NULL
)
13641 err
+= dtrace_difo_validate_helper(dp
);
13643 for (i
= 0; i
< helper
->dtha_nactions
; i
++)
13644 err
+= dtrace_difo_validate_helper(helper
->dtha_actions
[i
]);
static int
dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep)
{
	dtrace_helpers_t *help;
	dtrace_helper_action_t *helper, *last;
	dtrace_actdesc_t *act;
	dtrace_vstate_t *vstate;
	dtrace_predicate_t *pred;
	int count = 0, nactions = 0, i;

	if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)
		return (EINVAL);

	help = curproc->p_dtrace_helpers;
	last = help->dthps_actions[which];
	vstate = &help->dthps_vstate;

	for (count = 0; last != NULL; last = last->dtha_next) {
		count++;
		if (last->dtha_next == NULL)
			break;
	}

	/*
	 * If we already have dtrace_helper_actions_max helper actions for this
	 * helper action type, we'll refuse to add a new one.
	 */
	if (count >= dtrace_helper_actions_max)
		return (ENOSPC);

	helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
	helper->dtha_generation = help->dthps_generation;

	if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
		ASSERT(pred->dtp_difo != NULL);
		dtrace_difo_hold(pred->dtp_difo);
		helper->dtha_predicate = pred->dtp_difo;
	}

	for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
		if (act->dtad_kind != DTRACEACT_DIFEXPR)
			goto err;

		if (act->dtad_difo == NULL)
			goto err;

		nactions++;
	}

	helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
	    (helper->dtha_nactions = nactions), KM_SLEEP);

	for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
		dtrace_difo_hold(act->dtad_difo);
		helper->dtha_actions[i++] = act->dtad_difo;
	}

	if (!dtrace_helper_validate(helper))
		goto err;

	if (last == NULL) {
		help->dthps_actions[which] = helper;
	} else {
		last->dtha_next = helper;
	}

	if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) {
		dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
		dtrace_helptrace_next = 0;
	}

	return (0);
err:
	dtrace_helper_action_destroy(helper, vstate);
	return (EINVAL);
}
static void
dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
    dof_helper_t *dofhp)
{
	ASSERT(MUTEX_NOT_HELD(&dtrace_lock));

	mutex_enter(&dtrace_meta_lock);
	mutex_enter(&dtrace_lock);

	if (!dtrace_attached() || dtrace_meta_pid == NULL) {
		/*
		 * If the dtrace module is loaded but not attached, or if
		 * there isn't a meta provider registered to deal with
		 * these provider descriptions, we need to postpone creating
		 * the actual providers until later.
		 */
		if (help->dthps_next == NULL && help->dthps_prev == NULL &&
		    dtrace_deferred_pid != help) {
			help->dthps_deferred = 1;
			help->dthps_pid = p->p_pid;
			help->dthps_next = dtrace_deferred_pid;
			help->dthps_prev = NULL;
			if (dtrace_deferred_pid != NULL)
				dtrace_deferred_pid->dthps_prev = help;
			dtrace_deferred_pid = help;
		}

		mutex_exit(&dtrace_lock);

	} else if (dofhp != NULL) {
		/*
		 * If the dtrace module is loaded and we have a particular
		 * helper provider description, pass that off to the
		 * meta provider.
		 */
		mutex_exit(&dtrace_lock);

		dtrace_helper_provide(dofhp, p->p_pid);

	} else {
		/*
		 * Otherwise, just pass all the helper provider descriptions
		 * off to the meta provider.
		 */
		int i;
		mutex_exit(&dtrace_lock);

		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
			    p->p_pid);
		}
	}

	mutex_exit(&dtrace_meta_lock);
}
static int
dtrace_helper_provider_add(dof_helper_t *dofhp, int gen)
{
	dtrace_helpers_t *help;
	dtrace_helper_provider_t *hprov, **tmp_provs;
	uint_t tmp_maxprovs, i;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	help = curproc->p_dtrace_helpers;
	ASSERT(help != NULL);

	/*
	 * If we already have dtrace_helper_providers_max helper providers,
	 * we refuse to add a new one.
	 */
	if (help->dthps_nprovs >= dtrace_helper_providers_max)
		return (ENOSPC);

	/*
	 * Check to make sure this isn't a duplicate.
	 */
	for (i = 0; i < help->dthps_nprovs; i++) {
		if (dofhp->dofhp_addr ==
		    help->dthps_provs[i]->dthp_prov.dofhp_addr)
			return (EALREADY);
	}

	hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP);
	hprov->dthp_prov = *dofhp;
	hprov->dthp_ref = 1;
	hprov->dthp_generation = gen;

	/*
	 * Allocate a bigger table for helper providers if it's already full.
	 */
	if (help->dthps_maxprovs == help->dthps_nprovs) {
		tmp_maxprovs = help->dthps_maxprovs;
		tmp_provs = help->dthps_provs;

		if (help->dthps_maxprovs == 0)
			help->dthps_maxprovs = 2;
		else
			help->dthps_maxprovs *= 2;
		if (help->dthps_maxprovs > dtrace_helper_providers_max)
			help->dthps_maxprovs = dtrace_helper_providers_max;

		ASSERT(tmp_maxprovs < help->dthps_maxprovs);

		help->dthps_provs = kmem_zalloc(help->dthps_maxprovs *
		    sizeof (dtrace_helper_provider_t *), KM_SLEEP);

		if (tmp_provs != NULL) {
			bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs *
			    sizeof (dtrace_helper_provider_t *));
			kmem_free(tmp_provs, tmp_maxprovs *
			    sizeof (dtrace_helper_provider_t *));
		}
	}

	help->dthps_provs[help->dthps_nprovs] = hprov;
	help->dthps_nprovs++;

	return (0);
}
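
/*
 * Editorial note (sketch, not in the original source):  the table growth
 * above is the usual doubling scheme -- start at 2 and double until either
 * the new entry fits or the hard cap dtrace_helper_providers_max is reached
 * -- giving amortized O(1) insertion at the cost of one
 * allocate/copy/free cycle per doubling:
 *
 *	if (nprovs == maxprovs) {
 *		newmax = (maxprovs == 0) ? 2 : maxprovs * 2;
 *		if (newmax > cap)
 *			newmax = cap;
 *		// kmem_zalloc(newmax entries), bcopy(old), kmem_free(old)
 *	}
 */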
static void
dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
{
	mutex_enter(&dtrace_lock);

	if (--hprov->dthp_ref == 0) {
		dof_hdr_t *dof;

		mutex_exit(&dtrace_lock);
		dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
		dtrace_dof_destroy(dof);
		kmem_free(hprov, sizeof (dtrace_helper_provider_t));
	} else {
		mutex_exit(&dtrace_lock);
	}
}
static int
dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
{
	uintptr_t daddr = (uintptr_t)dof;
	dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
	dof_provider_t *provider;
	dof_probe_t *probe;
	uint8_t *arg;
	char *strtab, *typestr;
	dof_stridx_t typeidx;
	size_t typesz;
	uint_t nprobes, j, k;

	ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);

	if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
		dtrace_dof_error(dof, "misaligned section offset");
		return (-1);
	}

	/*
	 * The section needs to be large enough to contain the DOF provider
	 * structure appropriate for the given version.
	 */
	if (sec->dofs_size <
	    ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ?
	    offsetof(dof_provider_t, dofpv_prenoffs) :
	    sizeof (dof_provider_t))) {
		dtrace_dof_error(dof, "provider section too small");
		return (-1);
	}

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab);
	prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes);
	arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs);
	off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs);

	if (str_sec == NULL || prb_sec == NULL ||
	    arg_sec == NULL || off_sec == NULL)
		return (-1);

	enoff_sec = NULL;

	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    provider->dofpv_prenoffs != DOF_SECT_NONE &&
	    (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS,
	    provider->dofpv_prenoffs)) == NULL)
		return (-1);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);

	if (provider->dofpv_name >= str_sec->dofs_size ||
	    strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) {
		dtrace_dof_error(dof, "invalid provider name");
		return (-1);
	}

	if (prb_sec->dofs_entsize == 0 ||
	    prb_sec->dofs_entsize > prb_sec->dofs_size) {
		dtrace_dof_error(dof, "invalid entry size");
		return (-1);
	}

	if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) {
		dtrace_dof_error(dof, "misaligned entry size");
		return (-1);
	}

	if (off_sec->dofs_entsize != sizeof (uint32_t)) {
		dtrace_dof_error(dof, "invalid entry size");
		return (-1);
	}

	if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) {
		dtrace_dof_error(dof, "misaligned section offset");
		return (-1);
	}

	if (arg_sec->dofs_entsize != sizeof (uint8_t)) {
		dtrace_dof_error(dof, "invalid entry size");
		return (-1);
	}

	arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);

	nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;

	/*
	 * Take a pass through the probes to check for errors.
	 */
	for (j = 0; j < nprobes; j++) {
		probe = (dof_probe_t *)(uintptr_t)(daddr +
		    prb_sec->dofs_offset + j * prb_sec->dofs_entsize);

		if (probe->dofpr_func >= str_sec->dofs_size) {
			dtrace_dof_error(dof, "invalid function name");
			return (-1);
		}

		if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) {
			dtrace_dof_error(dof, "function name too long");
			return (-1);
		}

		if (probe->dofpr_name >= str_sec->dofs_size ||
		    strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) {
			dtrace_dof_error(dof, "invalid probe name");
			return (-1);
		}

		/*
		 * The offset count must not wrap the index, and the offsets
		 * must also not overflow the section's data.
		 */
		if (probe->dofpr_offidx + probe->dofpr_noffs <
		    probe->dofpr_offidx ||
		    (probe->dofpr_offidx + probe->dofpr_noffs) *
		    off_sec->dofs_entsize > off_sec->dofs_size) {
			dtrace_dof_error(dof, "invalid probe offset");
			return (-1);
		}

		if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) {
			/*
			 * If there's no is-enabled offset section, make sure
			 * there aren't any is-enabled offsets.  Otherwise
			 * perform the same checks as for probe offsets
			 * (immediately above).
			 */
			if (enoff_sec == NULL) {
				if (probe->dofpr_enoffidx != 0 ||
				    probe->dofpr_nenoffs != 0) {
					dtrace_dof_error(dof, "is-enabled "
					    "offsets with null section");
					return (-1);
				}
			} else if (probe->dofpr_enoffidx +
			    probe->dofpr_nenoffs < probe->dofpr_enoffidx ||
			    (probe->dofpr_enoffidx + probe->dofpr_nenoffs) *
			    enoff_sec->dofs_entsize > enoff_sec->dofs_size) {
				dtrace_dof_error(dof, "invalid is-enabled "
				    "offset");
				return (-1);
			}

			if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) {
				dtrace_dof_error(dof, "zero probe and "
				    "is-enabled offsets");
				return (-1);
			}
		} else if (probe->dofpr_noffs == 0) {
			dtrace_dof_error(dof, "zero probe offsets");
			return (-1);
		}

		if (probe->dofpr_argidx + probe->dofpr_xargc <
		    probe->dofpr_argidx ||
		    (probe->dofpr_argidx + probe->dofpr_xargc) *
		    arg_sec->dofs_entsize > arg_sec->dofs_size) {
			dtrace_dof_error(dof, "invalid args");
			return (-1);
		}

		typeidx = probe->dofpr_nargv;
		typestr = strtab + probe->dofpr_nargv;
		for (k = 0; k < probe->dofpr_nargc; k++) {
			if (typeidx >= str_sec->dofs_size) {
				dtrace_dof_error(dof, "bad "
				    "native argument type");
				return (-1);
			}

			typesz = strlen(typestr) + 1;
			if (typesz > DTRACE_ARGTYPELEN) {
				dtrace_dof_error(dof, "native "
				    "argument type too long");
				return (-1);
			}

			typeidx += typesz;
			typestr += typesz;
		}

		typeidx = probe->dofpr_xargv;
		typestr = strtab + probe->dofpr_xargv;
		for (k = 0; k < probe->dofpr_xargc; k++) {
			if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) {
				dtrace_dof_error(dof, "bad "
				    "native argument index");
				return (-1);
			}

			if (typeidx >= str_sec->dofs_size) {
				dtrace_dof_error(dof, "bad "
				    "translated argument type");
				return (-1);
			}

			typesz = strlen(typestr) + 1;
			if (typesz > DTRACE_ARGTYPELEN) {
				dtrace_dof_error(dof, "translated argument "
				    "type too long");
				return (-1);
			}

			typeidx += typesz;
			typestr += typesz;
		}
	}

	return (0);
}

static int
dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp)
{
	dtrace_helpers_t *help;
	dtrace_vstate_t *vstate;
	dtrace_enabling_t *enab = NULL;
	int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1;
	uintptr_t daddr = (uintptr_t)dof;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if ((help = curproc->p_dtrace_helpers) == NULL)
		help = dtrace_helpers_create(curproc);

	vstate = &help->dthps_vstate;

	if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab,
	    dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) {
		dtrace_dof_destroy(dof);
		return (rv);
	}

	/*
	 * Look for helper providers and validate their descriptions.
	 */
	if (dhp != NULL) {
		for (i = 0; i < dof->dofh_secnum; i++) {
			dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
			    dof->dofh_secoff + i * dof->dofh_secsize);

			if (sec->dofs_type != DOF_SECT_PROVIDER)
				continue;

			if (dtrace_helper_provider_validate(dof, sec) != 0) {
				dtrace_enabling_destroy(enab);
				dtrace_dof_destroy(dof);
				return (-1);
			}

			nprovs++;
		}
	}

	/*
	 * Now we need to walk through the ECB descriptions in the enabling.
	 */
	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_ecbdesc_t *ep = enab->dten_desc[i];
		dtrace_probedesc_t *desc = &ep->dted_probe;

		if (strcmp(desc->dtpd_provider, "dtrace") != 0)
			continue;

		if (strcmp(desc->dtpd_mod, "helper") != 0)
			continue;

		if (strcmp(desc->dtpd_func, "ustack") != 0)
			continue;

		if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK,
		    ep)) != 0) {
			/*
			 * Adding this helper action failed -- we are now going
			 * to rip out the entire generation and return failure.
			 */
			(void) dtrace_helper_destroygen(help->dthps_generation);
			dtrace_enabling_destroy(enab);
			dtrace_dof_destroy(dof);
			return (-1);
		}

		nhelpers++;
	}

	if (nhelpers < enab->dten_ndesc)
		dtrace_dof_error(dof, "unmatched helpers");

	gen = help->dthps_generation++;
	dtrace_enabling_destroy(enab);

	if (dhp != NULL && nprovs > 0) {
		dhp->dofhp_dof = (uint64_t)(uintptr_t)dof;
		if (dtrace_helper_provider_add(dhp, gen) == 0) {
			mutex_exit(&dtrace_lock);
			dtrace_helper_provider_register(curproc, help, dhp);
			mutex_enter(&dtrace_lock);

			destroy = 0;
		}
	}

	if (destroy)
		dtrace_dof_destroy(dof);

	return (gen);
}

static dtrace_helpers_t *
dtrace_helpers_create(proc_t *p)
{
	dtrace_helpers_t *help;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(p->p_dtrace_helpers == NULL);

	help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP);
	help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) *
	    DTRACE_NHELPER_ACTIONS, KM_SLEEP);

	p->p_dtrace_helpers = help;
	dtrace_helpers++;

	return (help);
}
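
/*
 * The per-process helpers structure is created lazily:
 * dtrace_helper_slurp() calls dtrace_helpers_create() the first time a
 * process adds a helper, so processes that never use helpers pay no cost
 * here.
 */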

static void
dtrace_helpers_destroy(void)
{
	dtrace_helpers_t *help;
	dtrace_vstate_t *vstate;
	proc_t *p = curproc;
	int i;

	mutex_enter(&dtrace_lock);

	ASSERT(p->p_dtrace_helpers != NULL);
	ASSERT(dtrace_helpers > 0);

	help = p->p_dtrace_helpers;
	vstate = &help->dthps_vstate;

	/*
	 * We're now going to lose the help from this process.
	 */
	p->p_dtrace_helpers = NULL;
	dtrace_sync();

	/*
	 * Destroy the helper actions.
	 */
	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		dtrace_helper_action_t *h, *next;

		for (h = help->dthps_actions[i]; h != NULL; h = next) {
			next = h->dtha_next;
			dtrace_helper_action_destroy(h, vstate);
		}
	}

	mutex_exit(&dtrace_lock);

	/*
	 * Destroy the helper providers.
	 */
	if (help->dthps_maxprovs > 0) {
		mutex_enter(&dtrace_meta_lock);
		if (dtrace_meta_pid != NULL) {
			ASSERT(dtrace_deferred_pid == NULL);

			for (i = 0; i < help->dthps_nprovs; i++) {
				dtrace_helper_provider_remove(
				    &help->dthps_provs[i]->dthp_prov,
				    p->p_pid);
			}
		} else {
			mutex_enter(&dtrace_lock);
			ASSERT(help->dthps_deferred == 0 ||
			    help->dthps_next != NULL ||
			    help->dthps_prev != NULL ||
			    help == dtrace_deferred_pid);

			/*
			 * Remove the helper from the deferred list.
			 */
			if (help->dthps_next != NULL)
				help->dthps_next->dthps_prev = help->dthps_prev;
			if (help->dthps_prev != NULL)
				help->dthps_prev->dthps_next = help->dthps_next;
			if (dtrace_deferred_pid == help) {
				dtrace_deferred_pid = help->dthps_next;
				ASSERT(help->dthps_prev == NULL);
			}

			mutex_exit(&dtrace_lock);
		}

		mutex_exit(&dtrace_meta_lock);

		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provider_destroy(help->dthps_provs[i]);
		}

		kmem_free(help->dthps_provs, help->dthps_maxprovs *
		    sizeof (dtrace_helper_provider_t *));
	}

	mutex_enter(&dtrace_lock);

	dtrace_vstate_fini(&help->dthps_vstate);
	kmem_free(help->dthps_actions,
	    sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS);
	kmem_free(help, sizeof (dtrace_helpers_t));

	--dtrace_helpers;
	mutex_exit(&dtrace_lock);
}

static void
dtrace_helpers_duplicate(proc_t *from, proc_t *to)
{
	dtrace_helpers_t *help, *newhelp;
	dtrace_helper_action_t *helper, *new, *last;
	dtrace_difo_t *dp;
	dtrace_vstate_t *vstate;
	int i, j, sz, hasprovs = 0;

	mutex_enter(&dtrace_lock);
	ASSERT(from->p_dtrace_helpers != NULL);
	ASSERT(dtrace_helpers > 0);

	help = from->p_dtrace_helpers;
	newhelp = dtrace_helpers_create(to);
	ASSERT(to->p_dtrace_helpers != NULL);

	newhelp->dthps_generation = help->dthps_generation;
	vstate = &newhelp->dthps_vstate;

	/*
	 * Duplicate the helper actions.
	 */
	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		if ((helper = help->dthps_actions[i]) == NULL)
			continue;

		for (last = NULL; helper != NULL; helper = helper->dtha_next) {
			new = kmem_zalloc(sizeof (dtrace_helper_action_t),
			    KM_SLEEP);
			new->dtha_generation = helper->dtha_generation;

			if ((dp = helper->dtha_predicate) != NULL) {
				dp = dtrace_difo_duplicate(dp, vstate);
				new->dtha_predicate = dp;
			}

			new->dtha_nactions = helper->dtha_nactions;
			sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
			new->dtha_actions = kmem_alloc(sz, KM_SLEEP);

			for (j = 0; j < new->dtha_nactions; j++) {
				dtrace_difo_t *dp = helper->dtha_actions[j];

				ASSERT(dp != NULL);
				dp = dtrace_difo_duplicate(dp, vstate);
				new->dtha_actions[j] = dp;
			}

			if (last != NULL) {
				last->dtha_next = new;
			} else {
				newhelp->dthps_actions[i] = new;
			}

			last = new;
		}
	}

	/*
	 * Duplicate the helper providers and register them with the
	 * DTrace framework.
	 */
	if (help->dthps_nprovs > 0) {
		newhelp->dthps_nprovs = help->dthps_nprovs;
		newhelp->dthps_maxprovs = help->dthps_nprovs;
		newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
		    sizeof (dtrace_helper_provider_t *), KM_SLEEP);
		for (i = 0; i < newhelp->dthps_nprovs; i++) {
			newhelp->dthps_provs[i] = help->dthps_provs[i];
			newhelp->dthps_provs[i]->dthp_ref++;
		}

		hasprovs = 1;
	}

	mutex_exit(&dtrace_lock);

	if (hasprovs)
		dtrace_helper_provider_register(to, newhelp, NULL);
}
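
/*
 * Note the asymmetry in the duplication above: helper actions and their
 * DIFOs are deep-copied via dtrace_difo_duplicate(), while helper
 * providers are shared by reference between parent and child, with
 * dthp_ref accounting for the additional hold.
 */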

/*
 * DTrace Hook Functions
 */
static void
dtrace_module_loaded(struct modctl *ctl)
{
	dtrace_provider_t *prv;

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&mod_lock);

	ASSERT(ctl->mod_busy);

	/*
	 * We're going to call each provider's per-module provide operation,
	 * specifying only this module.
	 */
	for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
		prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

	mutex_exit(&mod_lock);
	mutex_exit(&dtrace_provider_lock);

	/*
	 * If we have any retained enablings, we need to match against them.
	 * Enabling probes requires that cpu_lock be held, and we cannot hold
	 * cpu_lock here -- it is legal for cpu_lock to be held when loading a
	 * module.  (In particular, this happens when loading scheduling
	 * classes.)  So if we have any retained enablings, we need to dispatch
	 * our task queue to do the match for us.
	 */
	mutex_enter(&dtrace_lock);

	if (dtrace_retained == NULL) {
		mutex_exit(&dtrace_lock);
		return;
	}

	(void) taskq_dispatch(dtrace_taskq,
	    (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP);

	mutex_exit(&dtrace_lock);

	/*
	 * And now, for a little heuristic sleaze:  in general, we want to
	 * match modules as soon as they load.  However, we cannot guarantee
	 * this, because it would lead us to the lock ordering violation
	 * outlined above.  The common case, of course, is that cpu_lock is
	 * _not_ held -- so we delay here for a clock tick, hoping that that's
	 * long enough for the task queue to do its work.  If it's not, it's
	 * not a serious problem -- it just means that the module that we
	 * just loaded may not be immediately instrumentable.
	 */
	delay(1);
}

static void
dtrace_module_unloaded(struct modctl *ctl)
{
	dtrace_probe_t template, *probe, *first, *next;
	dtrace_provider_t *prov;

	template.dtpr_mod = ctl->mod_modname;

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&mod_lock);
	mutex_enter(&dtrace_lock);

	if (dtrace_bymod == NULL) {
		/*
		 * The DTrace module is loaded (obviously) but not attached;
		 * we don't have any work to do.
		 */
		mutex_exit(&dtrace_provider_lock);
		mutex_exit(&mod_lock);
		mutex_exit(&dtrace_lock);
		return;
	}

	for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
	    probe != NULL; probe = probe->dtpr_nextmod) {
		if (probe->dtpr_ecb != NULL) {
			mutex_exit(&dtrace_provider_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_lock);

			/*
			 * This shouldn't _actually_ be possible -- we're
			 * unloading a module that has an enabled probe in it.
			 * (It's normally up to the provider to make sure that
			 * this can't happen.)  However, because dtps_enable()
			 * doesn't have a failure mode, there can be an
			 * enable/unload race.  Upshot: we don't want to
			 * assert, but we're not going to disable the
			 * probe, either.
			 */
			if (dtrace_err_verbose) {
				cmn_err(CE_WARN, "unloaded module '%s' had "
				    "enabled probes", ctl->mod_modname);
			}

			return;
		}
	}

	probe = first;

	for (first = NULL; probe != NULL; probe = next) {
		ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);

		dtrace_probes[probe->dtpr_id - 1] = NULL;

		next = probe->dtpr_nextmod;
		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		if (first == NULL) {
			first = probe;
			probe->dtpr_nextmod = NULL;
		} else {
			probe->dtpr_nextmod = first;
			first = probe;
		}
	}

	/*
	 * We've removed all of the module's probes from the hash chains and
	 * from the probe array.  Now issue a dtrace_sync() to be sure that
	 * everyone has cleared out from any probe array processing.
	 */
	dtrace_sync();

	for (probe = first; probe != NULL; probe = first) {
		first = probe->dtpr_nextmod;
		prov = probe->dtpr_provider;
		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
		vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
		kmem_free(probe, sizeof (dtrace_probe_t));
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&mod_lock);
	mutex_exit(&dtrace_provider_lock);
}

static void
dtrace_suspend(void)
{
	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
}

static void
dtrace_resume(void)
{
	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
}

static int
dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	mutex_enter(&dtrace_lock);

	switch (what) {
	case CPU_CONFIG: {
		dtrace_state_t *state;
		dtrace_optval_t *opt, rs, c;

		/*
		 * For now, we only allocate a new buffer for anonymous state.
		 */
		if ((state = dtrace_anon.dta_state) == NULL)
			break;

		if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
			break;

		opt = state->dts_options;
		c = opt[DTRACEOPT_CPU];

		if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
			break;

		/*
		 * Regardless of what the actual policy is, we're going to
		 * temporarily set our resize policy to be manual.  We're
		 * also going to temporarily set our CPU option to denote
		 * the newly configured CPU.
		 */
		rs = opt[DTRACEOPT_BUFRESIZE];
		opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
		opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;

		(void) dtrace_state_buffers(state);

		opt[DTRACEOPT_BUFRESIZE] = rs;
		opt[DTRACEOPT_CPU] = c;

		break;
	}

	case CPU_UNCONFIG:
		/*
		 * We don't free the buffer in the CPU_UNCONFIG case.  (The
		 * buffer will be freed when the consumer exits.)
		 */
		break;

	default:
		break;
	}

	mutex_exit(&dtrace_lock);
	return (0);
}

static void
dtrace_cpu_setup_initial(processorid_t cpu)
{
	(void) dtrace_cpu_setup(CPU_CONFIG, cpu);
}

static void
dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
{
	if (dtrace_toxranges >= dtrace_toxranges_max) {
		int osize, nsize;
		dtrace_toxrange_t *range;

		osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);

		if (osize == 0) {
			ASSERT(dtrace_toxrange == NULL);
			ASSERT(dtrace_toxranges_max == 0);
			dtrace_toxranges_max = 1;
		} else {
			dtrace_toxranges_max <<= 1;
		}

		nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
		range = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_toxrange != NULL) {
			ASSERT(osize != 0);
			bcopy(dtrace_toxrange, range, osize);
			kmem_free(dtrace_toxrange, osize);
		}

		dtrace_toxrange = range;
	}

	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);

	dtrace_toxrange[dtrace_toxranges].dtt_base = base;
	dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
	dtrace_toxranges++;
}
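
/*
 * Because the toxic range array above grows by doubling (1, 2, 4, ...),
 * a sequence of n dtrace_toxrange_add() calls incurs only O(log n)
 * reallocations and copies.
 */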

/*
 * DTrace Driver Cookbook Functions
 */
static int
dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	dtrace_provider_id_t id;
	dtrace_state_t *state = NULL;
	dtrace_enabling_t *enab;

	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	if (ddi_soft_state_init(&dtrace_softstate,
	    sizeof (dtrace_state_t), 0) != 0) {
		cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
		mutex_exit(&cpu_lock);
		mutex_exit(&dtrace_provider_lock);
		mutex_exit(&dtrace_lock);
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR,
	    DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE ||
	    ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR,
	    DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) {
		cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes");
		ddi_remove_minor_node(devi, NULL);
		ddi_soft_state_fini(&dtrace_softstate);
		mutex_exit(&cpu_lock);
		mutex_exit(&dtrace_provider_lock);
		mutex_exit(&dtrace_lock);
		return (DDI_FAILURE);
	}

	ddi_report_dev(devi);
	dtrace_devi = devi;

	dtrace_modload = dtrace_module_loaded;
	dtrace_modunload = dtrace_module_unloaded;
	dtrace_cpu_init = dtrace_cpu_setup_initial;
	dtrace_helpers_cleanup = dtrace_helpers_destroy;
	dtrace_helpers_fork = dtrace_helpers_duplicate;
	dtrace_cpustart_init = dtrace_suspend;
	dtrace_cpustart_fini = dtrace_resume;
	dtrace_debugger_init = dtrace_suspend;
	dtrace_debugger_fini = dtrace_resume;

	register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);

	ASSERT(MUTEX_HELD(&cpu_lock));

	dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
	dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE,
	    UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0,
	    VM_SLEEP | VMC_IDENTIFIER);
	dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
	    1, INT_MAX, 0);

	dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
	    sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN,
	    NULL, NULL, NULL, NULL, NULL, 0);

	ASSERT(MUTEX_HELD(&cpu_lock));
	dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
	    offsetof(dtrace_probe_t, dtpr_nextmod),
	    offsetof(dtrace_probe_t, dtpr_prevmod));

	dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
	    offsetof(dtrace_probe_t, dtpr_nextfunc),
	    offsetof(dtrace_probe_t, dtpr_prevfunc));

	dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
	    offsetof(dtrace_probe_t, dtpr_nextname),
	    offsetof(dtrace_probe_t, dtpr_prevname));

	if (dtrace_retain_max < 1) {
		cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
		    "setting to 1", dtrace_retain_max);
		dtrace_retain_max = 1;
	}

	/*
	 * Now discover our toxic ranges.
	 */
	dtrace_toxic_ranges(dtrace_toxrange_add);

	/*
	 * Before we register ourselves as a provider to our own framework,
	 * we would like to assert that dtrace_provider is NULL -- but that's
	 * not true if we were loaded as a dependency of a DTrace provider.
	 * Once we've registered, we can assert that dtrace_provider is our
	 * pseudo provider.
	 */
	(void) dtrace_register("dtrace", &dtrace_provider_attr,
	    DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);

	ASSERT(dtrace_provider != NULL);
	ASSERT((dtrace_provider_id_t)dtrace_provider == id);

	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 0, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 0, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 1, NULL);

	dtrace_anon_property();
	mutex_exit(&cpu_lock);

	/*
	 * If DTrace helper tracing is enabled, we need to allocate the
	 * trace buffer and initialize the values.
	 */
	if (dtrace_helptrace_enabled) {
		ASSERT(dtrace_helptrace_buffer == NULL);
		dtrace_helptrace_buffer =
		    kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
		dtrace_helptrace_next = 0;
	}

	/*
	 * If there are already providers, we must ask them to provide their
	 * probes, and then match any anonymous enabling against them.  Note
	 * that there should be no other retained enablings at this time:
	 * the only retained enablings at this time should be the anonymous
	 * enabling.
	 */
	if (dtrace_anon.dta_enabling != NULL) {
		ASSERT(dtrace_retained == dtrace_anon.dta_enabling);

		dtrace_enabling_provide(NULL);
		state = dtrace_anon.dta_state;

		/*
		 * We couldn't hold cpu_lock across the above call to
		 * dtrace_enabling_provide(), but we must hold it to actually
		 * enable the probes.  We have to drop all of our locks, pick
		 * up cpu_lock, and regain our locks before matching the
		 * retained anonymous enabling.
		 */
		mutex_exit(&dtrace_lock);
		mutex_exit(&dtrace_provider_lock);

		mutex_enter(&cpu_lock);
		mutex_enter(&dtrace_provider_lock);
		mutex_enter(&dtrace_lock);

		if ((enab = dtrace_anon.dta_enabling) != NULL)
			(void) dtrace_enabling_match(enab, NULL);

		mutex_exit(&cpu_lock);
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);

	if (state != NULL) {
		/*
		 * If we created any anonymous state, set it going now.
		 */
		(void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
	}

	return (DDI_SUCCESS);
}
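
/*
 * The lock ordering established above -- cpu_lock, then
 * dtrace_provider_lock, then dtrace_lock -- is the same ordering used by
 * dtrace_open() and dtrace_detach() below; the drop-and-reacquire dance
 * around dtrace_enabling_provide() exists precisely to re-enter the locks
 * in that order.
 */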

static int
dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
	dtrace_state_t *state;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;

	if (getminor(*devp) == DTRACEMNRN_HELPER)
		return (0);

	/*
	 * If this wasn't an open with the "helper" minor, then it must be
	 * the "dtrace" minor.
	 */
	if (getminor(*devp) != DTRACEMNRN_DTRACE)
		return (ENXIO);

	/*
	 * If no DTRACE_PRIV_* bits are set in the credential, then the
	 * caller lacks sufficient permission to do anything with DTrace.
	 */
	dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
	if (priv == DTRACE_PRIV_NONE)
		return (EACCES);

	/*
	 * Ask all providers to provide all their probes.
	 */
	mutex_enter(&dtrace_provider_lock);
	dtrace_probe_provide(NULL, NULL);
	mutex_exit(&dtrace_provider_lock);

	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_lock);
	dtrace_opens++;
	dtrace_membar_producer();

	/*
	 * If the kernel debugger is active (that is, if the kernel debugger
	 * modified text in some way), we won't allow the open.
	 */
	if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
		dtrace_opens--;
		mutex_exit(&cpu_lock);
		mutex_exit(&dtrace_lock);
		return (EBUSY);
	}

	state = dtrace_state_create(devp, cred_p);
	mutex_exit(&cpu_lock);

	if (state == NULL) {
		if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
			(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
		mutex_exit(&dtrace_lock);
		return (EAGAIN);
	}

	mutex_exit(&dtrace_lock);

	return (0);
}

static int
dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
	minor_t minor = getminor(dev);
	dtrace_state_t *state;

	if (minor == DTRACEMNRN_HELPER)
		return (0);

	state = ddi_get_soft_state(dtrace_softstate, minor);

	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_lock);

	if (state->dts_anon) {
		/*
		 * There is anonymous state.  Destroy that first.
		 */
		ASSERT(dtrace_anon.dta_state == NULL);
		dtrace_state_destroy(state->dts_anon);
	}

	dtrace_state_destroy(state);
	ASSERT(dtrace_opens > 0);

	/*
	 * Only relinquish control of the kernel debugger interface when there
	 * are no consumers and no anonymous enablings.
	 */
	if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);

	mutex_exit(&dtrace_lock);
	mutex_exit(&cpu_lock);

	return (0);
}

static int
dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv)
{
	int rval;
	dof_helper_t help, *dhp = NULL;

	switch (cmd) {
	case DTRACEHIOC_ADDDOF:
		if (copyin((void *)arg, &help, sizeof (help)) != 0) {
			dtrace_dof_error(NULL, "failed to copyin DOF helper");
			return (EFAULT);
		}

		dhp = &help;
		arg = (intptr_t)help.dofhp_dof;
		/*FALLTHROUGH*/

	case DTRACEHIOC_ADD: {
		dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval);

		if (dof == NULL)
			return (rval);

		mutex_enter(&dtrace_lock);

		/*
		 * dtrace_helper_slurp() takes responsibility for the dof --
		 * it may free it now or it may save it and free it later.
		 */
		if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) {
			*rv = rval;
			rval = 0;
		} else {
			rval = EINVAL;
		}

		mutex_exit(&dtrace_lock);
		return (rval);
	}

	case DTRACEHIOC_REMOVE: {
		mutex_enter(&dtrace_lock);
		rval = dtrace_helper_destroygen(arg);
		mutex_exit(&dtrace_lock);

		return (rval);
	}

	default:
		break;
	}

	return (ENOTTY);
}
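
/*
 * dtrace_ioctl() handles ioctls on the "dtrace" minor nodes; ioctls on the
 * "helper" minor are redirected to dtrace_ioctl_helper() above.  Note that
 * DTRACEHIOC_ADDDOF deliberately falls through into DTRACEHIOC_ADD after
 * pointing arg at the DOF embedded in the copied-in dof_helper_t.
 */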

static int
dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
	minor_t minor = getminor(dev);
	dtrace_state_t *state;
	int rval;

	if (minor == DTRACEMNRN_HELPER)
		return (dtrace_ioctl_helper(cmd, arg, rv));

	state = ddi_get_soft_state(dtrace_softstate, minor);

	if (state->dts_anon) {
		ASSERT(dtrace_anon.dta_state == NULL);
		state = state->dts_anon;
	}

	switch (cmd) {
	case DTRACEIOC_PROVIDER: {
		dtrace_providerdesc_t pvd;
		dtrace_provider_t *pvp;

		if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0)
			return (EFAULT);

		pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
		mutex_enter(&dtrace_provider_lock);

		for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
			if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0)
				break;
		}

		mutex_exit(&dtrace_provider_lock);

		if (pvp == NULL)
			return (ESRCH);

		bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t));
		bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t));
		if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_EPROBE: {
		dtrace_eprobedesc_t epdesc;
		dtrace_ecb_t *ecb;
		dtrace_action_t *act;
		void *buf;
		size_t size;
		uintptr_t dest;
		int nrecs;

		if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0)
			return (EFAULT);

		mutex_enter(&dtrace_lock);

		if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
			mutex_exit(&dtrace_lock);
			return (EINVAL);
		}

		if (ecb->dte_probe == NULL) {
			mutex_exit(&dtrace_lock);
			return (EINVAL);
		}

		epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
		epdesc.dtepd_uarg = ecb->dte_uarg;
		epdesc.dtepd_size = ecb->dte_size;

		nrecs = epdesc.dtepd_nrecs;
		epdesc.dtepd_nrecs = 0;
		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
				continue;

			epdesc.dtepd_nrecs++;
		}

		/*
		 * Now that we have the size, we need to allocate a temporary
		 * buffer in which to store the complete description.  We need
		 * the temporary buffer to be able to drop dtrace_lock()
		 * across the copyout(), below.
		 */
		size = sizeof (dtrace_eprobedesc_t) +
		    (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));

		buf = kmem_alloc(size, KM_SLEEP);
		dest = (uintptr_t)buf;

		bcopy(&epdesc, (void *)dest, sizeof (epdesc));
		dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);

		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
				continue;

			if (nrecs-- == 0)
				break;

			bcopy(&act->dta_rec, (void *)dest,
			    sizeof (dtrace_recdesc_t));
			dest += sizeof (dtrace_recdesc_t);
		}

		mutex_exit(&dtrace_lock);

		if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
			kmem_free(buf, size);
			return (EFAULT);
		}

		kmem_free(buf, size);
		return (0);
	}

	case DTRACEIOC_AGGDESC: {
		dtrace_aggdesc_t aggdesc;
		dtrace_action_t *act;
		dtrace_aggregation_t *agg;
		int nrecs;
		uint32_t offs;
		dtrace_recdesc_t *lrec;
		void *buf;
		size_t size;
		uintptr_t dest;

		if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0)
			return (EFAULT);

		mutex_enter(&dtrace_lock);

		if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
			mutex_exit(&dtrace_lock);
			return (EINVAL);
		}

		aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;

		nrecs = aggdesc.dtagd_nrecs;
		aggdesc.dtagd_nrecs = 0;

		offs = agg->dtag_base;
		lrec = &agg->dtag_action.dta_rec;
		aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;

		for (act = agg->dtag_first; ; act = act->dta_next) {
			ASSERT(act->dta_intuple ||
			    DTRACEACT_ISAGG(act->dta_kind));

			/*
			 * If this action has a record size of zero, it
			 * denotes an argument to the aggregating action.
			 * Because the presence of this record doesn't (or
			 * shouldn't) affect the way the data is interpreted,
			 * we don't copy it out to save user-level the
			 * confusion of dealing with a zero-length record.
			 */
			if (act->dta_rec.dtrd_size == 0) {
				ASSERT(agg->dtag_hasarg);
				continue;
			}

			aggdesc.dtagd_nrecs++;

			if (act == &agg->dtag_action)
				break;
		}

		/*
		 * Now that we have the size, we need to allocate a temporary
		 * buffer in which to store the complete description.  We need
		 * the temporary buffer to be able to drop dtrace_lock()
		 * across the copyout(), below.
		 */
		size = sizeof (dtrace_aggdesc_t) +
		    (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));

		buf = kmem_alloc(size, KM_SLEEP);
		dest = (uintptr_t)buf;

		bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
		dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);

		for (act = agg->dtag_first; ; act = act->dta_next) {
			dtrace_recdesc_t rec = act->dta_rec;

			/*
			 * See the comment in the above loop for why we pass
			 * over zero-length records.
			 */
			if (rec.dtrd_size == 0) {
				ASSERT(agg->dtag_hasarg);
				continue;
			}

			if (nrecs-- == 0)
				break;

			rec.dtrd_offset -= offs;
			bcopy(&rec, (void *)dest, sizeof (rec));
			dest += sizeof (dtrace_recdesc_t);

			if (act == &agg->dtag_action)
				break;
		}

		mutex_exit(&dtrace_lock);

		if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
			kmem_free(buf, size);
			return (EFAULT);
		}

		kmem_free(buf, size);
		return (0);
	}

	case DTRACEIOC_ENABLE: {
		dof_hdr_t *dof;
		dtrace_enabling_t *enab = NULL;
		dtrace_vstate_t *vstate;
		int err = 0;

		*rv = 0;

		/*
		 * If a NULL argument has been passed, we take this as our
		 * cue to reevaluate our enablings.
		 */
		if (arg == NULL) {
			dtrace_enabling_matchall();

			return (0);
		}

		if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL)
			return (rval);

		mutex_enter(&cpu_lock);
		mutex_enter(&dtrace_lock);
		vstate = &state->dts_vstate;

		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EBUSY);
		}

		if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EINVAL);
		}

		if ((rval = dtrace_dof_options(dof, state)) != 0) {
			dtrace_enabling_destroy(enab);
			mutex_exit(&dtrace_lock);
			mutex_exit(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (rval);
		}

		if ((err = dtrace_enabling_match(enab, rv)) == 0) {
			err = dtrace_enabling_retain(enab);
		} else {
			dtrace_enabling_destroy(enab);
		}

		mutex_exit(&cpu_lock);
		mutex_exit(&dtrace_lock);
		dtrace_dof_destroy(dof);

		return (err);
	}

	case DTRACEIOC_REPLICATE: {
		dtrace_repldesc_t desc;
		dtrace_probedesc_t *match = &desc.dtrpd_match;
		dtrace_probedesc_t *create = &desc.dtrpd_create;
		int err;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		mutex_enter(&dtrace_lock);
		err = dtrace_enabling_replicate(state, match, create);
		mutex_exit(&dtrace_lock);

		return (err);
	}

	case DTRACEIOC_PROBEMATCH:
	case DTRACEIOC_PROBES: {
		dtrace_probe_t *probe = NULL;
		dtrace_probedesc_t desc;
		dtrace_probekey_t pkey;
		dtrace_id_t i;
		int m = 0;
		uint32_t priv;
		uid_t uid;
		zoneid_t zoneid;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		/*
		 * Before we attempt to match this probe, we want to give
		 * all providers the opportunity to provide it.
		 */
		if (desc.dtpd_id == DTRACE_IDNONE) {
			mutex_enter(&dtrace_provider_lock);
			dtrace_probe_provide(&desc, NULL);
			mutex_exit(&dtrace_provider_lock);
			desc.dtpd_id++;
		}

		if (cmd == DTRACEIOC_PROBEMATCH) {
			dtrace_probekey(&desc, &pkey);
			pkey.dtpk_id = DTRACE_IDNONE;
		}

		dtrace_cred2priv(cr, &priv, &uid, &zoneid);

		mutex_enter(&dtrace_lock);

		if (cmd == DTRACEIOC_PROBEMATCH) {
			for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    (m = dtrace_match_probe(probe, &pkey,
				    priv, uid, zoneid)) != 0)
					break;
			}

			if (m < 0) {
				mutex_exit(&dtrace_lock);
				return (EINVAL);
			}

		} else {
			for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    dtrace_match_priv(probe, priv, uid, zoneid))
					break;
			}
		}

		if (probe == NULL) {
			mutex_exit(&dtrace_lock);
			return (ESRCH);
		}

		dtrace_probe_description(probe, &desc);
		mutex_exit(&dtrace_lock);

		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_PROBEARG: {
		dtrace_argdesc_t desc;
		dtrace_probe_t *probe;
		dtrace_provider_t *prov;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if (desc.dtargd_id == DTRACE_IDNONE)
			return (EINVAL);

		if (desc.dtargd_ndx == DTRACE_ARGNONE)
			return (EINVAL);

		mutex_enter(&dtrace_provider_lock);
		mutex_enter(&mod_lock);
		mutex_enter(&dtrace_lock);

		if (desc.dtargd_id > dtrace_nprobes) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_provider_lock);
			return (EINVAL);
		}

		if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_provider_lock);
			return (EINVAL);
		}

		mutex_exit(&dtrace_lock);

		prov = probe->dtpr_provider;

		if (prov->dtpv_pops.dtps_getargdesc == NULL) {
			/*
			 * There isn't any typed information for this probe.
			 * Set the argument number to DTRACE_ARGNONE.
			 */
			desc.dtargd_ndx = DTRACE_ARGNONE;
		} else {
			desc.dtargd_native[0] = '\0';
			desc.dtargd_xlate[0] = '\0';
			desc.dtargd_mapping = desc.dtargd_ndx;

			prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
			    probe->dtpr_id, probe->dtpr_arg, &desc);
		}

		mutex_exit(&mod_lock);
		mutex_exit(&dtrace_provider_lock);

		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_GO: {
		processorid_t cpuid;
		rval = dtrace_state_go(state, &cpuid);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_STOP: {
		processorid_t cpuid;

		mutex_enter(&dtrace_lock);
		rval = dtrace_state_stop(state, &cpuid);
		mutex_exit(&dtrace_lock);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_DOFGET: {
		dof_hdr_t hdr, *dof;
		uint64_t len;

		if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0)
			return (EFAULT);

		mutex_enter(&dtrace_lock);
		dof = dtrace_dof_create(state);
		mutex_exit(&dtrace_lock);

		len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
		rval = copyout(dof, (void *)arg, len);
		dtrace_dof_destroy(dof);

		return (rval == 0 ? 0 : EFAULT);
	}

	case DTRACEIOC_AGGSNAP:
	case DTRACEIOC_BUFSNAP: {
		dtrace_bufdesc_t desc;
		caddr_t cached;
		dtrace_buffer_t *buf;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
			return (EINVAL);

		mutex_enter(&dtrace_lock);

		if (cmd == DTRACEIOC_BUFSNAP) {
			buf = &state->dts_buffer[desc.dtbd_cpu];
		} else {
			buf = &state->dts_aggbuffer[desc.dtbd_cpu];
		}

		if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
			size_t sz = buf->dtb_offset;

			if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
				mutex_exit(&dtrace_lock);
				return (EBUSY);
			}

			/*
			 * If this buffer has already been consumed, we're
			 * going to indicate that there's nothing left here
			 * to consume.
			 */
			if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
				mutex_exit(&dtrace_lock);

				desc.dtbd_size = 0;
				desc.dtbd_drops = 0;
				desc.dtbd_errors = 0;
				desc.dtbd_oldest = 0;
				sz = sizeof (desc);

				if (copyout(&desc, (void *)arg, sz) != 0)
					return (EFAULT);

				return (0);
			}

			/*
			 * If this is a ring buffer that has wrapped, we want
			 * to copy the whole thing out.
			 */
			if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
				dtrace_buffer_polish(buf);
				sz = buf->dtb_size;
			}

			if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) {
				mutex_exit(&dtrace_lock);
				return (EFAULT);
			}

			desc.dtbd_size = sz;
			desc.dtbd_drops = buf->dtb_drops;
			desc.dtbd_errors = buf->dtb_errors;
			desc.dtbd_oldest = buf->dtb_xamot_offset;

			mutex_exit(&dtrace_lock);

			if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
				return (EFAULT);

			buf->dtb_flags |= DTRACEBUF_CONSUMED;

			return (0);
		}

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			mutex_exit(&dtrace_lock);
			return (ENOENT);
		}

		cached = buf->dtb_tomax;
		ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));

		dtrace_xcall(desc.dtbd_cpu,
		    (dtrace_xcall_t)dtrace_buffer_switch, buf);

		state->dts_errors += buf->dtb_xamot_errors;

		/*
		 * If the buffers did not actually switch, then the cross call
		 * did not take place -- presumably because the given CPU is
		 * not in the ready set.  If this is the case, we'll return
		 * ENOENT.
		 */
		if (buf->dtb_tomax == cached) {
			ASSERT(buf->dtb_xamot != cached);
			mutex_exit(&dtrace_lock);
			return (ENOENT);
		}

		ASSERT(cached == buf->dtb_xamot);

		/*
		 * We have our snapshot; now copy it out.
		 */
		if (copyout(buf->dtb_xamot, desc.dtbd_data,
		    buf->dtb_xamot_offset) != 0) {
			mutex_exit(&dtrace_lock);
			return (EFAULT);
		}

		desc.dtbd_size = buf->dtb_xamot_offset;
		desc.dtbd_drops = buf->dtb_xamot_drops;
		desc.dtbd_errors = buf->dtb_xamot_errors;
		desc.dtbd_oldest = 0;

		mutex_exit(&dtrace_lock);

		/*
		 * Finally, copy out the buffer description.
		 */
		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}
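
	/*
	 * A note on the switching (non-RING, non-FILL) path above: each CPU
	 * has an active buffer (dtb_tomax) and an inactive buffer
	 * (dtb_xamot), and dtrace_buffer_switch() exchanges the two on the
	 * target CPU via dtrace_xcall().  Comparing dtb_tomax against the
	 * cached pointer is what detects whether the cross call actually
	 * ran on that CPU.
	 */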

	case DTRACEIOC_CONF: {
		dtrace_conf_t conf;

		bzero(&conf, sizeof (conf));
		conf.dtc_difversion = DIF_VERSION;
		conf.dtc_difintregs = DIF_DIR_NREGS;
		conf.dtc_diftupregs = DIF_DTR_NREGS;
		conf.dtc_ctfmodel = CTF_MODEL_NATIVE;

		if (copyout(&conf, (void *)arg, sizeof (conf)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_STATUS: {
		dtrace_status_t stat;
		dtrace_dstate_t *dstate;
		int i, j;
		uint64_t nerrs;

		/*
		 * See the comment in dtrace_state_deadman() for the reason
		 * for setting dts_laststatus to INT64_MAX before setting
		 * it to the correct value.
		 */
		state->dts_laststatus = INT64_MAX;
		dtrace_membar_producer();
		state->dts_laststatus = dtrace_gethrtime();

		bzero(&stat, sizeof (stat));

		mutex_enter(&dtrace_lock);

		if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
			mutex_exit(&dtrace_lock);
			return (ENOENT);
		}

		if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
			stat.dtst_exiting = 1;

		nerrs = state->dts_errors;
		dstate = &state->dts_vstate.dtvs_dynvars;

		for (i = 0; i < NCPU; i++) {
			dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];

			stat.dtst_dyndrops += dcpu->dtdsc_drops;
			stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
			stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;

			if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
				stat.dtst_filled++;

			nerrs += state->dts_buffer[i].dtb_errors;

			for (j = 0; j < state->dts_nspeculations; j++) {
				dtrace_speculation_t *spec;
				dtrace_buffer_t *buf;

				spec = &state->dts_speculations[j];
				buf = &spec->dtsp_buffer[i];
				stat.dtst_specdrops += buf->dtb_xamot_drops;
			}
		}

		stat.dtst_specdrops_busy = state->dts_speculations_busy;
		stat.dtst_specdrops_unavail = state->dts_speculations_unavail;
		stat.dtst_stkstroverflows = state->dts_stkstroverflows;
		stat.dtst_dblerrors = state->dts_dblerrors;
		stat.dtst_killed =
		    (state->dts_activity == DTRACE_ACTIVITY_KILLED);
		stat.dtst_errors = nerrs;

		mutex_exit(&dtrace_lock);

		if (copyout(&stat, (void *)arg, sizeof (stat)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_FORMAT: {
		dtrace_fmtdesc_t fmt;
		char *str;
		int len;

		if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0)
			return (EFAULT);

		mutex_enter(&dtrace_lock);

		if (fmt.dtfd_format == 0 ||
		    fmt.dtfd_format > state->dts_nformats) {
			mutex_exit(&dtrace_lock);
			return (EINVAL);
		}

		/*
		 * Format strings are allocated contiguously and they are
		 * never freed; if a format index is less than the number
		 * of formats, we can assert that the format map is non-NULL
		 * and that the format for the specified index is non-NULL.
		 */
		ASSERT(state->dts_formats != NULL);
		str = state->dts_formats[fmt.dtfd_format - 1];
		ASSERT(str != NULL);

		len = strlen(str) + 1;

		if (len > fmt.dtfd_length) {
			fmt.dtfd_length = len;

			if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) {
				mutex_exit(&dtrace_lock);
				return (EINVAL);
			}
		} else {
			if (copyout(str, fmt.dtfd_string, len) != 0) {
				mutex_exit(&dtrace_lock);
				return (EINVAL);
			}
		}

		mutex_exit(&dtrace_lock);
		return (0);
	}

	default:
		break;
	}

	return (ENOTTY);
}

static int
dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	dtrace_state_t *state;

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	ASSERT(dtrace_opens == 0);

	if (dtrace_helpers > 0) {
		mutex_exit(&dtrace_provider_lock);
		mutex_exit(&dtrace_lock);
		mutex_exit(&cpu_lock);
		return (DDI_FAILURE);
	}

	if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) {
		mutex_exit(&dtrace_provider_lock);
		mutex_exit(&dtrace_lock);
		mutex_exit(&cpu_lock);
		return (DDI_FAILURE);
	}

	dtrace_provider = NULL;

	if ((state = dtrace_anon_grab()) != NULL) {
		/*
		 * If there were ECBs on this state, the provider should
		 * have not been allowed to detach; assert that there is
		 * none.
		 */
		ASSERT(state->dts_necbs == 0);
		dtrace_state_destroy(state);

		/*
		 * If we're being detached with anonymous state, we need to
		 * indicate to the kernel debugger that DTrace is now inactive.
		 */
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
	}

	bzero(&dtrace_anon, sizeof (dtrace_anon_t));
	unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
	dtrace_cpu_init = NULL;
	dtrace_helpers_cleanup = NULL;
	dtrace_helpers_fork = NULL;
	dtrace_cpustart_init = NULL;
	dtrace_cpustart_fini = NULL;
	dtrace_debugger_init = NULL;
	dtrace_debugger_fini = NULL;
	dtrace_modload = NULL;
	dtrace_modunload = NULL;

	mutex_exit(&cpu_lock);

	if (dtrace_helptrace_enabled) {
		kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
		dtrace_helptrace_buffer = NULL;
	}

	kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
	dtrace_probes = NULL;
	dtrace_nprobes = 0;

	dtrace_hash_destroy(dtrace_bymod);
	dtrace_hash_destroy(dtrace_byfunc);
	dtrace_hash_destroy(dtrace_byname);
	dtrace_bymod = NULL;
	dtrace_byfunc = NULL;
	dtrace_byname = NULL;

	kmem_cache_destroy(dtrace_state_cache);
	vmem_destroy(dtrace_minor);
	vmem_destroy(dtrace_arena);

	if (dtrace_toxrange != NULL) {
		kmem_free(dtrace_toxrange,
		    dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
		dtrace_toxrange = NULL;
		dtrace_toxranges = 0;
		dtrace_toxranges_max = 0;
	}

	ddi_remove_minor_node(dtrace_devi, NULL);
	dtrace_devi = NULL;

	ddi_soft_state_fini(&dtrace_softstate);

	ASSERT(dtrace_vtime_references == 0);
	ASSERT(dtrace_opens == 0);
	ASSERT(dtrace_retained == NULL);

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);

	/*
	 * We don't destroy the task queue until after we have dropped our
	 * locks (taskq_destroy() may block on running tasks).  To prevent
	 * attempting to do work after we have effectively detached but before
	 * the task queue has been destroyed, all tasks dispatched via the
	 * task queue must check that DTrace is still attached before
	 * performing any operation.
	 */
	taskq_destroy(dtrace_taskq);
	dtrace_taskq = NULL;

	return (DDI_SUCCESS);
}

static int
dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dtrace_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

static struct cb_ops dtrace_cb_ops = {
	dtrace_open,		/* open */
	dtrace_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	dtrace_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops dtrace_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dtrace_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dtrace_attach,		/* attach */
	dtrace_detach,		/* detach */
	nodev,			/* reset */
	&dtrace_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"Dynamic Tracing",	/* name of module */
	&dtrace_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}