/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (c) 2008-2009, Intel Corporation.
 * All Rights Reserved.
 */
/*
 * Runtime tunables: sized for long tracing sessions with deep stacks.
 * aggsize/bufsize/dynvarsize bound memory use; aggrate=0 defers
 * aggregation drains to explicit reads; stackframes is raised so
 * stack() keys are not truncated.
 */
#pragma D option aggsize=8m
#pragma D option bufsize=16m
#pragma D option dynvarsize=16m
#pragma D option aggrate=0
#pragma D option stackframes=MAX_STACK
/*
 * Our D script needs to compile even if some of the TRANSLATE probes cannot
 * be found. Missing probes can be caused by older kernel, different
 * architecture, unloaded modules etc.
 */
#pragma D option zdefs
/*
 * TRACE_FILTER / TRACE_FILTER_COND expand to D predicates that restrict
 * tracing to the requested scope.  With ENABLE_SCHED, pid 0 (sched) is
 * always admitted in addition to the selected pid/pgid.
 */
#if defined(ENABLE_SCHED)
#if defined(TRACE_PID)
#define	TRACE_FILTER	/ pid == 0 || pid == TRACE_PID /
#define	TRACE_FILTER_COND(a)	/ (pid == 0 || pid == TRACE_PID) && (a) /
#elif defined(TRACE_PGID)
#define	TRACE_FILTER	/ pid == 0 || curpsinfo->pr_pgid == TRACE_PGID /
#define	TRACE_FILTER_COND(a)	\
	/ (pid == 0 || curpsinfo->pr_pgid == TRACE_PGID) && (a) /
#else
/* No pid/pgid filter requested: admit everything (incl. sched). */
#define	TRACE_FILTER
#define	TRACE_FILTER_COND(a)	/ (a) /
#endif
#else /* ENABLE_SCHED */
#if defined(TRACE_PID)
#define	TRACE_FILTER	/ pid == TRACE_PID /
#define	TRACE_FILTER_COND(a)	/ (pid == TRACE_PID) && (a) /
#elif defined(TRACE_PGID)
#define	TRACE_FILTER	/ curpsinfo->pr_pgid == TRACE_PGID /
#define	TRACE_FILTER_COND(a)	/ (curpsinfo->pr_pgid == TRACE_PGID) && (a) /
#else
/* Default: trace every process except sched (pid 0). */
#define	TRACE_FILTER	/ pid != 0 /
#define	TRACE_FILTER_COND(a)	/ (pid != 0) && (a) /
#endif
#endif /* ENABLE_SCHED */
/* Threshold to filter WAKEABLE latencies (nanoseconds). */
#define	FILTER_THRESHOLD	5000000
/*
 * This array is used to store timestamp of when threads are enqueued
 * to dispatch queue.
 * self-> is not accessible when enqueue happens.
 * Keyed by [pid, lwpid].
 */
unsigned long long lt_timestamps[int, int];

self unsigned int lt_is_block_wakeable;	/* T_WAKEABLE snapshot at off-cpu */
self unsigned long long lt_sleep_start;
self unsigned long long lt_sleep_duration;
self unsigned long long lt_sch_delay;
self unsigned int lt_counter;		/* only used in low overhead */
self unsigned long long lt_timestamp;	/* only used in low overhead */
self unsigned int lt_stackp;		/* depth of the TRANSLATE cause stack */
self unsigned int lt_prio[int];		/* cause priority, per stack slot */
self string lt_cause[int];		/* cause label, per stack slot */

this unsigned int priority;
/* NOTE(review): this->cause is assigned strings below; declaration restored. */
this string cause;
/*
 * Clean up everything, otherwise we will run out of memory.
 */
proc:::lwp-exit
{
	lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0;

	self->lt_sleep_start = 0;
	self->lt_is_block_wakeable = 0;
	self->lt_counter = 0;
	self->lt_timestamp = 0;

	/*
	 * Workaround: no way to clear associative array.
	 * We have to manually clear 0 ~ (MAX_TAG-1).
	 */
	self->lt_prio[0] = 0;
	self->lt_prio[1] = 0;
	self->lt_prio[2] = 0;
	self->lt_prio[3] = 0;
	self->lt_prio[4] = 0;
	self->lt_prio[5] = 0;
	self->lt_prio[6] = 0;
	self->lt_prio[7] = 0;

	self->lt_cause[0] = 0;
	self->lt_cause[1] = 0;
	self->lt_cause[2] = 0;
	self->lt_cause[3] = 0;
	self->lt_cause[4] = 0;
	self->lt_cause[5] = 0;
	self->lt_cause[6] = 0;
	self->lt_cause[7] = 0;
}
/* Full-detail tracing mode; the low-overhead variant is in the #else arm. */
#if !defined(ENABLE_LOW_OVERHEAD)
/*
 * Log timestamp when a thread is taken off the CPU.
 * Only SSLEEP transitions are tracked (not preemption).
 */
sched::resume:off-cpu
TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP)
{
	self->lt_sleep_start = timestamp;
	self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE;
	lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] =
	    self->lt_sleep_start;
}
/*
 * Log timestamp when a thread is put on a dispatch queue and becomes runnable.
 * args[1] is the psinfo, args[0] the lwpsinfo of the enqueued thread.
 */
sched:::enqueue
/lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0/
{
	lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] = timestamp;
}
/*
 * Calculate latency when the thread is actually on the CPU.
 * This is necessary in order to get the right stack.
 */
this unsigned long long end;
this unsigned long long now;
sched::resume:on-cpu
/self->lt_sleep_start != 0/
{
	this->end = lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid];
	this->now = timestamp;
	lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0;
	/*
	 * If no (distinct) enqueue timestamp was recorded, fall back to
	 * "now", i.e. attribute the entire interval to sleep.
	 */
	this->end = (this->end != 0 && this->end != self->lt_sleep_start)
	    ? this->end : this->now;

	self->lt_sch_delay = this->now - this->end;
	self->lt_sleep_duration = this->end - self->lt_sleep_start;
	self->lt_sleep_start = 0;
}
/*
 * Filter: drop all "large" latency when it is interruptible, i.e., sleep()
 * etc., so voluntary long waits do not pollute the results.
 */
#if defined(ENABLE_FILTER)
sched::resume:on-cpu
/self->lt_sleep_duration > FILTER_THRESHOLD &&
    self->lt_is_block_wakeable != 0/
{
	self->lt_sch_delay = 0;
	self->lt_sleep_duration = 0;
	self->lt_is_block_wakeable = 0;
}
#endif /* defined(ENABLE_FILTER) */
/*
 * Write sleep time to the aggregation.
 * lt_sleep_duration is the duration between the time when a thread is taken
 * off the CPU and the time when it is enqueued again.
 * The innermost TRANSLATE cause (top of the lt_cause stack) wins.
 */
sched::resume:on-cpu
/self->lt_sleep_duration != 0/
{
	this->cause = self->lt_stackp > 0 ?
	    self->lt_cause[self->lt_stackp - 1] : "";
	this->priority = self->lt_stackp > 0 ?
	    self->lt_prio[self->lt_stackp - 1] : 0;

	@lt_call_count[pid, tid, stack(), this->cause,
	    this->priority] = count();
	@lt_call_sum[pid, tid, stack(), this->cause,
	    this->priority] = sum(self->lt_sleep_duration);
	@lt_call_max[pid, tid, stack(), this->cause,
	    this->priority] = max(self->lt_sleep_duration);

	self->lt_is_block_wakeable = 0;	/* Clear the flag to avoid leak */
	self->lt_sleep_duration = 0;
}
/*
 * Write time spent in queue to the aggregation.
 * lt_sch_delay is the interval between the time when a thread becomes
 * runnable and the time when it is actually on the CPU.
 */
sched::resume:on-cpu
/self->lt_sch_delay != 0/
{
	@lt_named_count[pid, tid, "Wait for available CPU"] = count();
	@lt_named_sum[pid, tid, "Wait for available CPU"] =
	    sum(self->lt_sch_delay);
	@lt_named_max[pid, tid, "Wait for available CPU"] =
	    max(self->lt_sch_delay);

	self->lt_sch_delay = 0;
}
/*
 * Probes to track latency caused by spinning on a lock.
 * arg1 is the spin duration in nanoseconds.
 */
lockstat:::adaptive-spin
TRACE_FILTER
{
	@lt_named_count[pid, tid, "Adapt. lock spin"] = count();
	@lt_named_sum[pid, tid, "Adapt. lock spin"] = sum(arg1);
	@lt_named_max[pid, tid, "Adapt. lock spin"] = max(arg1);
}

lockstat:::spin-spin
TRACE_FILTER
{
	@lt_named_count[pid, tid, "Spinlock spin"] = count();
	@lt_named_sum[pid, tid, "Spinlock spin"] = sum(arg1);
	@lt_named_max[pid, tid, "Spinlock spin"] = max(arg1);
}
/*
 * Probes to track latency caused by blocking on a lock.
 * arg1 is the block duration in nanoseconds; the "#" prefix in the
 * labels is meaningful to the latencytop consumer.
 */
lockstat:::adaptive-block
TRACE_FILTER
{
	@lt_named_count[pid, tid, "#Adapt. lock block"] = count();
	@lt_named_sum[pid, tid, "#Adapt. lock block"] = sum(arg1);
	@lt_named_max[pid, tid, "#Adapt. lock block"] = max(arg1);
}

lockstat:::rw-block
TRACE_FILTER
{
	@lt_named_count[pid, tid, "#RW. lock block"] = count();
	@lt_named_sum[pid, tid, "#RW. lock block"] = sum(arg1);
	@lt_named_max[pid, tid, "#RW. lock block"] = max(arg1);
}
#if defined(ENABLE_SYNCOBJ)
/*
 * Probes to track latency caused by synchronization objects.
 */
this int stype;
this unsigned long long wchan;
this unsigned long long wtime;

/*
 * Currently we are unable to track wakeup from sched, because all its LWP IDs
 * are zero when we trace it and that makes lt_timestamps unusable.
 */
sched:::wakeup
/args[1]->pr_pid != 0 &&
    lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0/
{
	this->stype = args[0]->pr_stype;
	this->wchan = args[0]->pr_wchan;
	/*
	 * We can use lt_timestamps[] here, because
	 * wakeup is always fired before enqueue.
	 * After enqueue, lt_timestamps[] will be overwritten.
	 */
	this->wtime = timestamp - lt_timestamps[args[1]->pr_pid,
	    args[0]->pr_lwpid];

	@lt_sync_count[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
	    this->wchan] = count();
	@lt_sync_sum[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
	    this->wchan] = sum(this->wtime);
	@lt_sync_max[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
	    this->wchan] = max(this->wtime);
}
#endif /* defined(ENABLE_SYNCOBJ) */
#else /* !defined(ENABLE_LOW_OVERHEAD) */

/*
 * This is the low overhead mode.
 * In order to reduce the number of instructions executed during each
 * off-cpu and on-cpu event, we do the following:
 *
 * 1. Use sampling and update aggregations only roughly 1/100 times
 *    (SAMPLE_TIMES).
 * 2. Do not track anything other than what is needed for "main" window.
 * 3. Use as few thread local variables as possible.
 */
#define	SAMPLE_TIMES		100
#define	SAMPLE_THRESHOLD	50000000
/*
 * Log timestamp when a thread is off CPU.
 */
sched::resume:off-cpu
TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP)
{
	self->lt_timestamp = timestamp;
#if defined(ENABLE_FILTER)
	self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE;
#endif /* defined(ENABLE_FILTER) */
}
/*
 * Calculate latency when a thread is actually on the CPU.
 * Skips (zeroes) the sample unless the counter has hit SAMPLE_TIMES
 * or the latency exceeds SAMPLE_THRESHOLD.
 */
this int need_skip;
sched::resume:on-cpu
/self->lt_timestamp != 0/
{
	self->lt_timestamp = timestamp - self->lt_timestamp;

#if defined(ENABLE_FILTER)
	self->lt_timestamp =
	    (self->lt_timestamp > FILTER_THRESHOLD &&
	    self->lt_is_block_wakeable != 0) ? 0 : self->lt_timestamp;
	self->lt_is_block_wakeable = 0;
#endif /* defined(ENABLE_FILTER) */

	this->need_skip = (self->lt_counter < (SAMPLE_TIMES - 1) &&
	    self->lt_timestamp <= SAMPLE_THRESHOLD) ? 1 : 0;
	self->lt_timestamp = this->need_skip ? 0 : self->lt_timestamp;
	self->lt_counter += this->need_skip;
}
/*
 * Track large latency first.
 * Recorded with weight 1 (sum(1)), not scaled by the sample counter.
 */
sched::resume:on-cpu
/self->lt_timestamp > SAMPLE_THRESHOLD/
{
	this->cause = self->lt_stackp > 0 ?
	    self->lt_cause[self->lt_stackp - 1] : "";
	this->priority = self->lt_stackp > 0 ?
	    self->lt_prio[self->lt_stackp - 1] : 0;

	@lt_call_count[pid, tid, stack(), this->cause,
	    this->priority] = sum(1);
	@lt_call_sum[pid, tid, stack(), this->cause,
	    this->priority] = sum(self->lt_timestamp);
	@lt_call_max[pid, tid, stack(), this->cause,
	    this->priority] = max(self->lt_timestamp);

	self->lt_timestamp = 0;
}
/*
 * If we fall back to this probe, that means the latency is small and counter
 * has reached SAMPLE_TIMES.  Scale count and sum by the number of
 * skipped samples to approximate the unsampled totals.
 */
sched::resume:on-cpu
/self->lt_timestamp != 0/
{
	this->cause = self->lt_stackp > 0 ?
	    self->lt_cause[self->lt_stackp - 1] : "";
	this->priority = self->lt_stackp > 0 ?
	    self->lt_prio[self->lt_stackp - 1] : 0;

	/* Need +1 because lt_counter has not been updated in this cycle. */
	@lt_call_count[pid, tid, stack(), this->cause,
	    this->priority] = sum(self->lt_counter + 1);
	@lt_call_sum[pid, tid, stack(), this->cause,
	    this->priority] = sum((self->lt_counter + 1) * self->lt_timestamp);
	@lt_call_max[pid, tid, stack(), this->cause,
	    this->priority] = max(self->lt_timestamp);

	self->lt_timestamp = 0;
	self->lt_counter = 0;
}
#endif /* !defined(ENABLE_LOW_OVERHEAD) */
/*
 * TRANSLATE pairs an entry/return probe with a latency cause label.
 * Entry pushes (cause, priority) onto the per-thread stack when it is
 * empty or the new cause outranks (>=) the current top; return pops
 * when the top matches.  MAX_TAG bounds the stack depth.
 */
#define	TRANSLATE(entryprobe, returnprobe, cause, priority)	\
entryprobe							\
TRACE_FILTER_COND(self->lt_stackp == 0 ||			\
    (self->lt_stackp < MAX_TAG &&				\
    self->lt_prio[self->lt_stackp - 1] <= priority) )		\
{								\
	self->lt_prio[self->lt_stackp] = priority;		\
	self->lt_cause[self->lt_stackp] = cause;		\
	++self->lt_stackp;					\
}								\
returnprobe							\
TRACE_FILTER_COND(self->lt_stackp > 0 &&			\
    self->lt_cause[self->lt_stackp - 1] == cause)		\
{								\
	--self->lt_stackp;					\
	self->lt_cause[self->lt_stackp] = NULL;			\
}
/*
 * Syscalls have a priority of 10. This is to make sure that latency is
 * traced to one of the syscalls only if nothing else matches.
 * We put this special probe here because it uses "probefunc" variable instead
 * of a constant string.
 */
TRANSLATE(syscall:::entry, syscall:::return, probefunc, 10)