Merge commit '00f1a4f432b3d8aad1aa270e91c44c57f03ef407'
[unleashed.git] / usr / src / cmd / latencytop / common / latencytop.d
blobc6686fa3b5ab054cc214ae7aa2dc12c957b7c3c4
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 2008-2009, Intel Corporation.
23 * All Rights Reserved.
26 #define MAX_TAG 8
27 #define MAX_STACK 64
29 #pragma D option aggsize=8m
30 #pragma D option bufsize=16m
31 #pragma D option dynvarsize=16m
32 #pragma D option aggrate=0
33 #pragma D option stackframes=MAX_STACK
35 * Our D script needs to compile even if some of the TRANSLATE probes cannot
36 * be found. Missing probes can be caused by older kernel, different
37 * architecture, unloaded modules etc.
39 #pragma D option zdefs
41 #if defined(ENABLE_SCHED)
42 #if defined(TRACE_PID)
43 #define TRACE_FILTER / pid == 0 || pid == TRACE_PID /
44 #define TRACE_FILTER_COND(a) / (pid == 0 || pid == TRACE_PID) && (a) /
45 #elif defined(TRACE_PGID)
46 #define TRACE_FILTER / pid == 0 || curpsinfo->pr_pgid == TRACE_PGID /
47 #define TRACE_FILTER_COND(a)
48 / (pid == 0 || curpsinfo->pr_pgid == TRACE_PGID) && (a) /
49 #else
50 #define TRACE_FILTER
51 #define TRACE_FILTER_COND(a) / (a) /
52 #endif
53 #else /* ENABLE_SCHED */
54 #if defined(TRACE_PID)
55 #define TRACE_FILTER / pid == TRACE_PID /
56 #define TRACE_FILTER_COND(a) / (pid == TRACE_PID) && (a) /
57 #elif defined(TRACE_PGID)
58 #define TRACE_FILTER / curpsinfo->pr_pgid == TRACE_PGID /
59 #define TRACE_FILTER_COND(a) / (curpsinfo->pr_pgid == TRACE_PGID) && (a) /
60 #else
61 #define TRACE_FILTER / pid != 0 /
62 #define TRACE_FILTER_COND(a) / (pid != 0) && (a) /
63 #endif
64 #endif /* ENABLE_SCHED */
66 /* Threshold to filter WAKEABLE latencies. */
67 #define FILTER_THRESHOLD 5000000
68 /* From thread.h */
69 #define T_WAKEABLE 2
72 * This array is used to store timestamp of when threads are enqueued
73 * to dispatch queue.
74 * self-> is not accessible when enqueue happens.
76 unsigned long long lt_timestamps[int, int];
78 self unsigned int lt_is_block_wakeable;
79 self unsigned long long lt_sleep_start;
80 self unsigned long long lt_sleep_duration;
81 self unsigned long long lt_sch_delay;
82 self unsigned int lt_counter; /* only used in low overhead */
83 self unsigned long long lt_timestamp; /* only used in low overhead */
84 self unsigned int lt_stackp;
85 self unsigned int lt_prio[int];
86 self string lt_cause[int];
88 this unsigned int priority;
89 this string cause;
92 * Clean up everything, otherwise we will run out of memory.
94 proc:::lwp-exit
96 lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0;
98 self->lt_sleep_start = 0;
99 self->lt_is_block_wakeable = 0;
100 self->lt_counter = 0;
101 self->lt_timestamp = 0;
104 * Workaround: no way to clear associative array.
105 * We have to manually clear 0 ~ (MAX_TAG-1).
108 self->lt_prio[0] = 0;
109 self->lt_prio[1] = 0;
110 self->lt_prio[2] = 0;
111 self->lt_prio[3] = 0;
112 self->lt_prio[4] = 0;
113 self->lt_prio[5] = 0;
114 self->lt_prio[6] = 0;
115 self->lt_prio[7] = 0;
117 self->lt_cause[0] = 0;
118 self->lt_cause[1] = 0;
119 self->lt_cause[2] = 0;
120 self->lt_cause[3] = 0;
121 self->lt_cause[4] = 0;
122 self->lt_cause[5] = 0;
123 self->lt_cause[6] = 0;
124 self->lt_cause[7] = 0;
127 #if !defined(ENABLE_LOW_OVERHEAD)
129 * Log timestamp when a thread is taken off the CPU.
131 sched::resume:off-cpu
132 TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP)
134 self->lt_sleep_start = timestamp;
135 self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE;
137 lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] =
138 self->lt_sleep_start;
142 * Log timestamp when a thread is put on a dispatch queue and becomes runnable.
144 sched:::enqueue
145 /lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0/
147 lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] = timestamp;
151 * Calculate latency when the thread is actually on the CPU.
152 * This is necessary in order to get the right stack.
154 this unsigned long long end;
155 this unsigned long long now;
156 sched::resume:on-cpu
157 /self->lt_sleep_start != 0/
159 this->end = lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid];
160 this->now = timestamp;
161 lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0;
162 this->end = (this->end != 0 && this->end != self->lt_sleep_start)
163 ? this->end : this->now;
165 self->lt_sch_delay = this->now - this->end;
166 self->lt_sleep_duration = this->end - self->lt_sleep_start;
167 self->lt_sleep_start = 0;
171 * Filter: drop all "large" latency when it is interruptible, i.e., sleep()
172 * etc.
174 #if defined(ENABLE_FILTER)
175 sched::resume:on-cpu
176 /self->lt_sleep_duration > FILTER_THRESHOLD &&
177 self->lt_is_block_wakeable != 0/
179 self->lt_sch_delay = 0;
180 self->lt_sleep_duration = 0;
181 self->lt_is_block_wakeable = 0;
183 #endif /* defined(ENABLE_FILTER) */
186 * Write sleep time to the aggregation.
187 * lt_sleep_duration is the duration between the time when a thread is taken
188 * off the CPU and the time when it is enqueued again.
190 sched::resume:on-cpu
191 /self->lt_sleep_duration != 0/
193 this->cause = self->lt_stackp > 0 ?
194 self->lt_cause[self->lt_stackp - 1] : "";
195 this->priority = self->lt_stackp > 0 ?
196 self->lt_prio[self->lt_stackp - 1] : 0;
198 @lt_call_count[pid, tid, stack(), this->cause,
199 this->priority] = count();
200 @lt_call_sum[pid, tid, stack(), this->cause,
201 this->priority] = sum(self->lt_sleep_duration);
202 @lt_call_max[pid, tid, stack(), this->cause,
203 this->priority] = max(self->lt_sleep_duration);
205 self->lt_is_block_wakeable = 0; /* Clear the flag to avoid leak */
206 self->lt_sleep_duration = 0;
210 * Write time spent in queue to the aggregation.
211 * lt_sch_delay is the interval between the time when a thread becomes
212 * runnable and the time when it is actually on the CPU.
214 sched::resume:on-cpu
215 /self->lt_sch_delay != 0/
217 @lt_named_count[pid, tid, "Wait for available CPU"] = count();
218 @lt_named_sum[pid, tid, "Wait for available CPU"] =
219 sum(self->lt_sch_delay);
220 @lt_named_max[pid, tid, "Wait for available CPU"] =
221 max(self->lt_sch_delay);
223 self->lt_sch_delay = 0;
227 * Probes to track latency caused by spinning on a lock.
229 lockstat:::adaptive-spin
230 TRACE_FILTER
232 @lt_named_count[pid, tid, "Adapt. lock spin"] = count();
233 @lt_named_sum[pid, tid, "Adapt. lock spin"] = sum(arg1);
234 @lt_named_max[pid, tid, "Adapt. lock spin"] = max(arg1);
237 lockstat:::spin-spin
238 TRACE_FILTER
240 @lt_named_count[pid, tid, "Spinlock spin"] = count();
241 @lt_named_sum[pid, tid, "Spinlock spin"] = sum(arg1);
242 @lt_named_max[pid, tid, "Spinlock spin"] = max(arg1);
246 * Probes to track latency caused by blocking on a lock.
248 lockstat:::adaptive-block
249 TRACE_FILTER
251 @lt_named_count[pid, tid, "#Adapt. lock block"] = count();
252 @lt_named_sum[pid, tid, "#Adapt. lock block"] = sum(arg1);
253 @lt_named_max[pid, tid, "#Adapt. lock block"] = max(arg1);
256 lockstat:::rw-block
257 TRACE_FILTER
259 @lt_named_count[pid, tid, "#RW. lock block"] = count();
260 @lt_named_sum[pid, tid, "#RW. lock block"] = sum(arg1);
261 @lt_named_max[pid, tid, "#RW. lock block"] = max(arg1);
264 #if defined(ENABLE_SYNCOBJ)
266 * Probes to track latency caused by synchronization objects.
268 this int stype;
269 this unsigned long long wchan;
270 this unsigned long long wtime;
272 sched:::wakeup
274 * Currently we are unable to track wakeup from sched, because all its LWP IDs
275 * are zero when we trace it and that makes lt_timestamps unusable.
277 /args[1]->pr_pid != 0 &&
278 lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0/
280 this->stype = args[0]->pr_stype;
281 this->wchan = args[0]->pr_wchan;
283 * We can use lt_timestamps[] here, because
284 * wakeup is always fired before enqueue.
285 * After enqueue, lt_timestamps[] will be overwritten.
287 this->wtime = timestamp - lt_timestamps[args[1]->pr_pid,
288 args[0]->pr_lwpid];
290 @lt_sync_count[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
291 this->wchan] = count();
292 @lt_sync_sum[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
293 this->wchan] = sum(this->wtime);
294 @lt_sync_max[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
295 this->wchan] = max(this->wtime);
297 #endif /* defined(ENABLE_SYNCOBJ) */
299 #else /* !defined(ENABLE_LOW_OVERHEAD) */
302 * This is the low overhead mode.
303 * In order to reduce the number of instructions executed during each
304 * off-cpu and on-cpu event, we do the following:
306 * 1. Use sampling and update aggregations only roughly 1/100 times
307 * (SAMPLE_TIMES).
308 * 2. Do not track anything other than what is needed for "main" window.
309 * 3. Use as few thread local variables as possible.
312 #define SAMPLE_TIMES 100
313 #define SAMPLE_THRESHOLD 50000000
316 * Log timestamp when a thread is off CPU.
318 sched::resume:off-cpu
319 TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP)
321 self->lt_timestamp = timestamp;
322 #if defined(ENABLE_FILTER)
323 self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE;
324 #endif /* defined(ENABLE_FILTER) */
328 * Calculate latency when a thread is actually on the CPU.
330 this int need_skip;
331 sched::resume:on-cpu
332 /self->lt_timestamp != 0/
334 self->lt_timestamp = timestamp - self->lt_timestamp;
336 #if defined(ENABLE_FILTER)
337 self->lt_timestamp =
338 (self->lt_timestamp > FILTER_THRESHOLD &&
339 self->lt_is_block_wakeable != 0) ? 0 : self->lt_timestamp;
340 self->lt_is_block_wakeable = 0;
341 #endif /* defined(ENABLE_FILTER) */
343 this->need_skip = (self->lt_counter < (SAMPLE_TIMES - 1) &&
344 self->lt_timestamp <= SAMPLE_THRESHOLD) ? 1 : 0;
345 self->lt_timestamp = this->need_skip ? 0 : self->lt_timestamp;
346 self->lt_counter += this->need_skip;
350 * Track large latency first.
352 sched::resume:on-cpu
353 /self->lt_timestamp > SAMPLE_THRESHOLD/
355 this->cause = self->lt_stackp > 0 ?
356 self->lt_cause[self->lt_stackp - 1] : "";
357 this->priority = self->lt_stackp > 0 ?
358 self->lt_prio[self->lt_stackp - 1] : 0;
360 @lt_call_count[pid, tid, stack(), this->cause,
361 this->priority] = sum(1);
362 @lt_call_sum[pid, tid, stack(), this->cause,
363 this->priority] = sum(self->lt_timestamp);
364 @lt_call_max[pid, tid, stack(), this->cause,
365 this->priority] = max(self->lt_timestamp);
367 self->lt_timestamp = 0;
371 * If we fall back to this probe, that means the latency is small and counter
372 * has reached SAMPLE_TIMES.
374 sched::resume:on-cpu
375 /self->lt_timestamp != 0/
377 this->cause = self->lt_stackp > 0 ?
378 self->lt_cause[self->lt_stackp - 1] : "";
379 this->priority = self->lt_stackp > 0 ?
380 self->lt_prio[self->lt_stackp - 1] : 0;
382 /* Need +1 because lt_counter has not been updated in this cycle. */
383 @lt_call_count[pid, tid, stack(), this->cause,
384 this->priority] = sum(self->lt_counter + 1);
385 @lt_call_sum[pid, tid, stack(), this->cause,
386 this->priority] = sum((self->lt_counter + 1) * self->lt_timestamp);
387 @lt_call_max[pid, tid, stack(), this->cause,
388 this->priority] = max(self->lt_timestamp);
390 self->lt_timestamp = 0;
391 self->lt_counter = 0;
394 #endif /* !defined(ENABLE_LOW_OVERHEAD) */
396 #define TRANSLATE(entryprobe, returnprobe, cause, priority) \
397 entryprobe \
398 TRACE_FILTER_COND(self->lt_stackp == 0 || \
399 (self->lt_stackp < MAX_TAG && \
400 self->lt_prio[self->lt_stackp - 1] <= priority) ) \
402 self->lt_prio[self->lt_stackp] = priority; \
403 self->lt_cause[self->lt_stackp] = cause; \
404 ++self->lt_stackp; \
406 returnprobe \
407 TRACE_FILTER_COND(self->lt_stackp > 0 && \
408 self->lt_cause[self->lt_stackp - 1] == cause) \
410 --self->lt_stackp; \
411 self->lt_cause[self->lt_stackp] = NULL; \
415 * Syscalls have a priority of 10. This is to make sure that latency is
416 * traced to one of the syscalls only if nothing else matches.
417 * We put this special probe here because it uses "probefunc" variable instead
418 * of a constant string.
421 TRANSLATE(syscall:::entry, syscall:::return, probefunc, 10)