4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012 by Delphix. All rights reserved.
27 * Architecture-independent CPU control functions.
30 #include <sys/types.h>
31 #include <sys/param.h>
33 #include <sys/thread.h>
34 #include <sys/cpuvar.h>
35 #include <sys/cpu_event.h>
36 #include <sys/kstat.h>
37 #include <sys/uadmin.h>
38 #include <sys/systm.h>
39 #include <sys/errno.h>
40 #include <sys/cmn_err.h>
41 #include <sys/procset.h>
42 #include <sys/processor.h>
43 #include <sys/debug.h>
44 #include <sys/cpupart.h>
49 #include <sys/kmem_impl.h> /* to set per-cpu kmem_cache offset */
50 #include <sys/atomic.h>
51 #include <sys/callb.h>
52 #include <sys/vtrace.h>
53 #include <sys/cyclic.h>
54 #include <sys/bitmap.h>
55 #include <sys/nvpair.h>
56 #include <sys/pool_pset.h>
57 #include <sys/msacct.h>
59 #include <sys/archsystm.h>
61 #if defined(__x86) || defined(__amd64)
62 #include <sys/x86_archext.h>
64 #include <sys/callo.h>
66 extern int mp_cpu_start(cpu_t
*);
67 extern int mp_cpu_stop(cpu_t
*);
68 extern int mp_cpu_poweron(cpu_t
*);
69 extern int mp_cpu_poweroff(cpu_t
*);
70 extern int mp_cpu_configure(int);
71 extern int mp_cpu_unconfigure(int);
72 extern void mp_cpu_faulted_enter(cpu_t
*);
73 extern void mp_cpu_faulted_exit(cpu_t
*);
75 extern int cmp_cpu_to_chip(processorid_t cpuid
);
77 extern char *cpu_fru_fmri(cpu_t
*cp
);
80 static void cpu_add_active_internal(cpu_t
*cp
);
81 static void cpu_remove_active(cpu_t
*cp
);
82 static void cpu_info_kstat_create(cpu_t
*cp
);
83 static void cpu_info_kstat_destroy(cpu_t
*cp
);
84 static void cpu_stats_kstat_create(cpu_t
*cp
);
85 static void cpu_stats_kstat_destroy(cpu_t
*cp
);
87 static int cpu_sys_stats_ks_update(kstat_t
*ksp
, int rw
);
88 static int cpu_vm_stats_ks_update(kstat_t
*ksp
, int rw
);
89 static int cpu_stat_ks_update(kstat_t
*ksp
, int rw
);
90 static int cpu_state_change_hooks(int, cpu_setup_t
, cpu_setup_t
);
93 * cpu_lock protects ncpus, ncpus_online, cpu_flag, cpu_list, cpu_active,
94 * max_cpu_seqid_ever, and dispatch queue reallocations. The lock ordering with
95 * respect to related locks is:
97 * cpu_lock --> thread_free_lock ---> p_lock ---> thread_lock()
99 * Warning: Certain sections of code do not use the cpu_lock when
100 * traversing the cpu_list (e.g. mutex_vector_enter(), clock()). Since
101 * all cpus are paused during modifications to this list, a solution
102 * to protect the list is to either disable kernel preemption while
103 * walking the list, *or* recheck the cpu_next pointer at each
104 * iteration in the loop. Note that in no cases can any cached
105 * copies of the cpu pointers be kept as they may become invalid.
108 cpu_t
*cpu_list
; /* list of all CPUs */
109 cpu_t
*clock_cpu_list
; /* used by clock to walk CPUs */
110 cpu_t
*cpu_active
; /* list of active CPUs */
111 static cpuset_t cpu_available
; /* set of available CPUs */
112 cpuset_t cpu_seqid_inuse
; /* which cpu_seqids are in use */
114 cpu_t
**cpu_seq
; /* ptrs to CPUs, indexed by seq_id */
117 * max_ncpus keeps the max cpus the system can have. Initially
118 * it's NCPU, but since most archs scan the devtree for cpus
119 * fairly early on during boot, the real max can be known before
120 * ncpus is set (useful for early NCPU based allocations).
122 int max_ncpus
= NCPU
;
124 * platforms that set max_ncpus to the maximum number of cpus that can be
125 * dynamically added will set boot_max_ncpus to the number of cpus found
126 * at device tree scan time during boot.
128 int boot_max_ncpus
= -1;
131 * Maximum possible CPU id. This can never be >= NCPU since NCPU is
132 * used to size arrays that are indexed by CPU id.
134 processorid_t max_cpuid
= NCPU
- 1;
137 * Maximum cpu_seqid ever given out. This number can only grow and never shrink. It
138 * can be used to optimize NCPU loops to avoid going through CPUs which were
141 processorid_t max_cpu_seqid_ever
= 0;
144 int ncpus_online
= 1;
147 * CPU that we're trying to offline. Protected by cpu_lock.
152 * Can be raised to suppress further weakbindings, which are instead
153 * satisfied by disabling preemption. Must be raised/lowered under cpu_lock,
154 * while individual thread weakbinding synchronization is done under thread
157 int weakbindingbarrier
;
160 * Variables used in pause_cpus().
162 static volatile char safe_list
[NCPU
];
164 static struct _cpu_pause_info
{
165 int cp_spl
; /* spl saved in pause_cpus() */
166 volatile int cp_go
; /* Go signal sent after all ready */
167 int cp_count
; /* # of CPUs to pause */
168 ksema_t cp_sem
; /* synch pause_cpus & cpu_pause */
169 kthread_id_t cp_paused
;
170 void *(*cp_func
)(void *);
173 static kmutex_t pause_free_mutex
;
174 static kcondvar_t pause_free_cv
;
177 static struct cpu_sys_stats_ks_data
{
178 kstat_named_t cpu_ticks_idle
;
179 kstat_named_t cpu_ticks_user
;
180 kstat_named_t cpu_ticks_kernel
;
181 kstat_named_t cpu_ticks_wait
;
182 kstat_named_t cpu_nsec_idle
;
183 kstat_named_t cpu_nsec_user
;
184 kstat_named_t cpu_nsec_kernel
;
185 kstat_named_t cpu_nsec_dtrace
;
186 kstat_named_t cpu_nsec_intr
;
187 kstat_named_t cpu_load_intr
;
188 kstat_named_t wait_ticks_io
;
189 kstat_named_t dtrace_probes
;
191 kstat_named_t bwrite
;
193 kstat_named_t lwrite
;
194 kstat_named_t phread
;
195 kstat_named_t phwrite
;
196 kstat_named_t pswitch
;
199 kstat_named_t syscall
;
200 kstat_named_t sysread
;
201 kstat_named_t syswrite
;
202 kstat_named_t sysfork
;
203 kstat_named_t sysvfork
;
204 kstat_named_t sysexec
;
205 kstat_named_t readch
;
206 kstat_named_t writech
;
207 kstat_named_t rcvint
;
208 kstat_named_t xmtint
;
209 kstat_named_t mdmint
;
216 kstat_named_t ufsiget
;
217 kstat_named_t ufsdirblk
;
218 kstat_named_t ufsipage
;
219 kstat_named_t ufsinopage
;
220 kstat_named_t procovf
;
221 kstat_named_t intrthread
;
222 kstat_named_t intrblk
;
223 kstat_named_t intrunpin
;
224 kstat_named_t idlethread
;
225 kstat_named_t inv_swtch
;
226 kstat_named_t nthreads
;
227 kstat_named_t cpumigrate
;
228 kstat_named_t xcalls
;
229 kstat_named_t mutex_adenters
;
230 kstat_named_t rw_rdfails
;
231 kstat_named_t rw_wrfails
;
232 kstat_named_t modload
;
233 kstat_named_t modunload
;
234 kstat_named_t bawrite
;
235 kstat_named_t iowait
;
236 } cpu_sys_stats_ks_data_template
= {
237 { "cpu_ticks_idle", KSTAT_DATA_UINT64
},
238 { "cpu_ticks_user", KSTAT_DATA_UINT64
},
239 { "cpu_ticks_kernel", KSTAT_DATA_UINT64
},
240 { "cpu_ticks_wait", KSTAT_DATA_UINT64
},
241 { "cpu_nsec_idle", KSTAT_DATA_UINT64
},
242 { "cpu_nsec_user", KSTAT_DATA_UINT64
},
243 { "cpu_nsec_kernel", KSTAT_DATA_UINT64
},
244 { "cpu_nsec_dtrace", KSTAT_DATA_UINT64
},
245 { "cpu_nsec_intr", KSTAT_DATA_UINT64
},
246 { "cpu_load_intr", KSTAT_DATA_UINT64
},
247 { "wait_ticks_io", KSTAT_DATA_UINT64
},
248 { "dtrace_probes", KSTAT_DATA_UINT64
},
249 { "bread", KSTAT_DATA_UINT64
},
250 { "bwrite", KSTAT_DATA_UINT64
},
251 { "lread", KSTAT_DATA_UINT64
},
252 { "lwrite", KSTAT_DATA_UINT64
},
253 { "phread", KSTAT_DATA_UINT64
},
254 { "phwrite", KSTAT_DATA_UINT64
},
255 { "pswitch", KSTAT_DATA_UINT64
},
256 { "trap", KSTAT_DATA_UINT64
},
257 { "intr", KSTAT_DATA_UINT64
},
258 { "syscall", KSTAT_DATA_UINT64
},
259 { "sysread", KSTAT_DATA_UINT64
},
260 { "syswrite", KSTAT_DATA_UINT64
},
261 { "sysfork", KSTAT_DATA_UINT64
},
262 { "sysvfork", KSTAT_DATA_UINT64
},
263 { "sysexec", KSTAT_DATA_UINT64
},
264 { "readch", KSTAT_DATA_UINT64
},
265 { "writech", KSTAT_DATA_UINT64
},
266 { "rcvint", KSTAT_DATA_UINT64
},
267 { "xmtint", KSTAT_DATA_UINT64
},
268 { "mdmint", KSTAT_DATA_UINT64
},
269 { "rawch", KSTAT_DATA_UINT64
},
270 { "canch", KSTAT_DATA_UINT64
},
271 { "outch", KSTAT_DATA_UINT64
},
272 { "msg", KSTAT_DATA_UINT64
},
273 { "sema", KSTAT_DATA_UINT64
},
274 { "namei", KSTAT_DATA_UINT64
},
275 { "ufsiget", KSTAT_DATA_UINT64
},
276 { "ufsdirblk", KSTAT_DATA_UINT64
},
277 { "ufsipage", KSTAT_DATA_UINT64
},
278 { "ufsinopage", KSTAT_DATA_UINT64
},
279 { "procovf", KSTAT_DATA_UINT64
},
280 { "intrthread", KSTAT_DATA_UINT64
},
281 { "intrblk", KSTAT_DATA_UINT64
},
282 { "intrunpin", KSTAT_DATA_UINT64
},
283 { "idlethread", KSTAT_DATA_UINT64
},
284 { "inv_swtch", KSTAT_DATA_UINT64
},
285 { "nthreads", KSTAT_DATA_UINT64
},
286 { "cpumigrate", KSTAT_DATA_UINT64
},
287 { "xcalls", KSTAT_DATA_UINT64
},
288 { "mutex_adenters", KSTAT_DATA_UINT64
},
289 { "rw_rdfails", KSTAT_DATA_UINT64
},
290 { "rw_wrfails", KSTAT_DATA_UINT64
},
291 { "modload", KSTAT_DATA_UINT64
},
292 { "modunload", KSTAT_DATA_UINT64
},
293 { "bawrite", KSTAT_DATA_UINT64
},
294 { "iowait", KSTAT_DATA_UINT64
},
297 static struct cpu_vm_stats_ks_data
{
299 kstat_named_t pgfrec
;
301 kstat_named_t pgpgin
;
303 kstat_named_t pgpgout
;
308 kstat_named_t hat_fault
;
309 kstat_named_t as_fault
;
310 kstat_named_t maj_fault
;
311 kstat_named_t cow_fault
;
312 kstat_named_t prot_fault
;
313 kstat_named_t softlock
;
314 kstat_named_t kernel_asflt
;
315 kstat_named_t pgrrun
;
316 kstat_named_t execpgin
;
317 kstat_named_t execpgout
;
318 kstat_named_t execfree
;
319 kstat_named_t anonpgin
;
320 kstat_named_t anonpgout
;
321 kstat_named_t anonfree
;
322 kstat_named_t fspgin
;
323 kstat_named_t fspgout
;
324 kstat_named_t fsfree
;
325 } cpu_vm_stats_ks_data_template
= {
326 { "pgrec", KSTAT_DATA_UINT64
},
327 { "pgfrec", KSTAT_DATA_UINT64
},
328 { "pgin", KSTAT_DATA_UINT64
},
329 { "pgpgin", KSTAT_DATA_UINT64
},
330 { "pgout", KSTAT_DATA_UINT64
},
331 { "pgpgout", KSTAT_DATA_UINT64
},
332 { "zfod", KSTAT_DATA_UINT64
},
333 { "dfree", KSTAT_DATA_UINT64
},
334 { "scan", KSTAT_DATA_UINT64
},
335 { "rev", KSTAT_DATA_UINT64
},
336 { "hat_fault", KSTAT_DATA_UINT64
},
337 { "as_fault", KSTAT_DATA_UINT64
},
338 { "maj_fault", KSTAT_DATA_UINT64
},
339 { "cow_fault", KSTAT_DATA_UINT64
},
340 { "prot_fault", KSTAT_DATA_UINT64
},
341 { "softlock", KSTAT_DATA_UINT64
},
342 { "kernel_asflt", KSTAT_DATA_UINT64
},
343 { "pgrrun", KSTAT_DATA_UINT64
},
344 { "execpgin", KSTAT_DATA_UINT64
},
345 { "execpgout", KSTAT_DATA_UINT64
},
346 { "execfree", KSTAT_DATA_UINT64
},
347 { "anonpgin", KSTAT_DATA_UINT64
},
348 { "anonpgout", KSTAT_DATA_UINT64
},
349 { "anonfree", KSTAT_DATA_UINT64
},
350 { "fspgin", KSTAT_DATA_UINT64
},
351 { "fspgout", KSTAT_DATA_UINT64
},
352 { "fsfree", KSTAT_DATA_UINT64
},
356 * Force the specified thread to migrate to the appropriate processor.
357 * Called with thread lock held, returns with it dropped.
360 force_thread_migrate(kthread_id_t tp
)
362 ASSERT(THREAD_LOCK_HELD(tp
));
363 if (tp
== curthread
) {
364 THREAD_TRANSITION(tp
);
366 thread_unlock_nopreempt(tp
);
369 if (tp
->t_state
== TS_ONPROC
) {
371 } else if (tp
->t_state
== TS_RUN
) {
380 * Set affinity for a specified CPU.
381 * A reference count is incremented and the affinity is held until the
382 * reference count is decremented to zero by thread_affinity_clear().
383 * This is so regions of code requiring affinity can be nested.
384 * Caller needs to ensure that cpu_id remains valid, which can be
385 * done by holding cpu_lock across this call, unless the caller
386 * specifies CPU_CURRENT in which case the cpu_lock will be acquired
387 * by thread_affinity_set and CPU->cpu_id will be the target CPU.
390 thread_affinity_set(kthread_id_t t
, int cpu_id
)
395 ASSERT(!(t
== curthread
&& t
->t_weakbound_cpu
!= NULL
));
397 if ((c
= cpu_id
) == CPU_CURRENT
) {
398 mutex_enter(&cpu_lock
);
399 cpu_id
= CPU
->cpu_id
;
401 ASSERT(MUTEX_HELD(&cpu_lock
));
402 ASSERT((cpu_id
>= 0) && (cpu_id
< NCPU
));
404 ASSERT(cp
!= NULL
); /* user must provide a good cpu_id */
406 * If there is already a hard affinity requested, and this affinity
407 * conflicts with that, panic.
410 if (t
->t_affinitycnt
> 0 && t
->t_bound_cpu
!= cp
) {
411 panic("affinity_set: setting %p but already bound to %p",
412 (void *)cp
, (void *)t
->t_bound_cpu
);
418 * Make sure we're running on the right CPU.
420 if (cp
!= t
->t_cpu
|| t
!= curthread
) {
421 force_thread_migrate(t
); /* drops thread lock */
426 if (c
== CPU_CURRENT
)
427 mutex_exit(&cpu_lock
);
431 * Wrapper for backward compatibility.
434 affinity_set(int cpu_id
)
436 thread_affinity_set(curthread
, cpu_id
);
440 * Decrement the affinity reservation count and if it becomes zero,
441 * clear the CPU affinity for the specified thread, or set it to the user's
442 * software binding request.
445 thread_affinity_clear(kthread_id_t t
)
447 register processorid_t binding
;
450 if (--t
->t_affinitycnt
== 0) {
451 if ((binding
= t
->t_bind_cpu
) == PBIND_NONE
) {
453 * Adjust disp_max_unbound_pri if necessary.
455 disp_adjust_unbound_pri(t
);
456 t
->t_bound_cpu
= NULL
;
457 if (t
->t_cpu
->cpu_part
!= t
->t_cpupart
) {
458 force_thread_migrate(t
);
462 t
->t_bound_cpu
= cpu
[binding
];
464 * Make sure the thread is running on the bound CPU.
466 if (t
->t_cpu
!= t
->t_bound_cpu
) {
467 force_thread_migrate(t
);
468 return; /* already dropped lock */
476 * Wrapper for backward compatibility.
481 thread_affinity_clear(curthread
);
485 * Weak cpu affinity. Bind to the "current" cpu for short periods
486 * of time during which the thread must not block (but may be preempted).
487 * Use this instead of kpreempt_disable() when it is only "no migration"
488 * rather than "no preemption" semantics that are required - disabling
489 * preemption holds higher priority threads off of cpu and if the
490 * operation that is protected is more than momentary this is not good
493 * Weakly bound threads will not prevent a cpu from being offlined -
494 * we'll only run them on the cpu to which they are weakly bound but
495 * (because they do not block) we'll always be able to move them on to
496 * another cpu at offline time if we give them just a short moment to
497 * run during which they will unbind. To give a cpu a chance of offlining,
498 * however, we require a barrier to weak bindings that may be raised for a
499 * given cpu (offline/move code may set this and then wait a short time for
500 * existing weak bindings to drop); the cpu_inmotion pointer is that barrier.
502 * There are few restrictions on the calling context of thread_nomigrate.
503 * The caller must not hold the thread lock. Calls may be nested.
505 * After weakbinding a thread must not perform actions that may block.
506 * In particular it must not call thread_affinity_set; calling that when
507 * already weakbound is nonsensical anyway.
509 * If curthread is prevented from migrating for other reasons
510 * (kernel preemption disabled; high pil; strongly bound; interrupt thread)
511 * then the weak binding will succeed even if this cpu is the target of an
512 * offline/move request.
515 thread_nomigrate(void)
518 kthread_id_t t
= curthread
;
525 * A highlevel interrupt must not modify t_nomigrate or
526 * t_weakbound_cpu of the thread it has interrupted. A lowlevel
527 * interrupt thread cannot migrate and we can avoid the
528 * thread_lock call below by short-circuiting here. In either
529 * case we can just return since no migration is possible and
530 * the condition will persist (ie, when we test for these again
531 * in thread_allowmigrate they can't have changed). Migration
532 * is also impossible if we're at or above DISP_LEVEL pil.
534 if (CPU_ON_INTR(cp
) || t
->t_flag
& T_INTR_THREAD
||
535 getpil() >= DISP_LEVEL
) {
541 * We must be consistent with existing weak bindings. Since we
542 * may be interrupted between the increment of t_nomigrate and
543 * the store to t_weakbound_cpu below we cannot assume that
544 * t_weakbound_cpu will be set if t_nomigrate is. Note that we
545 * cannot assert t_weakbound_cpu == t_bind_cpu since that is not
548 if (t
->t_nomigrate
&& t
->t_weakbound_cpu
&& t
->t_weakbound_cpu
!= cp
) {
550 panic("thread_nomigrate: binding to %p but already "
551 "bound to %p", (void *)cp
,
552 (void *)t
->t_weakbound_cpu
);
556 * At this point we have preemption disabled and we don't yet hold
557 * the thread lock. So it's possible that somebody else could
558 * set t_bind_cpu here and not be able to force us across to the
559 * new cpu (since we have preemption disabled).
561 thread_lock(curthread
);
564 * If further weak bindings are being (temporarily) suppressed then
565 * we'll settle for disabling kernel preemption (which assures
566 * no migration provided the thread does not block which it is
567 * not allowed to if using thread_nomigrate). We must remember
568 * this disposition so we can take appropriate action in
569 * thread_allowmigrate. If this is a nested call and the
570 * thread is already weakbound then fall through as normal.
571 * We remember the decision to settle for kpreempt_disable through
572 * negative nesting counting in t_nomigrate. Once a thread has had one
573 * weakbinding request satisfied in this way any further (nested)
574 * requests will continue to be satisfied in the same way,
575 * even if weak bindings have recommenced.
577 if (t
->t_nomigrate
< 0 || weakbindingbarrier
&& t
->t_nomigrate
== 0) {
579 thread_unlock(curthread
);
580 return; /* with kpreempt_disable still active */
584 * We hold thread_lock so t_bind_cpu cannot change. We could,
585 * however, be running on a different cpu to which we are t_bound_cpu
586 * to (as explained above). If we grant the weak binding request
587 * in that case then the dispatcher must favour our weak binding
588 * over our strong (in which case, just as when preemption is
589 * disabled, we can continue to run on a cpu other than the one to
590 * which we are strongbound; the difference in this case is that
591 * this thread can be preempted and so can appear on the dispatch
592 * queues of a cpu other than the one it is strongbound to).
594 * If the cpu we are running on does not appear to be a current
595 * offline target (we check cpu_inmotion to determine this - since
596 * we don't hold cpu_lock we may not see a recent store to that,
597 * so it's possible that we at times can grant a weak binding to a
598 * cpu that is an offline target, but that one request will not
599 * prevent the offline from succeeding) then we will always grant
600 * the weak binding request. This includes the case above where
601 * we grant a weakbinding not commensurate with our strong binding.
603 * If our cpu does appear to be an offline target then we're inclined
604 * not to grant the weakbinding request just yet - we'd prefer to
605 * migrate to another cpu and grant the request there. The
606 * exceptions are those cases where going through preemption code
607 * will not result in us changing cpu:
609 * . interrupts have already bypassed this case (see above)
610 * . we are already weakbound to this cpu (dispatcher code will
611 * always return us to the weakbound cpu)
612 * . preemption was disabled even before we disabled it above
613 * . we are strongbound to this cpu (if we're strongbound to
614 * another and not yet running there the trip through the
615 * dispatcher will move us to the strongbound cpu and we
616 * will grant the weak binding there)
618 if (cp
!= cpu_inmotion
|| t
->t_nomigrate
> 0 || t
->t_preempt
> 1 ||
619 t
->t_bound_cpu
== cp
) {
621 * Don't be tempted to store to t_weakbound_cpu only on
622 * the first nested bind request - if we're interrupted
623 * after the increment of t_nomigrate and before the
624 * store to t_weakbound_cpu and the interrupt calls
625 * thread_nomigrate then the assertion in thread_allowmigrate
629 t
->t_weakbound_cpu
= cp
;
631 thread_unlock(curthread
);
633 * Now that we have dropped the thread_lock another thread
634 * can set our t_weakbound_cpu, and will try to migrate us
635 * to the strongbound cpu (which will not be prevented by
636 * preemption being disabled since we're about to enable
637 * preemption). We have granted the weakbinding to the current
638 * cpu, so again we are in the position that it is possible
639 * that our weak and strong bindings differ. Again this
640 * is catered for by dispatcher code which will favour our
646 * Move to another cpu before granting the request by
647 * forcing this thread through preemption code. When we
648 * get to set{front,back}dq called from CL_PREEMPT()
649 * cpu_choose() will be used to select a cpu to queue
650 * us on - that will see cpu_inmotion and take
651 * steps to avoid returning us to this cpu.
653 cp
->cpu_kprunrun
= 1;
654 thread_unlock(curthread
);
655 kpreempt_enable(); /* will call preempt() */
661 thread_allowmigrate(void)
663 kthread_id_t t
= curthread
;
665 ASSERT(t
->t_weakbound_cpu
== CPU
||
666 (t
->t_nomigrate
< 0 && t
->t_preempt
> 0) ||
667 CPU_ON_INTR(CPU
) || t
->t_flag
& T_INTR_THREAD
||
668 getpil() >= DISP_LEVEL
);
670 if (CPU_ON_INTR(CPU
) || (t
->t_flag
& T_INTR_THREAD
) ||
671 getpil() >= DISP_LEVEL
)
674 if (t
->t_nomigrate
< 0) {
676 * This thread was granted "weak binding" in the
677 * stronger form of kernel preemption disabling.
678 * Undo a level of nesting for both t_nomigrate
683 } else if (--t
->t_nomigrate
== 0) {
685 * Time to drop the weak binding. We need to cater
686 * for the case where we're weakbound to a different
687 * cpu than that to which we're strongbound (a very
688 * temporary arrangement that must only persist until
689 * weak binding drops). We don't acquire thread_lock
690 * here so even as this code executes t_bound_cpu
691 * may be changing. So we disable preemption and
692 * a) in the case that t_bound_cpu changes while we
693 * have preemption disabled kprunrun will be set
694 * asynchronously, and b) if before disabling
695 * preemption we were already on a different cpu to
696 * our t_bound_cpu then we set kprunrun ourselves
697 * to force a trip through the dispatcher when
698 * preemption is enabled.
701 if (t
->t_bound_cpu
&&
702 t
->t_weakbound_cpu
!= t
->t_bound_cpu
)
703 CPU
->cpu_kprunrun
= 1;
704 t
->t_weakbound_cpu
= NULL
;
711 * weakbinding_stop can be used to temporarily cause weakbindings made
712 * with thread_nomigrate to be satisfied through the stronger action of
713 * kpreempt_disable. weakbinding_start recommences normal weakbinding.
717 weakbinding_stop(void)
719 ASSERT(MUTEX_HELD(&cpu_lock
));
720 weakbindingbarrier
= 1;
721 membar_producer(); /* make visible before subsequent thread_lock */
725 weakbinding_start(void)
727 ASSERT(MUTEX_HELD(&cpu_lock
));
728 weakbindingbarrier
= 0;
737 * This routine is called to place the CPUs in a safe place so that
738 * one of them can be taken off line or placed on line. What we are
739 * trying to do here is prevent a thread from traversing the list
740 * of active CPUs while we are changing it or from getting placed on
741 * the run queue of a CPU that has just gone off line. We do this by
742 * creating a thread with the highest possible prio for each CPU and
743 * having it call this routine. The advantage of this method is that
744 * we can eliminate all checks for CPU_ACTIVE in the disp routines.
745 * This makes disp faster at the expense of making p_online() slower
746 * which is a good trade off.
752 struct _cpu_pause_info
*cpi
= &cpu_pause_info
;
753 volatile char *safe
= &safe_list
[index
];
756 ASSERT((curthread
->t_bound_cpu
!= NULL
) || (*safe
== PAUSE_DIE
));
758 while (*safe
!= PAUSE_DIE
) {
760 membar_enter(); /* make sure stores are flushed */
761 sema_v(&cpi
->cp_sem
); /* signal requesting thread */
764 * Wait here until all pause threads are running. That
765 * indicates that it's safe to do the spl. Until
766 * cpu_pause_info.cp_go is set, we don't want to spl
767 * because that might block clock interrupts needed
768 * to preempt threads on other CPUs.
770 while (cpi
->cp_go
== 0)
773 * Even though we are at the highest disp prio, we need
774 * to block out all interrupts below LOCK_LEVEL so that
775 * an intr doesn't come in, wake up a thread, and call
776 * setbackdq/setfrontdq.
780 * if cp_func has been set then call it using index as the
781 * argument, currently only used by cpr_suspend_cpus().
782 * This function is used as the code to execute on the
783 * "paused" cpu's when a machine comes out of a sleep state
784 * and CPU's were powered off. (could also be used for
785 * hotplugging CPU's).
787 if (cpi
->cp_func
!= NULL
)
788 (*cpi
->cp_func
)((void *)lindex
);
790 mach_cpu_pause(safe
);
794 * Waiting is at an end. Switch out of cpu_pause
795 * loop and resume useful work.
800 mutex_enter(&pause_free_mutex
);
802 cv_broadcast(&pause_free_cv
);
803 mutex_exit(&pause_free_mutex
);
807 * Allow the cpus to start running again.
814 ASSERT(MUTEX_HELD(&cpu_lock
));
815 ASSERT(cpu_pause_info
.cp_paused
);
816 cpu_pause_info
.cp_paused
= NULL
;
817 for (i
= 0; i
< NCPU
; i
++)
818 safe_list
[i
] = PAUSE_IDLE
;
819 membar_enter(); /* make sure stores are flushed */
821 splx(cpu_pause_info
.cp_spl
);
826 * Allocate a pause thread for a CPU.
829 cpu_pause_alloc(cpu_t
*cp
)
832 long cpun
= cp
->cpu_id
;
835 * Note, v.v_nglobpris will not change value as long as I hold
838 t
= thread_create(NULL
, 0, cpu_pause
, (void *)cpun
,
839 0, &p0
, TS_STOPPED
, v
.v_nglobpris
- 1);
842 t
->t_disp_queue
= cp
->cpu_disp
;
843 t
->t_affinitycnt
= 1;
846 cp
->cpu_pause_thread
= t
;
848 * Registering a thread in the callback table is usually done
849 * in the initialization code of the thread. In this
850 * case, we do it right after thread creation because the
851 * thread itself may never run, and we need to register the
852 * fact that it is safe for cpr suspend.
854 CALLB_CPR_INIT_SAFE(t
, "cpu_pause");
858 * Free a pause thread for a CPU.
861 cpu_pause_free(cpu_t
*cp
)
864 int cpun
= cp
->cpu_id
;
866 ASSERT(MUTEX_HELD(&cpu_lock
));
868 * We have to get the thread and tell it to die.
870 if ((t
= cp
->cpu_pause_thread
) == NULL
) {
871 ASSERT(safe_list
[cpun
] == PAUSE_IDLE
);
875 t
->t_cpu
= CPU
; /* disp gets upset if last cpu is quiesced. */
876 t
->t_bound_cpu
= NULL
; /* Must un-bind; cpu may not be running. */
877 t
->t_pri
= v
.v_nglobpris
- 1;
878 ASSERT(safe_list
[cpun
] == PAUSE_IDLE
);
879 safe_list
[cpun
] = PAUSE_DIE
;
880 THREAD_TRANSITION(t
);
882 thread_unlock_nopreempt(t
);
885 * If we don't wait for the thread to actually die, it may try to
886 * run on the wrong cpu as part of an actual call to pause_cpus().
888 mutex_enter(&pause_free_mutex
);
889 while (safe_list
[cpun
] != PAUSE_DEAD
) {
890 cv_wait(&pause_free_cv
, &pause_free_mutex
);
892 mutex_exit(&pause_free_mutex
);
893 safe_list
[cpun
] = PAUSE_IDLE
;
895 cp
->cpu_pause_thread
= NULL
;
899 * Initialize basic structures for pausing CPUs.
904 sema_init(&cpu_pause_info
.cp_sem
, 0, NULL
, SEMA_DEFAULT
, NULL
);
906 * Create initial CPU pause thread.
908 cpu_pause_alloc(CPU
);
912 * Start the threads used to pause another CPU.
915 cpu_pause_start(processorid_t cpu_id
)
920 for (i
= 0; i
< NCPU
; i
++) {
925 if (!CPU_IN_SET(cpu_available
, i
) || (i
== cpu_id
)) {
926 safe_list
[i
] = PAUSE_WAIT
;
931 * Skip CPU if it is quiesced or not yet started.
933 if ((cp
->cpu_flags
& (CPU_QUIESCED
| CPU_READY
)) != CPU_READY
) {
934 safe_list
[i
] = PAUSE_WAIT
;
939 * Start this CPU's pause thread.
941 t
= cp
->cpu_pause_thread
;
944 * Reset the priority, since nglobpris may have
945 * changed since the thread was created, if someone
946 * has loaded the RT (or some other) scheduling
949 t
->t_pri
= v
.v_nglobpris
- 1;
950 THREAD_TRANSITION(t
);
952 thread_unlock_nopreempt(t
);
960 * Pause all of the CPUs except the one we are on by creating a high
961 * priority thread bound to those CPUs.
963 * Note that one must be extremely careful regarding code
964 * executed while CPUs are paused. Since a CPU may be paused
965 * while a thread scheduling on that CPU is holding an adaptive
966 * lock, code executed with CPUs paused must not acquire adaptive
967 * (or low-level spin) locks. Also, such code must not block,
968 * since the thread that is supposed to initiate the wakeup may
971 * With a few exceptions, the restrictions on code executed with CPUs
972 * paused match those for code executed at high-level interrupt
976 pause_cpus(cpu_t
*off_cp
, void *(*func
)(void *))
978 processorid_t cpu_id
;
980 struct _cpu_pause_info
*cpi
= &cpu_pause_info
;
982 ASSERT(MUTEX_HELD(&cpu_lock
));
983 ASSERT(cpi
->cp_paused
== NULL
);
986 for (i
= 0; i
< NCPU
; i
++)
987 safe_list
[i
] = PAUSE_IDLE
;
993 * If running on the cpu that is going offline, get off it.
994 * This is so that it won't be necessary to rechoose a CPU
998 cpu_id
= off_cp
->cpu_next_part
->cpu_id
;
1000 cpu_id
= CPU
->cpu_id
;
1001 affinity_set(cpu_id
);
1004 * Start the pause threads and record how many were started
1006 cpi
->cp_count
= cpu_pause_start(cpu_id
);
1009 * Now wait for all CPUs to be running the pause thread.
1011 while (cpi
->cp_count
> 0) {
1013 * Spin reading the count without grabbing the disp
1014 * lock to make sure we don't prevent the pause
1015 * threads from getting the lock.
1017 while (sema_held(&cpi
->cp_sem
))
1019 if (sema_tryp(&cpi
->cp_sem
))
1022 cpi
->cp_go
= 1; /* all have reached cpu_pause */
1025 * Now wait for all CPUs to spl. (Transition from PAUSE_READY
1028 for (i
= 0; i
< NCPU
; i
++) {
1029 while (safe_list
[i
] != PAUSE_WAIT
)
1032 cpi
->cp_spl
= splhigh(); /* block dispatcher on this CPU */
1033 cpi
->cp_paused
= curthread
;
1037 * Check whether the current thread has CPUs paused
1042 if (cpu_pause_info
.cp_paused
!= NULL
) {
1043 ASSERT(cpu_pause_info
.cp_paused
== curthread
);
1050 cpu_get_all(processorid_t cpun
)
1052 ASSERT(MUTEX_HELD(&cpu_lock
));
1054 if (cpun
>= NCPU
|| cpun
< 0 || !CPU_IN_SET(cpu_available
, cpun
))
1060 * Check whether cpun is a valid processor id and whether it should be
1061 * visible from the current zone. If it is, return a pointer to the
1062 * associated CPU structure.
1065 cpu_get(processorid_t cpun
)
1069 ASSERT(MUTEX_HELD(&cpu_lock
));
1070 c
= cpu_get_all(cpun
);
1071 if (c
!= NULL
&& !INGLOBALZONE(curproc
) && pool_pset_enabled() &&
1072 zone_pset_get(curproc
->p_zone
) != cpupart_query_cpu(c
))
1078 * The following functions should be used to check CPU states in the kernel.
1079 * They should be invoked with cpu_lock held. Kernel subsystems interested
1080 * in CPU states should *not* use cpu_get_state() and various P_ONLINE/etc
1081 * states. Those are for user-land (and system call) use only.
1085 * Determine whether the CPU is online and handling interrupts.
1088 cpu_is_online(cpu_t
*cpu
)
1090 ASSERT(MUTEX_HELD(&cpu_lock
));
1091 return (cpu_flagged_online(cpu
->cpu_flags
));
1095 * Determine whether the CPU is offline (this includes spare and faulted).
1098 cpu_is_offline(cpu_t
*cpu
)
1100 ASSERT(MUTEX_HELD(&cpu_lock
));
1101 return (cpu_flagged_offline(cpu
->cpu_flags
));
1105 * Determine whether the CPU is powered off.
1108 cpu_is_poweredoff(cpu_t
*cpu
)
1110 ASSERT(MUTEX_HELD(&cpu_lock
));
1111 return (cpu_flagged_poweredoff(cpu
->cpu_flags
));
1115 * Determine whether the CPU is active but not handling interrupts.
1118 cpu_is_nointr(cpu_t
*cpu
)
1120 ASSERT(MUTEX_HELD(&cpu_lock
));
1121 return (cpu_flagged_nointr(cpu
->cpu_flags
));
1125 * Determine whether the CPU is active (scheduling threads).
1128 cpu_is_active(cpu_t
*cpu
)
1130 ASSERT(MUTEX_HELD(&cpu_lock
));
1131 return (cpu_flagged_active(cpu
->cpu_flags
));
1135 * Same as above, but these require cpu_flags instead of cpu_t pointers.
1138 cpu_flagged_online(cpu_flag_t cpu_flags
)
1140 return (cpu_flagged_active(cpu_flags
) &&
1141 (cpu_flags
& CPU_ENABLE
));
1145 cpu_flagged_offline(cpu_flag_t cpu_flags
)
1147 return (((cpu_flags
& CPU_POWEROFF
) == 0) &&
1148 ((cpu_flags
& (CPU_READY
| CPU_OFFLINE
)) != CPU_READY
));
1152 cpu_flagged_poweredoff(cpu_flag_t cpu_flags
)
1154 return ((cpu_flags
& CPU_POWEROFF
) == CPU_POWEROFF
);
1158 cpu_flagged_nointr(cpu_flag_t cpu_flags
)
1160 return (cpu_flagged_active(cpu_flags
) &&
1161 (cpu_flags
& CPU_ENABLE
) == 0);
1165 cpu_flagged_active(cpu_flag_t cpu_flags
)
1167 return (((cpu_flags
& (CPU_POWEROFF
| CPU_FAULTED
| CPU_SPARE
)) == 0) &&
1168 ((cpu_flags
& (CPU_READY
| CPU_OFFLINE
)) == CPU_READY
));
1172 * Bring the indicated CPU online.
/*
 * cpu_online: bring the CPU described by cp online.
 *
 * cp	- CPU to bring online.
 *
 * cpu_lock must be held (asserted).  Steps visible in this excerpt, in
 * order: the mp_cpu_start() arch hook (result kept in error);
 * pg_cpupart_in() for cp's partition; pause_cpus(); linking cp into the
 * active lists with cpu_add_active_internal(); clearing CPU_FAULTED (and
 * calling mp_cpu_faulted_exit()) if it was set; clearing CPU_QUIESCED,
 * CPU_OFFLINE and CPU_FROZEN (the flag list continues on a line that is
 * not shown); CPU_NEW_GENERATION(); creating the per-CPU stats, interrupt
 * stats and lgroup kstats; the CPU_ON notification; cpu_intr_enable();
 * the CPU_INTR_ON notification; callout_cpu_online(); and poke_cpu().
 *
 * NOTE(review): original lines 1194, 1203, 1205, 1212-1213 and the return
 * path are absent from this excerpt, so how error gates the steps above
 * and what is returned cannot be confirmed from here.
 */
1175 cpu_online(cpu_t
*cp
)
1180 * Handle on-line request.
1181 * This code must put the new CPU on the active list before
1182 * starting it because it will not be paused, and will start
1183 * using the active list immediately. The real start occurs
1184 * when the CPU_QUIESCED flag is turned off.
1187 ASSERT(MUTEX_HELD(&cpu_lock
));
1190 * Put all the cpus into a known safe place.
1191 * No mutexes can be entered while CPUs are paused.
1193 error
= mp_cpu_start(cp
); /* arch-dep hook */
1195 pg_cpupart_in(cp
, cp
->cpu_part
);
1196 pause_cpus(NULL
, NULL
);
1197 cpu_add_active_internal(cp
);
/* A CPU coming online can no longer be marked faulted. */
1198 if (cp
->cpu_flags
& CPU_FAULTED
) {
1199 cp
->cpu_flags
&= ~CPU_FAULTED
;
1200 mp_cpu_faulted_exit(cp
);
1202 cp
->cpu_flags
&= ~(CPU_QUIESCED
| CPU_OFFLINE
| CPU_FROZEN
|
1204 CPU_NEW_GENERATION(cp
);
1206 cpu_stats_kstat_create(cp
);
1207 cpu_create_intrstat(cp
);
1208 lgrp_kstat_create(cp
);
1209 cpu_state_change_notify(cp
->cpu_id
, CPU_ON
);
1210 cpu_intr_enable(cp
); /* arch-dep hook */
1211 cpu_state_change_notify(cp
->cpu_id
, CPU_INTR_ON
);
1215 * This has to be called only after cyclic_online(). This
1216 * function uses cyclics.
1218 callout_cpu_online(cp
);
1219 poke_cpu(cp
->cpu_id
);
1226 * Take the indicated CPU offline.
1229 cpu_offline(cpu_t
*cp
, int flags
)
1236 int callout_off
= 0;
1239 int (*bound_func
)(struct cpu
*, int);
1244 boolean_t unbind_all_threads
= (flags
& CPU_FORCED
) != 0;
1246 ASSERT(MUTEX_HELD(&cpu_lock
));
1249 * If we're going from faulted or spare to offline, just
1250 * clear these flags and update CPU state.
1252 if (cp
->cpu_flags
& (CPU_FAULTED
| CPU_SPARE
)) {
1253 if (cp
->cpu_flags
& CPU_FAULTED
) {
1254 cp
->cpu_flags
&= ~CPU_FAULTED
;
1255 mp_cpu_faulted_exit(cp
);
1257 cp
->cpu_flags
&= ~CPU_SPARE
;
1263 * Handle off-line request.
1267 * Don't offline last online CPU in partition
1269 if (ncpus_online
<= 1 || pp
->cp_ncpus
<= 1 || cpu_intr_count(cp
) < 2)
1272 * Unbind all soft-bound threads bound to our CPU and hard bound threads
1273 * if we were asked to.
1275 error
= cpu_unbind(cp
->cpu_id
, unbind_all_threads
);
1279 * We shouldn't be bound to this CPU ourselves.
1281 if (curthread
->t_bound_cpu
== cp
)
1285 * Tell interested parties that this CPU is going offline.
1287 CPU_NEW_GENERATION(cp
);
1288 cpu_state_change_notify(cp
->cpu_id
, CPU_OFF
);
1291 * Tell the PG subsystem that the CPU is leaving the partition
1293 pg_cpupart_out(cp
, pp
);
/*
 * Take the CPU out of interrupt participation so we won't find
 * bound kernel threads.  If the architecture cannot completely
 * shut off interrupts on the CPU, don't quiesce it, but don't
 * run anything but interrupt threads; this is indicated by
 * the CPU_OFFLINE flag being on but the CPU_QUIESCED flag being
 * off.
 */
1303 intr_enable
= cp
->cpu_flags
& CPU_ENABLE
;
1305 no_quiesce
= cpu_intr_disable(cp
);
/*
 * Record that we are aiming to offline this cpu.  This acts as
 * a barrier to further weak binding requests in thread_nomigrate
 * and also causes cpu_choose, disp_lowpri_cpu and setfrontdq to
 * lean away from this cpu.  Further strong bindings are already
 * avoided since we hold cpu_lock.  Since threads that are set
 * runnable around now and others coming off the target cpu are
 * directed away from the target, existing strong and weak bindings
 * (especially the latter) to the target cpu stand maximum chance of
 * being able to unbind during the short delay loop below (if other
 * unbound threads compete they may not see cpu in time to unbind
 * even if they would do so immediately).
 */
1324 * Check for kernel threads (strong or weak) bound to that CPU.
1325 * Strongly bound threads may not unbind, and we'll have to return
1326 * EBUSY. Weakly bound threads should always disappear - we've
1327 * stopped more weak binding with cpu_inmotion and existing
1328 * bindings will drain imminently (they may not block). Nonetheless
1329 * we will wait for a fixed period for all bound threads to disappear.
1330 * Inactive interrupt threads are OK (they'll be in TS_FREE
1331 * state). If test finds some bound threads, wait a few ticks
1332 * to give short-lived threads (such as interrupts) chance to
1333 * complete. Note that if no_quiesce is set, i.e. this cpu
1334 * is required to service interrupts, then we take the route
1335 * that permits interrupt threads to be active (or bypassed).
1337 bound_func
= no_quiesce
? disp_bound_threads
: disp_bound_anythreads
;
1339 again
: for (loop_count
= 0; (*bound_func
)(cp
, 0); loop_count
++) {
1340 if (loop_count
>= 5) {
1341 error
= EBUSY
; /* some threads still bound */
1346 * If some threads were assigned, give them
1347 * a chance to complete or move.
1349 * This assumes that the clock_thread is not bound
1350 * to any CPU, because the clock_thread is needed to
1351 * do the delay(hz/100).
1353 * Note: we still hold the cpu_lock while waiting for
1354 * the next clock tick. This is OK since it isn't
1355 * needed for anything else except processor_bind(2),
1356 * and system initialization. If we drop the lock,
1357 * we would risk another p_online disabling the last
1363 if (error
== 0 && callout_off
== 0) {
1364 callout_cpu_offline(cp
);
1368 if (error
== 0 && cyclic_off
== 0) {
1369 if (!cyclic_offline(cp
)) {
1371 * We must have bound cyclics...
1380 * Call mp_cpu_stop() to perform any special operations
1381 * needed for this machine architecture to offline a CPU.
1384 error
= mp_cpu_stop(cp
); /* arch-dep hook */
1387 * If that all worked, take the CPU offline and decrement
1392 * Put all the cpus into a known safe place.
1393 * No mutexes can be entered while CPUs are paused.
1395 pause_cpus(cp
, NULL
);
1397 * Repeat the operation, if necessary, to make sure that
1398 * all outstanding low-level interrupts run to completion
1399 * before we set the CPU_QUIESCED flag. It's also possible
1400 * that a thread has weak bound to the cpu despite our raising
1401 * cpu_inmotion above since it may have loaded that
1402 * value before the barrier became visible (this would have
1403 * to be the thread that was on the target cpu at the time
1404 * we raised the barrier).
1406 if ((!no_quiesce
&& cp
->cpu_intr_actv
!= 0) ||
1407 (*bound_func
)(cp
, 1)) {
1409 (void) mp_cpu_start(cp
);
1412 ncp
= cp
->cpu_next_part
;
1413 cpu_lpl
= cp
->cpu_lpl
;
1414 ASSERT(cpu_lpl
!= NULL
);
1417 * Remove the CPU from the list of active CPUs.
1419 cpu_remove_active(cp
);
1422 * Walk the active process list and look for threads
1423 * whose home lgroup needs to be updated, or
1424 * the last CPU they run on is the one being offlined now.
1427 ASSERT(curthread
->t_cpu
!= cp
);
1428 for (p
= practive
; p
!= NULL
; p
= p
->p_next
) {
1438 ASSERT(t
->t_lpl
!= NULL
);
1440 * Taking last CPU in lpl offline
1441 * Rehome thread if it is in this lpl
1442 * Otherwise, update the count of how many
1443 * threads are in this CPU's lgroup but have
1447 if (cpu_lpl
->lpl_ncpu
== 0) {
1448 if (t
->t_lpl
== cpu_lpl
)
1452 else if (t
->t_lpl
->lpl_lgrpid
==
1453 cpu_lpl
->lpl_lgrpid
)
1456 ASSERT(t
->t_lpl
->lpl_ncpu
> 0);
1459 * Update CPU last ran on if it was this CPU
1461 if (t
->t_cpu
== cp
&& t
->t_bound_cpu
!= cp
)
1462 t
->t_cpu
= disp_lowpri_cpu(ncp
,
1463 t
->t_lpl
, t
->t_pri
, NULL
);
1464 ASSERT(t
->t_cpu
!= cp
|| t
->t_bound_cpu
== cp
||
1465 t
->t_weakbound_cpu
== cp
);
1468 } while (t
!= p
->p_tlist
);
1471 * Didn't find any threads in the same lgroup as this
1472 * CPU with a different lpl, so remove the lgroup from
1473 * the process lgroup bitmask.
1476 if (lgrp_diff_lpl
== 0)
1477 klgrpset_del(p
->p_lgrpset
, cpu_lpl
->lpl_lgrpid
);
1481 * Walk thread list looking for threads that need to be
1482 * rehomed, since there are some threads that are not in
1483 * their process's p_tlist.
1488 ASSERT(t
!= NULL
&& t
->t_lpl
!= NULL
);
1491 * Rehome threads with same lpl as this CPU when this
1492 * is the last CPU in the lpl.
1495 if ((cpu_lpl
->lpl_ncpu
== 0) && (t
->t_lpl
== cpu_lpl
))
1497 lgrp_choose(t
, t
->t_cpupart
), 1);
1499 ASSERT(t
->t_lpl
->lpl_ncpu
> 0);
1502 * Update CPU last ran on if it was this CPU
1505 if (t
->t_cpu
== cp
&& t
->t_bound_cpu
!= cp
) {
1506 t
->t_cpu
= disp_lowpri_cpu(ncp
,
1507 t
->t_lpl
, t
->t_pri
, NULL
);
1509 ASSERT(t
->t_cpu
!= cp
|| t
->t_bound_cpu
== cp
||
1510 t
->t_weakbound_cpu
== cp
);
1513 } while (t
!= curthread
);
1514 ASSERT((cp
->cpu_flags
& (CPU_FAULTED
| CPU_SPARE
)) == 0);
1515 cp
->cpu_flags
|= CPU_OFFLINE
;
1516 disp_cpu_inactive(cp
);
1518 cp
->cpu_flags
|= CPU_QUIESCED
;
1521 cpu_inmotion
= NULL
;
1523 cpu_stats_kstat_destroy(cp
);
1524 cpu_delete_intrstat(cp
);
1525 lgrp_kstat_destroy(cp
);
1529 cpu_inmotion
= NULL
;
1532 * If we failed, re-enable interrupts.
1533 * Do this even if cpu_intr_disable returned an error, because
1534 * it may have partially disabled interrupts.
1536 if (error
&& intr_enable
)
1537 cpu_intr_enable(cp
);
1540 * If we failed, but managed to offline the cyclic subsystem on this
1541 * CPU, bring it back online.
1543 if (error
&& cyclic_off
)
1547 * If we failed, but managed to offline callouts on this CPU,
1548 * bring it back online.
1550 if (error
&& callout_off
)
1551 callout_cpu_online(cp
);
1554 * If we failed, tell the PG subsystem that the CPU is back
1556 pg_cpupart_in(cp
, pp
);
1559 * If we failed, we need to notify everyone that this CPU is back on.
1562 CPU_NEW_GENERATION(cp
);
1563 cpu_state_change_notify(cp
->cpu_id
, CPU_ON
);
1564 cpu_state_change_notify(cp
->cpu_id
, CPU_INTR_ON
);
1571 * Mark the indicated CPU as faulted, taking it offline.
/*
 * cpu_faulted: put cp into the faulted state.
 *
 * cp		- CPU to mark as faulted; must not be powered off (asserted).
 * flags	- passed through unchanged to cpu_offline().
 *
 * cpu_lock must be held (asserted).  Two paths are visible: a CPU that
 * is already offline only has its flags adjusted, while any other CPU is
 * first taken offline and is marked faulted only if that succeeds.
 * The return statements are not part of this excerpt.
 */
1574 cpu_faulted(cpu_t
*cp
, int flags
)
1578 ASSERT(MUTEX_HELD(&cpu_lock
));
1579 ASSERT(!cpu_is_poweredoff(cp
));
/* Already offline: drop CPU_SPARE, set CPU_FAULTED, tell the arch layer. */
1581 if (cpu_is_offline(cp
)) {
1582 cp
->cpu_flags
&= ~CPU_SPARE
;
1583 cp
->cpu_flags
|= CPU_FAULTED
;
1584 mp_cpu_faulted_enter(cp
);
/* Otherwise offline it first; mark it faulted only when that returns 0. */
1589 if ((error
= cpu_offline(cp
, flags
)) == 0) {
1590 cp
->cpu_flags
|= CPU_FAULTED
;
1591 mp_cpu_faulted_enter(cp
);
1599 * Mark the indicated CPU as a spare, taking it offline.
/*
 * cpu_spare: put cp into the spare state.
 *
 * cp		- CPU to mark as a spare; must not be powered off (asserted).
 * flags	- passed through unchanged to cpu_offline().
 *
 * cpu_lock must be held (asserted).  Two paths are visible: a CPU that
 * is already offline has any CPU_FAULTED marking removed and CPU_SPARE
 * set, while any other CPU is first taken offline and is marked spare
 * only if that succeeds.  The return statements are not part of this
 * excerpt.
 */
1602 cpu_spare(cpu_t
*cp
, int flags
)
1606 ASSERT(MUTEX_HELD(&cpu_lock
));
1607 ASSERT(!cpu_is_poweredoff(cp
));
1609 if (cpu_is_offline(cp
)) {
/* Leaving the faulted state: clear the flag and tell the arch layer. */
1610 if (cp
->cpu_flags
& CPU_FAULTED
) {
1611 cp
->cpu_flags
&= ~CPU_FAULTED
;
1612 mp_cpu_faulted_exit(cp
);
1614 cp
->cpu_flags
|= CPU_SPARE
;
/* Otherwise offline it first; mark it spare only when that returns 0. */
1619 if ((error
= cpu_offline(cp
, flags
)) == 0) {
1620 cp
->cpu_flags
|= CPU_SPARE
;
1628 * Take the indicated CPU from poweroff to offline.
/*
 * cpu_poweron: power on a CPU that is currently powered off.
 *
 * cp	- CPU to power on; must be in the powered-off state (asserted).
 *
 * cpu_lock must be held (asserted).  error starts out as ENOTSUP and is
 * then overwritten with the result of the mp_cpu_poweron() arch hook.
 * Whatever follows the hook call, including the return, is not part of
 * this excerpt.
 */
1631 cpu_poweron(cpu_t
*cp
)
1633 int error
= ENOTSUP
;
1635 ASSERT(MUTEX_HELD(&cpu_lock
));
1636 ASSERT(cpu_is_poweredoff(cp
));
1638 error
= mp_cpu_poweron(cp
); /* arch-dep hook */
1646 * Take the indicated CPU from any inactive state to powered off.
/*
 * cpu_poweroff: power off a CPU that is currently offline.
 *
 * cp	- CPU to power off; must be offline (asserted).
 *
 * cpu_lock must be held (asserted).  Returns EBUSY without calling the
 * arch hook when CPU_QUIESCED is not set in cp->cpu_flags.  Otherwise
 * error (initially ENOTSUP) is overwritten with the result of
 * mp_cpu_poweroff().  Whatever follows the hook call, including the
 * final return, is not part of this excerpt.
 */
1649 cpu_poweroff(cpu_t
*cp
)
1651 int error
= ENOTSUP
;
1653 ASSERT(MUTEX_HELD(&cpu_lock
));
1654 ASSERT(cpu_is_offline(cp
));
1656 if (!(cp
->cpu_flags
& CPU_QUIESCED
))
1657 return (EBUSY
); /* not completely idle */
1659 error
= mp_cpu_poweroff(cp
); /* arch-dep hook */
1667 * Initialize the Sequential CPU id lookup table
1674 tbl
= kmem_zalloc(sizeof (struct cpu
*) * max_ncpus
, KM_SLEEP
);
1681 * Initialize the CPU lists for the first CPU.
/*
 * cpu_list_init: set up the CPU lists around the first CPU, cp.
 *
 * cp	- the first CPU.
 *
 * Effects visible in this excerpt: clock_cpu_list points at cp; cp
 * becomes a one-element ring on both the online links (cpu_next_onln /
 * cpu_prev_onln) and the partition links (cpu_next_part / cpu_prev_part);
 * sequence id 0 is marked in use; cpu_seq temporarily aliases &cpu_list;
 * cp's kmem cache offset is derived from its cpu_seqid; cp_default gets
 * cp as its CPU list with a count of one and becomes cp's partition; and
 * cp->cpu_id is added to cpu_available.
 *
 * NOTE(review): several original lines (1685-1688, 1690, 1693-1695) are
 * absent from this excerpt, so other assignments may exist.
 */
1684 cpu_list_init(cpu_t
*cp
)
1689 clock_cpu_list
= cp
;
1691 cp
->cpu_next_onln
= cp
;
1692 cp
->cpu_prev_onln
= cp
;
1696 CPUSET_ADD(cpu_seqid_inuse
, 0);
1699 * Bootstrap cpu_seq using cpu_list
1700 * The cpu_seq[] table will be dynamically allocated
1701 * when kmem later becomes available (but before going MP)
1703 cpu_seq
= &cpu_list
;
1705 cp
->cpu_cache_offset
= KMEM_CPU_CACHE_OFFSET(cp
->cpu_seqid
);
1706 cp_default
.cp_cpulist
= cp
;
1707 cp_default
.cp_ncpus
= 1;
1708 cp
->cpu_next_part
= cp
;
1709 cp
->cpu_prev_part
= cp
;
1710 cp
->cpu_part
= &cp_default
;
1712 CPUSET_ADD(cpu_available
, cp
->cpu_id
);
1716 * Insert a CPU into the list of available CPUs.
1719 cpu_add_unit(cpu_t
*cp
)
1723 ASSERT(MUTEX_HELD(&cpu_lock
));
1724 ASSERT(cpu_list
!= NULL
); /* list started in cpu_list_init */
1726 lgrp_config(LGRP_CONFIG_CPU_ADD
, (uintptr_t)cp
, 0);
/*
 * Note: most users of the cpu_list will grab the
 * cpu_lock to ensure that it isn't modified.  However,
 * certain users can't or won't do that.  To allow this
 * we pause the other cpus.  Users who walk the list
 * without cpu_lock must disable kernel preemption
 * to ensure that the list isn't modified underneath
 * them.  Also, any cached pointers to cpu structures
 * must be revalidated by checking to see if the
 * cpu_next pointer points to itself.  This check must
 * be done with the cpu_lock held or kernel preemption
 * disabled.  This check relies upon the fact that
 * old cpu structures are not freed or cleared after
 * they are removed from the cpu_list.
 *
 * Note that the clock code walks the cpu list dereferencing
 * the cpu_part pointer, so we need to initialize it before
 * adding the cpu to the list.
 */
1747 cp
->cpu_part
= &cp_default
;
1748 pause_cpus(NULL
, NULL
);
1749 cp
->cpu_next
= cpu_list
;
1750 cp
->cpu_prev
= cpu_list
->cpu_prev
;
1751 cpu_list
->cpu_prev
->cpu_next
= cp
;
1752 cpu_list
->cpu_prev
= cp
;
1755 for (seqid
= 0; CPU_IN_SET(cpu_seqid_inuse
, seqid
); seqid
++)
1757 CPUSET_ADD(cpu_seqid_inuse
, seqid
);
1758 cp
->cpu_seqid
= seqid
;
1760 if (seqid
> max_cpu_seqid_ever
)
1761 max_cpu_seqid_ever
= seqid
;
1763 ASSERT(ncpus
< max_ncpus
);
1765 cp
->cpu_cache_offset
= KMEM_CPU_CACHE_OFFSET(cp
->cpu_seqid
);
1766 cpu
[cp
->cpu_id
] = cp
;
1767 CPUSET_ADD(cpu_available
, cp
->cpu_id
);
1768 cpu_seq
[cp
->cpu_seqid
] = cp
;
1771 * allocate a pause thread for this CPU.
1773 cpu_pause_alloc(cp
);
1776 * So that new CPUs won't have NULL prev_onln and next_onln pointers,
1777 * link them into a list of just that CPU.
1778 * This is so that disp_lowpri_cpu will work for thread_create in
1779 * pause_cpus() when called from the startup thread in a new CPU.
1781 cp
->cpu_next_onln
= cp
;
1782 cp
->cpu_prev_onln
= cp
;
1783 cpu_info_kstat_create(cp
);
1784 cp
->cpu_next_part
= cp
;
1785 cp
->cpu_prev_part
= cp
;
1787 init_cpu_mstate(cp
, CMS_SYSTEM
);
1789 pool_pset_mod
= gethrtime();
1793 * Do the opposite of cpu_add_unit().
1796 cpu_del_unit(int cpuid
)
1798 struct cpu
*cp
, *cpnext
;
1800 ASSERT(MUTEX_HELD(&cpu_lock
));
1804 ASSERT(cp
->cpu_next_onln
== cp
);
1805 ASSERT(cp
->cpu_prev_onln
== cp
);
1806 ASSERT(cp
->cpu_next_part
== cp
);
1807 ASSERT(cp
->cpu_prev_part
== cp
);
1810 * Tear down the CPU's physical ID cache, and update any
1813 pg_cpu_fini(cp
, NULL
);
1814 pghw_physid_destroy(cp
);
1817 * Destroy kstat stuff.
1819 cpu_info_kstat_destroy(cp
);
1820 term_cpu_mstate(cp
);
1822 * Free up pause thread.
1825 CPUSET_DEL(cpu_available
, cp
->cpu_id
);
1826 cpu
[cp
->cpu_id
] = NULL
;
1827 cpu_seq
[cp
->cpu_seqid
] = NULL
;
1830 * The clock thread and mutex_vector_enter cannot hold the
1831 * cpu_lock while traversing the cpu list, therefore we pause
1832 * all other threads by pausing the other cpus. These, and any
1833 * other routines holding cpu pointers while possibly sleeping
1834 * must be sure to call kpreempt_disable before processing the
1835 * list and be sure to check that the cpu has not been deleted
1836 * after any sleeps (check cp->cpu_next != NULL). We guarantee
1837 * to keep the deleted cpu structure around.
1839 * Note that this MUST be done AFTER cpu_available
1840 * has been updated so that we don't waste time
1841 * trying to pause the cpu we're trying to delete.
1843 pause_cpus(NULL
, NULL
);
1845 cpnext
= cp
->cpu_next
;
1846 cp
->cpu_prev
->cpu_next
= cp
->cpu_next
;
1847 cp
->cpu_next
->cpu_prev
= cp
->cpu_prev
;
1852 * Signals that the cpu has been deleted (see above).
1854 cp
->cpu_next
= NULL
;
1855 cp
->cpu_prev
= NULL
;
1859 CPUSET_DEL(cpu_seqid_inuse
, cp
->cpu_seqid
);
1861 lgrp_config(LGRP_CONFIG_CPU_DEL
, (uintptr_t)cp
, 0);
1863 pool_pset_mod
= gethrtime();
1867 * Add a CPU to the list of active CPUs.
1868 * This routine must not get any locks, because other CPUs are paused.
1871 cpu_add_active_internal(cpu_t
*cp
)
1873 cpupart_t
*pp
= cp
->cpu_part
;
1875 ASSERT(MUTEX_HELD(&cpu_lock
));
1876 ASSERT(cpu_list
!= NULL
); /* list started in cpu_list_init */
1880 cp
->cpu_next_onln
= cpu_active
;
1881 cp
->cpu_prev_onln
= cpu_active
->cpu_prev_onln
;
1882 cpu_active
->cpu_prev_onln
->cpu_next_onln
= cp
;
1883 cpu_active
->cpu_prev_onln
= cp
;
1885 if (pp
->cp_cpulist
) {
1886 cp
->cpu_next_part
= pp
->cp_cpulist
;
1887 cp
->cpu_prev_part
= pp
->cp_cpulist
->cpu_prev_part
;
1888 pp
->cp_cpulist
->cpu_prev_part
->cpu_next_part
= cp
;
1889 pp
->cp_cpulist
->cpu_prev_part
= cp
;
1891 ASSERT(pp
->cp_ncpus
== 0);
1892 pp
->cp_cpulist
= cp
->cpu_next_part
= cp
->cpu_prev_part
= cp
;
1895 if (pp
->cp_ncpus
== 1) {
1896 cp_numparts_nonempty
++;
1897 ASSERT(cp_numparts_nonempty
!= 0);
1901 lgrp_config(LGRP_CONFIG_CPU_ONLINE
, (uintptr_t)cp
, 0);
1903 bzero(&cp
->cpu_loadavg
, sizeof (cp
->cpu_loadavg
));
1907 * Add a CPU to the list of active CPUs.
1908 * This is called from machine-dependent layers when a new CPU is started.
/*
 * cpu_add_active: add cp to the set of active CPUs.
 *
 * cp	- CPU being started.
 *
 * Steps visible in this excerpt: pg_cpupart_in() for cp's partition;
 * pause_cpus(); cpu_add_active_internal() to link cp into the active
 * lists; creation of the per-CPU stats, interrupt stats and lgroup
 * kstats; and a CPU_INIT state-change notification.
 *
 * NOTE(review): original line 1917, between cpu_add_active_internal()
 * and the kstat creation, is absent from this excerpt.
 */
1911 cpu_add_active(cpu_t
*cp
)
1913 pg_cpupart_in(cp
, cp
->cpu_part
);
1915 pause_cpus(NULL
, NULL
);
1916 cpu_add_active_internal(cp
);
1919 cpu_stats_kstat_create(cp
);
1920 cpu_create_intrstat(cp
);
1921 lgrp_kstat_create(cp
);
1922 cpu_state_change_notify(cp
->cpu_id
, CPU_INIT
);
1927 * Remove a CPU from the list of active CPUs.
1928 * This routine must not get any locks, because other CPUs are paused.
1932 cpu_remove_active(cpu_t
*cp
)
1934 cpupart_t
*pp
= cp
->cpu_part
;
1936 ASSERT(MUTEX_HELD(&cpu_lock
));
1937 ASSERT(cp
->cpu_next_onln
!= cp
); /* not the last one */
1938 ASSERT(cp
->cpu_prev_onln
!= cp
); /* not the last one */
1940 pg_cpu_inactive(cp
);
1942 lgrp_config(LGRP_CONFIG_CPU_OFFLINE
, (uintptr_t)cp
, 0);
1944 if (cp
== clock_cpu_list
)
1945 clock_cpu_list
= cp
->cpu_next_onln
;
1947 cp
->cpu_prev_onln
->cpu_next_onln
= cp
->cpu_next_onln
;
1948 cp
->cpu_next_onln
->cpu_prev_onln
= cp
->cpu_prev_onln
;
1949 if (cpu_active
== cp
) {
1950 cpu_active
= cp
->cpu_next_onln
;
1952 cp
->cpu_next_onln
= cp
;
1953 cp
->cpu_prev_onln
= cp
;
1955 cp
->cpu_prev_part
->cpu_next_part
= cp
->cpu_next_part
;
1956 cp
->cpu_next_part
->cpu_prev_part
= cp
->cpu_prev_part
;
1957 if (pp
->cp_cpulist
== cp
) {
1958 pp
->cp_cpulist
= cp
->cpu_next_part
;
1959 ASSERT(pp
->cp_cpulist
!= cp
);
1961 cp
->cpu_next_part
= cp
;
1962 cp
->cpu_prev_part
= cp
;
1964 if (pp
->cp_ncpus
== 0) {
1965 cp_numparts_nonempty
--;
1966 ASSERT(cp_numparts_nonempty
!= 0);
1971 * Routine used to setup a newly inserted CPU in preparation for starting
/*
 * cpu_configure: set up a newly inserted CPU.
 *
 * cpuid	- id of the CPU to configure.
 *
 * cpu_lock must be held (asserted).  Checks visible in this excerpt:
 * cpuid outside [0, NCPU); a cpu[cpuid] entry that already exists with
 * non-zero cpu_flags; and a non-zero result from mp_cpu_configure()
 * (kept in retval).  The statements controlled by those checks are not
 * shown.  After them, the CPU's flags are set to
 * CPU_QUIESCED | CPU_OFFLINE | CPU_POWEROFF, cpu_set_state() is called,
 * and the CPU_CONFIG hooks are run with CPU_UNCONFIG as the undo event.
 * mp_cpu_unconfigure() is then invoked with its result discarded.
 *
 * NOTE(review): the condition guarding that last call (original line
 * 2001) and the return statements are not part of this excerpt.
 */
1975 cpu_configure(int cpuid
)
1979 ASSERT(MUTEX_HELD(&cpu_lock
));
1982 * Some structures are statically allocated based upon
1983 * the maximum number of cpus the system supports. Do not
1984 * try to add anything beyond this limit.
1986 if (cpuid
< 0 || cpuid
>= NCPU
) {
1990 if ((cpu
[cpuid
] != NULL
) && (cpu
[cpuid
]->cpu_flags
!= 0)) {
1994 if ((retval
= mp_cpu_configure(cpuid
)) != 0) {
1998 cpu
[cpuid
]->cpu_flags
= CPU_QUIESCED
| CPU_OFFLINE
| CPU_POWEROFF
;
1999 cpu_set_state(cpu
[cpuid
]);
2000 retval
= cpu_state_change_hooks(cpuid
, CPU_CONFIG
, CPU_UNCONFIG
);
2002 (void) mp_cpu_unconfigure(cpuid
);
2008 * Routine used to cleanup a CPU that has been powered off. This will
2009 * destroy all per-cpu information related to this cpu.
/*
 * cpu_unconfigure: tear down a CPU that has been powered off.
 *
 * cpuid	- id of the CPU to unconfigure.
 *
 * cpu_lock must be held (asserted).  Checks visible in this excerpt:
 * cpu[cpuid] is NULL; its cpu_flags are zero; CPU_POWEROFF is not set.
 * The statements controlled by those checks are not shown.  After them,
 * any cpu_props nvlist is freed and the pointer cleared, the
 * CPU_UNCONFIG hooks are run with CPU_CONFIG as the undo event (result
 * kept in error), and the function ends by returning
 * mp_cpu_unconfigure(cpuid).
 *
 * NOTE(review): no bounds check on cpuid is visible before cpu[cpuid] is
 * indexed, whereas cpu_configure() tests cpuid against 0 and NCPU -
 * confirm that callers validate it.  How error is handled before the
 * final return (original lines 2036-2039) is also not shown.
 */
2012 cpu_unconfigure(int cpuid
)
2016 ASSERT(MUTEX_HELD(&cpu_lock
));
2018 if (cpu
[cpuid
] == NULL
) {
2022 if (cpu
[cpuid
]->cpu_flags
== 0) {
2026 if ((cpu
[cpuid
]->cpu_flags
& CPU_POWEROFF
) == 0) {
/* Release the property list, if one was attached. */
2030 if (cpu
[cpuid
]->cpu_props
!= NULL
) {
2031 (void) nvlist_free(cpu
[cpuid
]->cpu_props
);
2032 cpu
[cpuid
]->cpu_props
= NULL
;
2035 error
= cpu_state_change_hooks(cpuid
, CPU_UNCONFIG
, CPU_CONFIG
);
2040 return (mp_cpu_unconfigure(cpuid
));
2044 * Routines for registering and de-registering cpu_setup callback functions.
2047 * These routines must not be called from a driver's attach(9E) or
2048 * detach(9E) entry point.
2050 * NOTE: CPU callbacks should not block. They are called with cpu_lock held.
2054 * Ideally, these would be dynamically allocated and put into a linked
2055 * list; however that is not feasible because the registration routine
2056 * has to be available before the kmem allocator is working (in fact,
2057 * it is called by the kmem allocator init code). In any case, there
2058 * are quite a few extra entries for future users.
/*
 * Fixed-size table of cpu_setup callbacks.  NCPU_SETUPS is the number of
 * slots; each slot holds a callback pointer (func) and, as used by the
 * registration routines, an argument (arg) passed back to it.  A slot
 * whose func is NULL is treated as free.
 *
 * NOTE(review): the struct's opening line and the arg member's
 * declaration are not part of this excerpt.
 */
2060 #define NCPU_SETUPS 20
2063 cpu_setup_func_t
*func
;
2065 } cpu_setups
[NCPU_SETUPS
];
/*
 * register_cpu_setup_func: add a callback to the cpu_setups table.
 *
 * func	- callback to register.
 * arg	- opaque argument stored alongside func.
 *
 * cpu_lock must be held (asserted).  The table is scanned from index 0
 * for a slot whose func is NULL; if the index reaches NCPU_SETUPS the
 * system panics with "Ran out of cpu_setup callback entries".  Otherwise
 * func and arg are stored in slot i.
 *
 * NOTE(review): the statement executed when a free slot is found
 * (original line 2076, presumably a break) is not part of this excerpt.
 */
2068 register_cpu_setup_func(cpu_setup_func_t
*func
, void *arg
)
2072 ASSERT(MUTEX_HELD(&cpu_lock
));
2074 for (i
= 0; i
< NCPU_SETUPS
; i
++)
2075 if (cpu_setups
[i
].func
== NULL
)
2077 if (i
>= NCPU_SETUPS
)
2078 cmn_err(CE_PANIC
, "Ran out of cpu_setup callback entries");
2080 cpu_setups
[i
].func
= func
;
2081 cpu_setups
[i
].arg
= arg
;
/*
 * unregister_cpu_setup_func: remove a callback from the cpu_setups table.
 *
 * func	- callback that was registered.
 * arg	- argument it was registered with; both must match the slot.
 *
 * cpu_lock must be held (asserted).  The table is scanned from index 0
 * for a slot whose func and arg both match; if the index reaches
 * NCPU_SETUPS the system panics.  Otherwise slot i is released by setting
 * func to NULL and arg to 0.
 *
 * NOTE(review): the statement executed on a match (original line 2094,
 * presumably a break) and the rest of the panic message (line 2097) are
 * not part of this excerpt.
 */
2085 unregister_cpu_setup_func(cpu_setup_func_t
*func
, void *arg
)
2089 ASSERT(MUTEX_HELD(&cpu_lock
));
2091 for (i
= 0; i
< NCPU_SETUPS
; i
++)
2092 if ((cpu_setups
[i
].func
== func
) &&
2093 (cpu_setups
[i
].arg
== arg
))
2095 if (i
>= NCPU_SETUPS
)
2096 cmn_err(CE_PANIC
, "Could not find cpu_setup callback to "
2099 cpu_setups
[i
].func
= NULL
;
2100 cpu_setups
[i
].arg
= 0;
2104 * Call any state change hooks for this CPU, ignore any errors.
/*
 * cpu_state_change_notify: tell every registered callback about a CPU
 * state change.
 *
 * id	- id of the CPU whose state changed.
 * what	- the event being reported.
 *
 * cpu_lock must be held (asserted).  Every cpu_setups slot with a
 * non-NULL func is called as func(what, id, arg).  The value returned by
 * the callback is not used in the visible call.
 */
2107 cpu_state_change_notify(int id
, cpu_setup_t what
)
2111 ASSERT(MUTEX_HELD(&cpu_lock
));
2113 for (i
= 0; i
< NCPU_SETUPS
; i
++) {
2114 if (cpu_setups
[i
].func
!= NULL
) {
2115 cpu_setups
[i
].func(what
, id
, cpu_setups
[i
].arg
);
2121 * Call any state change hooks for this CPU, undo it if error found.
/*
 * cpu_state_change_hooks: run the registered callbacks for an event and
 * roll back on failure.
 *
 * id	- id of the CPU whose state is changing.
 * what	- the event to deliver.
 * undo	- the event delivered to callbacks during rollback.
 *
 * cpu_lock must be held (asserted).  The forward loop calls each
 * non-NULL func with what and keeps its result in retval.  The inner
 * loop walks back from the slot before i down to slot 0 and calls each
 * non-NULL func with undo.
 *
 * NOTE(review): the test on retval that triggers the rollback loop
 * (original lines 2134-2135) and the return statements are not part of
 * this excerpt.
 */
2124 cpu_state_change_hooks(int id
, cpu_setup_t what
, cpu_setup_t undo
)
2129 ASSERT(MUTEX_HELD(&cpu_lock
));
2131 for (i
= 0; i
< NCPU_SETUPS
; i
++) {
2132 if (cpu_setups
[i
].func
!= NULL
) {
2133 retval
= cpu_setups
[i
].func(what
, id
,
/* Roll back: revisit the earlier slots in reverse order. */
2136 for (i
--; i
>= 0; i
--) {
2137 if (cpu_setups
[i
].func
!= NULL
)
2138 cpu_setups
[i
].func(undo
,
2139 id
, cpu_setups
[i
].arg
);
2149 * Export information about this CPU via the kstat mechanism.
2152 kstat_named_t ci_state
;
2153 kstat_named_t ci_state_begin
;
2154 kstat_named_t ci_cpu_type
;
2155 kstat_named_t ci_fpu_type
;
2156 kstat_named_t ci_clock_MHz
;
2157 kstat_named_t ci_chip_id
;
2158 kstat_named_t ci_implementation
;
2159 kstat_named_t ci_brandstr
;
2160 kstat_named_t ci_core_id
;
2161 kstat_named_t ci_curr_clock_Hz
;
2162 kstat_named_t ci_supp_freq_Hz
;
2163 kstat_named_t ci_pg_id
;
2164 #if defined(__sparcv9)
2165 kstat_named_t ci_device_ID
;
2166 kstat_named_t ci_cpu_fru
;
2169 kstat_named_t ci_vendorstr
;
2170 kstat_named_t ci_family
;
2171 kstat_named_t ci_model
;
2172 kstat_named_t ci_step
;
2173 kstat_named_t ci_clogid
;
2174 kstat_named_t ci_pkg_core_id
;
2175 kstat_named_t ci_ncpuperchip
;
2176 kstat_named_t ci_ncoreperchip
;
2177 kstat_named_t ci_max_cstates
;
2178 kstat_named_t ci_curr_cstate
;
2179 kstat_named_t ci_cacheid
;
2180 kstat_named_t ci_sktstr
;
2182 } cpu_info_template
= {
2183 { "state", KSTAT_DATA_CHAR
},
2184 { "state_begin", KSTAT_DATA_LONG
},
2185 { "cpu_type", KSTAT_DATA_CHAR
},
2186 { "fpu_type", KSTAT_DATA_CHAR
},
2187 { "clock_MHz", KSTAT_DATA_LONG
},
2188 { "chip_id", KSTAT_DATA_LONG
},
2189 { "implementation", KSTAT_DATA_STRING
},
2190 { "brand", KSTAT_DATA_STRING
},
2191 { "core_id", KSTAT_DATA_LONG
},
2192 { "current_clock_Hz", KSTAT_DATA_UINT64
},
2193 { "supported_frequencies_Hz", KSTAT_DATA_STRING
},
2194 { "pg_id", KSTAT_DATA_LONG
},
2195 #if defined(__sparcv9)
2196 { "device_ID", KSTAT_DATA_UINT64
},
2197 { "cpu_fru", KSTAT_DATA_STRING
},
2200 { "vendor_id", KSTAT_DATA_STRING
},
2201 { "family", KSTAT_DATA_INT32
},
2202 { "model", KSTAT_DATA_INT32
},
2203 { "stepping", KSTAT_DATA_INT32
},
2204 { "clog_id", KSTAT_DATA_INT32
},
2205 { "pkg_core_id", KSTAT_DATA_LONG
},
2206 { "ncpu_per_chip", KSTAT_DATA_INT32
},
2207 { "ncore_per_chip", KSTAT_DATA_INT32
},
2208 { "supported_max_cstates", KSTAT_DATA_INT32
},
2209 { "current_cstate", KSTAT_DATA_INT32
},
2210 { "cache_id", KSTAT_DATA_INT32
},
2211 { "socket_type", KSTAT_DATA_STRING
},
/*
 * Mutex installed as ks_lock on every cpu_info kstat by
 * cpu_info_kstat_create().  All of those kstats also use the single
 * cpu_info_template as their ks_data.
 */
2215 static kmutex_t cpu_info_template_lock
;
2218 cpu_info_kstat_update(kstat_t
*ksp
, int rw
)
2220 cpu_t
*cp
= ksp
->ks_private
;
2221 const char *pi_state
;
2223 if (rw
== KSTAT_WRITE
)
2227 /* Is the cpu still initialising itself? */
2228 if (cpuid_checkpass(cp
, 1) == 0)
2231 switch (cp
->cpu_type_info
.pi_state
) {
2233 pi_state
= PS_ONLINE
;
2236 pi_state
= PS_POWEROFF
;
2239 pi_state
= PS_NOINTR
;
2242 pi_state
= PS_FAULTED
;
2245 pi_state
= PS_SPARE
;
2248 pi_state
= PS_OFFLINE
;
2251 pi_state
= "unknown";
2253 (void) strcpy(cpu_info_template
.ci_state
.value
.c
, pi_state
);
2254 cpu_info_template
.ci_state_begin
.value
.l
= cp
->cpu_state_begin
;
2255 (void) strncpy(cpu_info_template
.ci_cpu_type
.value
.c
,
2256 cp
->cpu_type_info
.pi_processor_type
, 15);
2257 (void) strncpy(cpu_info_template
.ci_fpu_type
.value
.c
,
2258 cp
->cpu_type_info
.pi_fputypes
, 15);
2259 cpu_info_template
.ci_clock_MHz
.value
.l
= cp
->cpu_type_info
.pi_clock
;
2260 cpu_info_template
.ci_chip_id
.value
.l
=
2261 pg_plat_hw_instance_id(cp
, PGHW_CHIP
);
2262 kstat_named_setstr(&cpu_info_template
.ci_implementation
,
2264 kstat_named_setstr(&cpu_info_template
.ci_brandstr
, cp
->cpu_brandstr
);
2265 cpu_info_template
.ci_core_id
.value
.l
= pg_plat_get_core_id(cp
);
2266 cpu_info_template
.ci_curr_clock_Hz
.value
.ui64
=
2268 cpu_info_template
.ci_pg_id
.value
.l
=
2269 cp
->cpu_pg
&& cp
->cpu_pg
->cmt_lineage
?
2270 cp
->cpu_pg
->cmt_lineage
->pg_id
: -1;
2271 kstat_named_setstr(&cpu_info_template
.ci_supp_freq_Hz
,
2272 cp
->cpu_supp_freqs
);
2273 #if defined(__sparcv9)
2274 cpu_info_template
.ci_device_ID
.value
.ui64
=
2275 cpunodes
[cp
->cpu_id
].device_id
;
2276 kstat_named_setstr(&cpu_info_template
.ci_cpu_fru
, cpu_fru_fmri(cp
));
2279 kstat_named_setstr(&cpu_info_template
.ci_vendorstr
,
2280 cpuid_getvendorstr(cp
));
2281 cpu_info_template
.ci_family
.value
.l
= cpuid_getfamily(cp
);
2282 cpu_info_template
.ci_model
.value
.l
= cpuid_getmodel(cp
);
2283 cpu_info_template
.ci_step
.value
.l
= cpuid_getstep(cp
);
2284 cpu_info_template
.ci_clogid
.value
.l
= cpuid_get_clogid(cp
);
2285 cpu_info_template
.ci_ncpuperchip
.value
.l
= cpuid_get_ncpu_per_chip(cp
);
2286 cpu_info_template
.ci_ncoreperchip
.value
.l
=
2287 cpuid_get_ncore_per_chip(cp
);
2288 cpu_info_template
.ci_pkg_core_id
.value
.l
= cpuid_get_pkgcoreid(cp
);
2289 cpu_info_template
.ci_max_cstates
.value
.l
= cp
->cpu_m
.max_cstates
;
2290 cpu_info_template
.ci_curr_cstate
.value
.l
= cpu_idle_get_cpu_state(cp
);
2291 cpu_info_template
.ci_cacheid
.value
.i32
= cpuid_get_cacheid(cp
);
2292 kstat_named_setstr(&cpu_info_template
.ci_sktstr
,
2293 cpuid_getsocketstr(cp
));
/*
 * cpu_info_kstat_create: create and install the "cpu_info" kstat for cp.
 *
 * cp	- CPU whose kstat is created; the kstat instance is cp->cpu_id.
 *
 * cpu_lock must be held (asserted).  zoneid is set to GLOBAL_ZONEID when
 * pool_pset_enabled() is true.  The kstat is a virtual, variable-size
 * named kstat in class "misc" with one entry per kstat_named_t in
 * cpu_info_template.  When creation succeeds, ks_data_size is grown to
 * leave room for the string-valued entries, the kstat is pointed at the
 * shared template, lock and update routine, ks_private is set to cp, and
 * the kstat is installed.
 *
 * NOTE(review): the declaration and the alternative value of zoneid
 * (original lines 2302 and 2308-2309), and the preprocessor lines that
 * pair with "#if defined(__sparcv9)", are not part of this excerpt.
 */
2300 cpu_info_kstat_create(cpu_t
*cp
)
2304 ASSERT(MUTEX_HELD(&cpu_lock
));
2306 if (pool_pset_enabled())
2307 zoneid
= GLOBAL_ZONEID
;
2310 if ((cp
->cpu_info_kstat
= kstat_create_zone("cpu_info", cp
->cpu_id
,
2311 NULL
, "misc", KSTAT_TYPE_NAMED
,
2312 sizeof (cpu_info_template
) / sizeof (kstat_named_t
),
2313 KSTAT_FLAG_VIRTUAL
| KSTAT_FLAG_VAR_SIZE
, zoneid
)) != NULL
) {
/* Extra room for string values that live outside the named entries. */
2314 cp
->cpu_info_kstat
->ks_data_size
+= 2 * CPU_IDSTRLEN
;
2315 #if defined(__sparcv9)
2316 cp
->cpu_info_kstat
->ks_data_size
+=
2317 strlen(cpu_fru_fmri(cp
)) + 1;
2320 cp
->cpu_info_kstat
->ks_data_size
+= X86_VENDOR_STRLEN
;
2322 if (cp
->cpu_supp_freqs
!= NULL
)
2323 cp
->cpu_info_kstat
->ks_data_size
+=
2324 strlen(cp
->cpu_supp_freqs
) + 1;
/* Every cpu_info kstat shares one template, lock and update routine. */
2325 cp
->cpu_info_kstat
->ks_lock
= &cpu_info_template_lock
;
2326 cp
->cpu_info_kstat
->ks_data
= &cpu_info_template
;
2327 cp
->cpu_info_kstat
->ks_private
= cp
;
2328 cp
->cpu_info_kstat
->ks_update
= cpu_info_kstat_update
;
2329 kstat_install(cp
->cpu_info_kstat
);
2334 cpu_info_kstat_destroy(cpu_t
*cp
)
2336 ASSERT(MUTEX_HELD(&cpu_lock
));
2338 kstat_delete(cp
->cpu_info_kstat
);
2339 cp
->cpu_info_kstat
= NULL
;
2343 * Create and install kstats for the boot CPU.
/*
 * cpu_kstat_init: create the kstats for the boot CPU.
 *
 * cp	- the boot CPU.
 *
 * Takes cpu_lock itself, then creates the cpu_info kstat, the per-CPU
 * stats kstats and the interrupt stats before dropping the lock.
 *
 * NOTE(review): original line 2352, between cpu_create_intrstat() and
 * mutex_exit(), is absent from this excerpt.
 */
2346 cpu_kstat_init(cpu_t
*cp
)
2348 mutex_enter(&cpu_lock
);
2349 cpu_info_kstat_create(cp
);
2350 cpu_stats_kstat_create(cp
);
2351 cpu_create_intrstat(cp
);
2353 mutex_exit(&cpu_lock
);
2357 * Make visible to the zone that subset of the cpu information that would be
2358 * initialized when a cpu is configured (but still offline).
/*
 * cpu_visibility_configure: expose cp's configuration-time information
 * to a zone.
 *
 * cp	- CPU being made visible.
 * zone	- target zone; NULL selects ALL_ZONES.
 *
 * cpu_lock must be held and pool_pset_enabled() must be true (both
 * asserted).  For a zone other than ALL_ZONES and the global zone, the
 * zone's CPU count is asserted not to exceed ncpus.  If cp has a
 * cpu_info kstat, it is added to the zone with kstat_zone_add().
 *
 * NOTE(review): original line 2370, inside the zone test and ahead of
 * the assertion, is absent from this excerpt.
 */
2361 cpu_visibility_configure(cpu_t
*cp
, zone_t
*zone
)
2363 zoneid_t zoneid
= zone
? zone
->zone_id
: ALL_ZONES
;
2365 ASSERT(MUTEX_HELD(&cpu_lock
));
2366 ASSERT(pool_pset_enabled());
2369 if (zoneid
!= ALL_ZONES
&& zoneid
!= GLOBAL_ZONEID
) {
2371 ASSERT(zone
->zone_ncpus
<= ncpus
);
2373 if (cp
->cpu_info_kstat
!= NULL
)
2374 kstat_zone_add(cp
->cpu_info_kstat
, zoneid
);
2378 * Make visible to the zone that subset of the cpu information that would be
2379 * initialized when a previously configured cpu is onlined.
/*
 * cpu_visibility_online: expose cp's online-time kstats to a zone.
 *
 * cp	- CPU being made visible; must be active (asserted).
 * zone	- target zone; NULL selects ALL_ZONES.
 *
 * cpu_lock must be held and pool_pset_enabled() must be true (both
 * asserted).  For a zone other than ALL_ZONES and the global zone, the
 * zone's online CPU count is incremented and asserted not to exceed
 * ncpus_online.  Four kstats are then looked up with kstat_hold_byname()
 * and each one found is added to the zone: "cpu_stat" (named
 * "cpu_stat<cpun>"), then "cpu" with names "sys", "vm" and "intrstat".
 *
 * NOTE(review): the declarations of ksp and cpun, and whatever follows
 * each kstat_zone_add() call (for instance releasing the hold), are not
 * part of this excerpt.
 */
2382 cpu_visibility_online(cpu_t
*cp
, zone_t
*zone
)
2385 char name
[sizeof ("cpu_stat") + 10]; /* enough for 32-bit cpuids */
2386 zoneid_t zoneid
= zone
? zone
->zone_id
: ALL_ZONES
;
2389 ASSERT(MUTEX_HELD(&cpu_lock
));
2390 ASSERT(pool_pset_enabled());
2392 ASSERT(cpu_is_active(cp
));
2395 if (zoneid
!= ALL_ZONES
&& zoneid
!= GLOBAL_ZONEID
) {
2396 zone
->zone_ncpus_online
++;
2397 ASSERT(zone
->zone_ncpus_online
<= ncpus_online
);
2399 (void) snprintf(name
, sizeof (name
), "cpu_stat%d", cpun
);
2400 if ((ksp
= kstat_hold_byname("cpu_stat", cpun
, name
, ALL_ZONES
))
2402 kstat_zone_add(ksp
, zoneid
);
2405 if ((ksp
= kstat_hold_byname("cpu", cpun
, "sys", ALL_ZONES
)) != NULL
) {
2406 kstat_zone_add(ksp
, zoneid
);
2409 if ((ksp
= kstat_hold_byname("cpu", cpun
, "vm", ALL_ZONES
)) != NULL
) {
2410 kstat_zone_add(ksp
, zoneid
);
2413 if ((ksp
= kstat_hold_byname("cpu", cpun
, "intrstat", ALL_ZONES
)) !=
2415 kstat_zone_add(ksp
, zoneid
);
2421 * Update relevant kstats such that cpu is now visible to processes
2422 * executing in specified zone.
2425 cpu_visibility_add(cpu_t
*cp
, zone_t
*zone
)
2427 cpu_visibility_configure(cp
, zone
);
2428 if (cpu_is_active(cp
))
2429 cpu_visibility_online(cp
, zone
);
2433 * Make invisible to the zone that subset of the cpu information that would be
2434 * torn down when a previously offlined cpu is unconfigured.
/*
 * cpu_visibility_unconfigure: hide cp's configuration-time information
 * from a zone.
 *
 * cp	- CPU being hidden.
 * zone	- target zone; NULL selects ALL_ZONES.
 *
 * cpu_lock must be held and pool_pset_enabled() must be true (both
 * asserted).  For a zone other than ALL_ZONES and the global zone, the
 * zone's CPU count is asserted to be non-zero.  If cp has a cpu_info
 * kstat, it is removed from the zone with kstat_zone_remove().
 *
 * NOTE(review): original line 2447, inside the zone test and after the
 * assertion, is absent from this excerpt.
 */
2437 cpu_visibility_unconfigure(cpu_t
*cp
, zone_t
*zone
)
2439 zoneid_t zoneid
= zone
? zone
->zone_id
: ALL_ZONES
;
2441 ASSERT(MUTEX_HELD(&cpu_lock
));
2442 ASSERT(pool_pset_enabled());
2445 if (zoneid
!= ALL_ZONES
&& zoneid
!= GLOBAL_ZONEID
) {
2446 ASSERT(zone
->zone_ncpus
!= 0);
2449 if (cp
->cpu_info_kstat
)
2450 kstat_zone_remove(cp
->cpu_info_kstat
, zoneid
);
2454 * Make invisible to the zone that subset of the cpu information that would be
2455 * torn down when a cpu is offlined (but still configured).
/*
 * cpu_visibility_offline: hide cp's online-time kstats from a zone.
 *
 * cp	- CPU being hidden; must be active (asserted).
 * zone	- target zone; NULL selects ALL_ZONES.
 *
 * cpu_lock must be held and pool_pset_enabled() must be true (both
 * asserted).  For a zone other than ALL_ZONES and the global zone, the
 * zone's online CPU count is asserted to be non-zero and then
 * decremented.  Four kstats are then looked up with kstat_hold_byname()
 * and each one found is removed from the zone, in the reverse of the
 * order used by cpu_visibility_online(): "cpu" with names "intrstat",
 * "vm" and "sys", then "cpu_stat" (named "cpu_stat<cpun>").
 *
 * NOTE(review): the declarations of ksp and cpun, and whatever follows
 * each kstat_zone_remove() call (for instance releasing the hold), are
 * not part of this excerpt.
 */
2458 cpu_visibility_offline(cpu_t
*cp
, zone_t
*zone
)
2461 char name
[sizeof ("cpu_stat") + 10]; /* enough for 32-bit cpuids */
2462 zoneid_t zoneid
= zone
? zone
->zone_id
: ALL_ZONES
;
2465 ASSERT(MUTEX_HELD(&cpu_lock
));
2466 ASSERT(pool_pset_enabled());
2468 ASSERT(cpu_is_active(cp
));
2471 if (zoneid
!= ALL_ZONES
&& zoneid
!= GLOBAL_ZONEID
) {
2472 ASSERT(zone
->zone_ncpus_online
!= 0);
2473 zone
->zone_ncpus_online
--;
2476 if ((ksp
= kstat_hold_byname("cpu", cpun
, "intrstat", ALL_ZONES
)) !=
2478 kstat_zone_remove(ksp
, zoneid
);
2481 if ((ksp
= kstat_hold_byname("cpu", cpun
, "vm", ALL_ZONES
)) != NULL
) {
2482 kstat_zone_remove(ksp
, zoneid
);
2485 if ((ksp
= kstat_hold_byname("cpu", cpun
, "sys", ALL_ZONES
)) != NULL
) {
2486 kstat_zone_remove(ksp
, zoneid
);
2489 (void) snprintf(name
, sizeof (name
), "cpu_stat%d", cpun
);
2490 if ((ksp
= kstat_hold_byname("cpu_stat", cpun
, name
, ALL_ZONES
))
2492 kstat_zone_remove(ksp
, zoneid
);
2498 * Update relevant kstats such that cpu is no longer visible to processes
2499 * executing in specified zone.
2502 cpu_visibility_remove(cpu_t
*cp
, zone_t
*zone
)
2504 if (cpu_is_active(cp
))
2505 cpu_visibility_offline(cp
, zone
);
2506 cpu_visibility_unconfigure(cp
, zone
);
2510 * Bind a thread to a CPU as requested.
2513 cpu_bind_thread(kthread_id_t tp
, processorid_t bind
, processorid_t
*obind
,
2516 processorid_t binding
;
2519 ASSERT(MUTEX_HELD(&cpu_lock
));
2520 ASSERT(MUTEX_HELD(&ttoproc(tp
)->p_lock
));
2525 * Record old binding, but change the obind, which was initialized
2526 * to PBIND_NONE, only if this thread has a binding. This avoids
2527 * reporting PBIND_NONE for a process when some LWPs are bound.
2529 binding
= tp
->t_bind_cpu
;
2530 if (binding
!= PBIND_NONE
)
2531 *obind
= binding
; /* record old binding */
2535 /* Just return the old binding */
2539 case PBIND_QUERY_TYPE
:
2540 /* Return the binding type */
2541 *obind
= TB_CPU_IS_SOFT(tp
) ? PBIND_SOFT
: PBIND_HARD
;
2547 * Set soft binding for this thread and return the actual
2550 TB_CPU_SOFT_SET(tp
);
2556 * Set hard binding for this thread and return the actual
2559 TB_CPU_HARD_SET(tp
);
2568 * If this thread/LWP cannot be bound because of permission
2569 * problems, just note that and return success so that the
2570 * other threads/LWPs will be bound. This is the way
2571 * processor_bind() is defined to work.
2573 * Binding will get EPERM if the thread is of system class
2574 * or hasprocperm() fails.
2576 if (tp
->t_cid
== 0 || !hasprocperm(tp
->t_cred
, CRED())) {
2583 if (binding
!= PBIND_NONE
) {
2584 cp
= cpu_get((processorid_t
)binding
);
2586 * Make sure binding is valid and is in right partition.
2588 if (cp
== NULL
|| tp
->t_cpupart
!= cp
->cpu_part
) {
2594 tp
->t_bind_cpu
= binding
; /* set new binding */
2597 * If there is no system-set reason for affinity, set
2598 * the t_bound_cpu field to reflect the binding.
2600 if (tp
->t_affinitycnt
== 0) {
2601 if (binding
== PBIND_NONE
) {
2603 * We may need to adjust disp_max_unbound_pri
2604 * since we're becoming unbound.
2606 disp_adjust_unbound_pri(tp
);
2608 tp
->t_bound_cpu
= NULL
; /* set new binding */
2611 * Move thread to lgroup with strongest affinity
2614 if (tp
->t_lgrp_affinity
)
2615 lgrp_move_thread(tp
,
2616 lgrp_choose(tp
, tp
->t_cpupart
), 1);
2618 if (tp
->t_state
== TS_ONPROC
&&
2619 tp
->t_cpu
->cpu_part
!= tp
->t_cpupart
)
2624 tp
->t_bound_cpu
= cp
;
2625 ASSERT(cp
->cpu_lpl
!= NULL
);
2628 * Set home to lgroup with most affinity containing CPU
2629 * that thread is being bound or minimum bounding
2630 * lgroup if no affinities set
2632 if (tp
->t_lgrp_affinity
)
2633 lpl
= lgrp_affinity_best(tp
, tp
->t_cpupart
,
2634 LGRP_NONE
, B_FALSE
);
2638 if (tp
->t_lpl
!= lpl
) {
2639 /* can't grab cpu_lock */
2640 lgrp_move_thread(tp
, lpl
, 1);
2644 * Make the thread switch to the bound CPU.
2645 * If the thread is runnable, we need to
2646 * requeue it even if t_cpu is already set
2647 * to the right CPU, since it may be on a
2648 * kpreempt queue and need to move to a local
2649 * queue. We could check t_disp_queue to
2650 * avoid unnecessary overhead if it's already
2651 * on the right queue, but since this isn't
2652 * a performance-critical operation it doesn't
2653 * seem worth the extra code and complexity.
2655 * If the thread is weakbound to the cpu then it will
2656 * resist the new binding request until the weak
2657 * binding drops. The cpu_surrender or requeueing
2658 * below could be skipped in such cases (since it
2659 * will have no effect), but that would require
2660 * thread_allowmigrate to acquire thread_lock so
2661 * we'll take the very occasional hit here instead.
2663 if (tp
->t_state
== TS_ONPROC
) {
2665 } else if (tp
->t_state
== TS_RUN
) {
2666 cpu_t
*ocp
= tp
->t_cpu
;
2671 * On the bound CPU's disp queue now.
2673 ASSERT(tp
->t_disp_queue
== cp
->cpu_disp
||
2674 tp
->t_weakbound_cpu
== ocp
);
2680 * Our binding has changed; set TP_CHANGEBIND.
2682 tp
->t_proc_flag
|= TP_CHANGEBIND
;
2690 #if CPUSET_WORDS > 1
2693 * Functions for implementing cpuset operations when a cpuset is more
2694 * than one word. On platforms where a cpuset is a single word these
2695 * are implemented as macros in cpuvar.h.
2699 cpuset_all(cpuset_t
*s
)
2703 for (i
= 0; i
< CPUSET_WORDS
; i
++)
2708 cpuset_all_but(cpuset_t
*s
, uint_t cpu
)
2711 CPUSET_DEL(*s
, cpu
);
2715 cpuset_only(cpuset_t
*s
, uint_t cpu
)
2718 CPUSET_ADD(*s
, cpu
);
2722 cpuset_isnull(cpuset_t
*s
)
2726 for (i
= 0; i
< CPUSET_WORDS
; i
++)
2727 if (s
->cpub
[i
] != 0)
2733 cpuset_cmp(cpuset_t
*s1
, cpuset_t
*s2
)
2737 for (i
= 0; i
< CPUSET_WORDS
; i
++)
2738 if (s1
->cpub
[i
] != s2
->cpub
[i
])
2744 cpuset_find(cpuset_t
*s
)
2748 uint_t cpu
= (uint_t
)-1;
2751 * Find a cpu in the cpuset
2753 for (i
= 0; i
< CPUSET_WORDS
; i
++) {
2754 cpu
= (uint_t
)(lowbit(s
->cpub
[i
]) - 1);
2755 if (cpu
!= (uint_t
)-1) {
2756 cpu
+= i
* BT_NBIPUL
;
2764 cpuset_bounds(cpuset_t
*s
, uint_t
*smallestid
, uint_t
*largestid
)
2770 * First, find the smallest cpu id in the set.
2772 for (i
= 0; i
< CPUSET_WORDS
; i
++) {
2773 if (s
->cpub
[i
] != 0) {
2774 bit
= (uint_t
)(lowbit(s
->cpub
[i
]) - 1);
2775 ASSERT(bit
!= (uint_t
)-1);
2776 *smallestid
= bit
+ (i
* BT_NBIPUL
);
2779 * Now find the largest cpu id in
2780 * the set and return immediately.
2781 * Done in an inner loop to avoid
2782 * having to break out of the first
2785 for (j
= CPUSET_WORDS
- 1; j
>= i
; j
--) {
2786 if (s
->cpub
[j
] != 0) {
2787 bit
= (uint_t
)(highbit(s
->cpub
[j
]) - 1);
2788 ASSERT(bit
!= (uint_t
)-1);
2789 *largestid
= bit
+ (j
* BT_NBIPUL
);
2790 ASSERT(*largestid
>= *smallestid
);
2796 * If this code is reached, a
2797 * smallestid was found, but not a
2798 * largestid. The cpuset must have
2799 * been changed during the course
2800 * of this function call.
2805 *smallestid
= *largestid
= CPUSET_NOTINSET
;
2808 #endif /* CPUSET_WORDS */
2811 * Unbind threads bound to specified CPU.
2813 * If `unbind_all_threads' is true, unbind all user threads bound to a given
2814 * CPU. Otherwise unbind all soft-bound user threads.
2817 cpu_unbind(processorid_t cpu
, boolean_t unbind_all_threads
)
2819 processorid_t obind
;
2825 ASSERT(MUTEX_HELD(&cpu_lock
));
2827 mutex_enter(&pidlock
);
2828 for (pp
= practive
; pp
!= NULL
; pp
= pp
->p_next
) {
2829 mutex_enter(&pp
->p_lock
);
2832 * Skip zombies, kernel processes, and processes in
2833 * other zones, if called from a non-global zone.
2835 if (tp
== NULL
|| (pp
->p_flag
& SSYS
) ||
2836 !HASZONEACCESS(curproc
, pp
->p_zone
->zone_id
)) {
2837 mutex_exit(&pp
->p_lock
);
2841 if (tp
->t_bind_cpu
!= cpu
)
2844 * Skip threads with hard binding when
2845 * `unbind_all_threads' is not specified.
2847 if (!unbind_all_threads
&& TB_CPU_IS_HARD(tp
))
2849 err
= cpu_bind_thread(tp
, PBIND_NONE
, &obind
, &berr
);
2852 } while ((tp
= tp
->t_forw
) != pp
->p_tlist
);
2853 mutex_exit(&pp
->p_lock
);
2855 mutex_exit(&pidlock
);
2863 * Destroy all remaining bound threads on a cpu.
2866 cpu_destroy_bound_threads(cpu_t
*cp
)
2869 register kthread_id_t t
, tlist
, tnext
;
2872 * Destroy all remaining bound threads on the cpu. This
2873 * should include both the interrupt threads and the idle thread.
2874 * This requires some care, since we need to traverse the
2875 * thread list with the pidlock mutex locked, but thread_free
2876 * also locks the pidlock mutex. So, we collect the threads
2877 * we're going to reap in a list headed by "tlist", then we
2878 * unlock the pidlock mutex and traverse the tlist list,
2879 * doing thread_free's on the thread's. Simple, n'est pas?
2880 * Also, this depends on thread_free not mucking with the
2881 * t_next and t_prev links of the thread.
2884 if ((t
= curthread
) != NULL
) {
2887 mutex_enter(&pidlock
);
2890 if (t
->t_bound_cpu
== cp
) {
2893 * We've found a bound thread, carefully unlink
2894 * it out of the thread list, and add it to
2895 * our "tlist". We "know" we don't have to
2896 * worry about unlinking curthread (the thread
2897 * that is executing this code).
2899 t
->t_next
->t_prev
= t
->t_prev
;
2900 t
->t_prev
->t_next
= t
->t_next
;
2903 ASSERT(t
->t_cid
== syscid
);
2904 /* wake up anyone blocked in thread_join */
2905 cv_broadcast(&t
->t_joincv
);
2907 * t_lwp set by interrupt threads and not
2912 * Pause and idle threads always have
2913 * t_state set to TS_ONPROC.
2915 t
->t_state
= TS_FREE
;
2916 t
->t_prev
= NULL
; /* Just in case */
2919 } while ((t
= tnext
) != curthread
);
2921 mutex_exit(&pidlock
);
2924 for (t
= tlist
; t
!= NULL
; t
= tnext
) {
2932 * Update the cpu_supp_freqs of this cpu. This information is returned
2933 * as part of cpu_info kstats. If the cpu_info_kstat exists already, then
2934 * maintain the kstat data size.
2937 cpu_set_supp_freqs(cpu_t
*cp
, const char *freqs
)
2939 char clkstr
[sizeof ("18446744073709551615") + 1]; /* ui64 MAX */
2940 const char *lfreqs
= clkstr
;
2941 boolean_t kstat_exists
= B_FALSE
;
2946 * A NULL pointer means we only support one speed.
2949 (void) snprintf(clkstr
, sizeof (clkstr
), "%"PRIu64
,
2950 cp
->cpu_curr_clock
);
2955 * Make sure the frequency doesn't change while a snapshot is
2956 * going on. Of course, we only need to worry about this if
2959 if ((ksp
= cp
->cpu_info_kstat
) != NULL
) {
2960 mutex_enter(ksp
->ks_lock
);
2961 kstat_exists
= B_TRUE
;
2965 * Free any previously allocated string and if the kstat
2966 * already exists, then update its data size.
2968 if (cp
->cpu_supp_freqs
!= NULL
) {
2969 len
= strlen(cp
->cpu_supp_freqs
) + 1;
2970 kmem_free(cp
->cpu_supp_freqs
, len
);
2972 ksp
->ks_data_size
-= len
;
2976 * Allocate the new string and set the pointer.
2978 len
= strlen(lfreqs
) + 1;
2979 cp
->cpu_supp_freqs
= kmem_alloc(len
, KM_SLEEP
);
2980 (void) strcpy(cp
->cpu_supp_freqs
, lfreqs
);
2983 * If the kstat already exists then update the data size and
2987 ksp
->ks_data_size
+= len
;
2988 mutex_exit(ksp
->ks_lock
);
2993 * Indicate the current CPU's clock freqency (in Hz).
2994 * The calling context must be such that CPU references are safe.
2997 cpu_set_curr_clock(uint64_t new_clk
)
3001 old_clk
= CPU
->cpu_curr_clock
;
3002 CPU
->cpu_curr_clock
= new_clk
;
3005 * The cpu-change-speed DTrace probe exports the frequency in Hz
3007 DTRACE_PROBE3(cpu__change__speed
, processorid_t
, CPU
->cpu_id
,
3008 uint64_t, old_clk
, uint64_t, new_clk
);
3012 * processor_info(2) and p_online(2) status support functions
3013 * The constants returned by the cpu_get_state() and cpu_get_state_str() are
3014 * for use in communicating processor state information to userland. Kernel
3015 * subsystems should only be using the cpu_flags value directly. Subsystems
3016 * modifying cpu_flags should record the state change via a call to the
3021 * Update the pi_state of this CPU. This function provides the CPU status for
3022 * the information returned by processor_info(2).
3025 cpu_set_state(cpu_t
*cpu
)
3027 ASSERT(MUTEX_HELD(&cpu_lock
));
3028 cpu
->cpu_type_info
.pi_state
= cpu_get_state(cpu
);
3029 cpu
->cpu_state_begin
= gethrestime_sec();
3030 pool_cpu_mod
= gethrtime();
3034 * Return offline/online/other status for the indicated CPU. Use only for
3035 * communication with user applications; cpu_flags provides the in-kernel
3039 cpu_get_state(cpu_t
*cpu
)
3041 ASSERT(MUTEX_HELD(&cpu_lock
));
3042 if (cpu
->cpu_flags
& CPU_POWEROFF
)
3043 return (P_POWEROFF
);
3044 else if (cpu
->cpu_flags
& CPU_FAULTED
)
3046 else if (cpu
->cpu_flags
& CPU_SPARE
)
3048 else if ((cpu
->cpu_flags
& (CPU_READY
| CPU_OFFLINE
)) != CPU_READY
)
3050 else if (cpu
->cpu_flags
& CPU_ENABLE
)
3057 * Return processor_info(2) state as a string.
3060 cpu_get_state_str(cpu_t
*cpu
)
3064 switch (cpu_get_state(cpu
)) {
3069 string
= PS_POWEROFF
;
3078 string
= PS_FAULTED
;
3081 string
= PS_OFFLINE
;
3091 * Export this CPU's statistics (cpu_stat_t and cpu_stats_t) as raw and named
3092 * kstats, respectively. This is done when a CPU is initialized or placed
3093 * online via p_online(2).
3096 cpu_stats_kstat_create(cpu_t
*cp
)
3098 int instance
= cp
->cpu_id
;
3099 char *module
= "cpu";
3100 char *class = "misc";
3104 ASSERT(MUTEX_HELD(&cpu_lock
));
3106 if (pool_pset_enabled())
3107 zoneid
= GLOBAL_ZONEID
;
3111 * Create named kstats
3113 #define CPU_STATS_KS_CREATE(name, tsize, update_func) \
3114 ksp = kstat_create_zone(module, instance, (name), class, \
3115 KSTAT_TYPE_NAMED, (tsize) / sizeof (kstat_named_t), 0, \
3117 if (ksp != NULL) { \
3118 ksp->ks_private = cp; \
3119 ksp->ks_update = (update_func); \
3120 kstat_install(ksp); \
3122 cmn_err(CE_WARN, "cpu: unable to create %s:%d:%s kstat", \
3123 module, instance, (name));
3125 CPU_STATS_KS_CREATE("sys", sizeof (cpu_sys_stats_ks_data_template
),
3126 cpu_sys_stats_ks_update
);
3127 CPU_STATS_KS_CREATE("vm", sizeof (cpu_vm_stats_ks_data_template
),
3128 cpu_vm_stats_ks_update
);
3131 * Export the familiar cpu_stat_t KSTAT_TYPE_RAW kstat.
3133 ksp
= kstat_create_zone("cpu_stat", cp
->cpu_id
, NULL
,
3134 "misc", KSTAT_TYPE_RAW
, sizeof (cpu_stat_t
), 0, zoneid
);
3136 ksp
->ks_update
= cpu_stat_ks_update
;
3137 ksp
->ks_private
= cp
;
3143 cpu_stats_kstat_destroy(cpu_t
*cp
)
3145 char ks_name
[KSTAT_STRLEN
];
3147 (void) sprintf(ks_name
, "cpu_stat%d", cp
->cpu_id
);
3148 kstat_delete_byname("cpu_stat", cp
->cpu_id
, ks_name
);
3150 kstat_delete_byname("cpu", cp
->cpu_id
, "sys");
3151 kstat_delete_byname("cpu", cp
->cpu_id
, "vm");
3155 cpu_sys_stats_ks_update(kstat_t
*ksp
, int rw
)
3157 cpu_t
*cp
= (cpu_t
*)ksp
->ks_private
;
3158 struct cpu_sys_stats_ks_data
*csskd
;
3159 cpu_sys_stats_t
*css
;
3160 hrtime_t msnsecs
[NCMSTATES
];
3163 if (rw
== KSTAT_WRITE
)
3166 csskd
= ksp
->ks_data
;
3167 css
= &cp
->cpu_stats
.sys
;
3170 * Read CPU mstate, but compare with the last values we
3171 * received to make sure that the returned kstats never
3175 get_cpu_mstate(cp
, msnsecs
);
3176 if (csskd
->cpu_nsec_idle
.value
.ui64
> msnsecs
[CMS_IDLE
])
3177 msnsecs
[CMS_IDLE
] = csskd
->cpu_nsec_idle
.value
.ui64
;
3178 if (csskd
->cpu_nsec_user
.value
.ui64
> msnsecs
[CMS_USER
])
3179 msnsecs
[CMS_USER
] = csskd
->cpu_nsec_user
.value
.ui64
;
3180 if (csskd
->cpu_nsec_kernel
.value
.ui64
> msnsecs
[CMS_SYSTEM
])
3181 msnsecs
[CMS_SYSTEM
] = csskd
->cpu_nsec_kernel
.value
.ui64
;
3183 bcopy(&cpu_sys_stats_ks_data_template
, ksp
->ks_data
,
3184 sizeof (cpu_sys_stats_ks_data_template
));
3186 csskd
->cpu_ticks_wait
.value
.ui64
= 0;
3187 csskd
->wait_ticks_io
.value
.ui64
= 0;
3189 csskd
->cpu_nsec_idle
.value
.ui64
= msnsecs
[CMS_IDLE
];
3190 csskd
->cpu_nsec_user
.value
.ui64
= msnsecs
[CMS_USER
];
3191 csskd
->cpu_nsec_kernel
.value
.ui64
= msnsecs
[CMS_SYSTEM
];
3192 csskd
->cpu_ticks_idle
.value
.ui64
=
3193 NSEC_TO_TICK(csskd
->cpu_nsec_idle
.value
.ui64
);
3194 csskd
->cpu_ticks_user
.value
.ui64
=
3195 NSEC_TO_TICK(csskd
->cpu_nsec_user
.value
.ui64
);
3196 csskd
->cpu_ticks_kernel
.value
.ui64
=
3197 NSEC_TO_TICK(csskd
->cpu_nsec_kernel
.value
.ui64
);
3198 csskd
->cpu_nsec_dtrace
.value
.ui64
= cp
->cpu_dtrace_nsec
;
3199 csskd
->dtrace_probes
.value
.ui64
= cp
->cpu_dtrace_probes
;
3200 csskd
->cpu_nsec_intr
.value
.ui64
= cp
->cpu_intrlast
;
3201 csskd
->cpu_load_intr
.value
.ui64
= cp
->cpu_intrload
;
3202 csskd
->bread
.value
.ui64
= css
->bread
;
3203 csskd
->bwrite
.value
.ui64
= css
->bwrite
;
3204 csskd
->lread
.value
.ui64
= css
->lread
;
3205 csskd
->lwrite
.value
.ui64
= css
->lwrite
;
3206 csskd
->phread
.value
.ui64
= css
->phread
;
3207 csskd
->phwrite
.value
.ui64
= css
->phwrite
;
3208 csskd
->pswitch
.value
.ui64
= css
->pswitch
;
3209 csskd
->trap
.value
.ui64
= css
->trap
;
3210 csskd
->intr
.value
.ui64
= 0;
3211 for (i
= 0; i
< PIL_MAX
; i
++)
3212 csskd
->intr
.value
.ui64
+= css
->intr
[i
];
3213 csskd
->syscall
.value
.ui64
= css
->syscall
;
3214 csskd
->sysread
.value
.ui64
= css
->sysread
;
3215 csskd
->syswrite
.value
.ui64
= css
->syswrite
;
3216 csskd
->sysfork
.value
.ui64
= css
->sysfork
;
3217 csskd
->sysvfork
.value
.ui64
= css
->sysvfork
;
3218 csskd
->sysexec
.value
.ui64
= css
->sysexec
;
3219 csskd
->readch
.value
.ui64
= css
->readch
;
3220 csskd
->writech
.value
.ui64
= css
->writech
;
3221 csskd
->rcvint
.value
.ui64
= css
->rcvint
;
3222 csskd
->xmtint
.value
.ui64
= css
->xmtint
;
3223 csskd
->mdmint
.value
.ui64
= css
->mdmint
;
3224 csskd
->rawch
.value
.ui64
= css
->rawch
;
3225 csskd
->canch
.value
.ui64
= css
->canch
;
3226 csskd
->outch
.value
.ui64
= css
->outch
;
3227 csskd
->msg
.value
.ui64
= css
->msg
;
3228 csskd
->sema
.value
.ui64
= css
->sema
;
3229 csskd
->namei
.value
.ui64
= css
->namei
;
3230 csskd
->ufsiget
.value
.ui64
= css
->ufsiget
;
3231 csskd
->ufsdirblk
.value
.ui64
= css
->ufsdirblk
;
3232 csskd
->ufsipage
.value
.ui64
= css
->ufsipage
;
3233 csskd
->ufsinopage
.value
.ui64
= css
->ufsinopage
;
3234 csskd
->procovf
.value
.ui64
= css
->procovf
;
3235 csskd
->intrthread
.value
.ui64
= 0;
3236 for (i
= 0; i
< LOCK_LEVEL
- 1; i
++)
3237 csskd
->intrthread
.value
.ui64
+= css
->intr
[i
];
3238 csskd
->intrblk
.value
.ui64
= css
->intrblk
;
3239 csskd
->intrunpin
.value
.ui64
= css
->intrunpin
;
3240 csskd
->idlethread
.value
.ui64
= css
->idlethread
;
3241 csskd
->inv_swtch
.value
.ui64
= css
->inv_swtch
;
3242 csskd
->nthreads
.value
.ui64
= css
->nthreads
;
3243 csskd
->cpumigrate
.value
.ui64
= css
->cpumigrate
;
3244 csskd
->xcalls
.value
.ui64
= css
->xcalls
;
3245 csskd
->mutex_adenters
.value
.ui64
= css
->mutex_adenters
;
3246 csskd
->rw_rdfails
.value
.ui64
= css
->rw_rdfails
;
3247 csskd
->rw_wrfails
.value
.ui64
= css
->rw_wrfails
;
3248 csskd
->modload
.value
.ui64
= css
->modload
;
3249 csskd
->modunload
.value
.ui64
= css
->modunload
;
3250 csskd
->bawrite
.value
.ui64
= css
->bawrite
;
3251 csskd
->iowait
.value
.ui64
= css
->iowait
;
3257 cpu_vm_stats_ks_update(kstat_t
*ksp
, int rw
)
3259 cpu_t
*cp
= (cpu_t
*)ksp
->ks_private
;
3260 struct cpu_vm_stats_ks_data
*cvskd
;
3261 cpu_vm_stats_t
*cvs
;
3263 if (rw
== KSTAT_WRITE
)
3266 cvs
= &cp
->cpu_stats
.vm
;
3267 cvskd
= ksp
->ks_data
;
3269 bcopy(&cpu_vm_stats_ks_data_template
, ksp
->ks_data
,
3270 sizeof (cpu_vm_stats_ks_data_template
));
3271 cvskd
->pgrec
.value
.ui64
= cvs
->pgrec
;
3272 cvskd
->pgfrec
.value
.ui64
= cvs
->pgfrec
;
3273 cvskd
->pgin
.value
.ui64
= cvs
->pgin
;
3274 cvskd
->pgpgin
.value
.ui64
= cvs
->pgpgin
;
3275 cvskd
->pgout
.value
.ui64
= cvs
->pgout
;
3276 cvskd
->pgpgout
.value
.ui64
= cvs
->pgpgout
;
3277 cvskd
->zfod
.value
.ui64
= cvs
->zfod
;
3278 cvskd
->dfree
.value
.ui64
= cvs
->dfree
;
3279 cvskd
->scan
.value
.ui64
= cvs
->scan
;
3280 cvskd
->rev
.value
.ui64
= cvs
->rev
;
3281 cvskd
->hat_fault
.value
.ui64
= cvs
->hat_fault
;
3282 cvskd
->as_fault
.value
.ui64
= cvs
->as_fault
;
3283 cvskd
->maj_fault
.value
.ui64
= cvs
->maj_fault
;
3284 cvskd
->cow_fault
.value
.ui64
= cvs
->cow_fault
;
3285 cvskd
->prot_fault
.value
.ui64
= cvs
->prot_fault
;
3286 cvskd
->softlock
.value
.ui64
= cvs
->softlock
;
3287 cvskd
->kernel_asflt
.value
.ui64
= cvs
->kernel_asflt
;
3288 cvskd
->pgrrun
.value
.ui64
= cvs
->pgrrun
;
3289 cvskd
->execpgin
.value
.ui64
= cvs
->execpgin
;
3290 cvskd
->execpgout
.value
.ui64
= cvs
->execpgout
;
3291 cvskd
->execfree
.value
.ui64
= cvs
->execfree
;
3292 cvskd
->anonpgin
.value
.ui64
= cvs
->anonpgin
;
3293 cvskd
->anonpgout
.value
.ui64
= cvs
->anonpgout
;
3294 cvskd
->anonfree
.value
.ui64
= cvs
->anonfree
;
3295 cvskd
->fspgin
.value
.ui64
= cvs
->fspgin
;
3296 cvskd
->fspgout
.value
.ui64
= cvs
->fspgout
;
3297 cvskd
->fsfree
.value
.ui64
= cvs
->fsfree
;
3303 cpu_stat_ks_update(kstat_t
*ksp
, int rw
)
3308 hrtime_t msnsecs
[NCMSTATES
];
3310 cso
= (cpu_stat_t
*)ksp
->ks_data
;
3311 cp
= (cpu_t
*)ksp
->ks_private
;
3313 if (rw
== KSTAT_WRITE
)
3317 * Read CPU mstate, but compare with the last values we
3318 * received to make sure that the returned kstats never
3322 get_cpu_mstate(cp
, msnsecs
);
3323 msnsecs
[CMS_IDLE
] = NSEC_TO_TICK(msnsecs
[CMS_IDLE
]);
3324 msnsecs
[CMS_USER
] = NSEC_TO_TICK(msnsecs
[CMS_USER
]);
3325 msnsecs
[CMS_SYSTEM
] = NSEC_TO_TICK(msnsecs
[CMS_SYSTEM
]);
3326 if (cso
->cpu_sysinfo
.cpu
[CPU_IDLE
] < msnsecs
[CMS_IDLE
])
3327 cso
->cpu_sysinfo
.cpu
[CPU_IDLE
] = msnsecs
[CMS_IDLE
];
3328 if (cso
->cpu_sysinfo
.cpu
[CPU_USER
] < msnsecs
[CMS_USER
])
3329 cso
->cpu_sysinfo
.cpu
[CPU_USER
] = msnsecs
[CMS_USER
];
3330 if (cso
->cpu_sysinfo
.cpu
[CPU_KERNEL
] < msnsecs
[CMS_SYSTEM
])
3331 cso
->cpu_sysinfo
.cpu
[CPU_KERNEL
] = msnsecs
[CMS_SYSTEM
];
3332 cso
->cpu_sysinfo
.cpu
[CPU_WAIT
] = 0;
3333 cso
->cpu_sysinfo
.wait
[W_IO
] = 0;
3334 cso
->cpu_sysinfo
.wait
[W_SWAP
] = 0;
3335 cso
->cpu_sysinfo
.wait
[W_PIO
] = 0;
3336 cso
->cpu_sysinfo
.bread
= CPU_STATS(cp
, sys
.bread
);
3337 cso
->cpu_sysinfo
.bwrite
= CPU_STATS(cp
, sys
.bwrite
);
3338 cso
->cpu_sysinfo
.lread
= CPU_STATS(cp
, sys
.lread
);
3339 cso
->cpu_sysinfo
.lwrite
= CPU_STATS(cp
, sys
.lwrite
);
3340 cso
->cpu_sysinfo
.phread
= CPU_STATS(cp
, sys
.phread
);
3341 cso
->cpu_sysinfo
.phwrite
= CPU_STATS(cp
, sys
.phwrite
);
3342 cso
->cpu_sysinfo
.pswitch
= CPU_STATS(cp
, sys
.pswitch
);
3343 cso
->cpu_sysinfo
.trap
= CPU_STATS(cp
, sys
.trap
);
3344 cso
->cpu_sysinfo
.intr
= 0;
3345 for (i
= 0; i
< PIL_MAX
; i
++)
3346 cso
->cpu_sysinfo
.intr
+= CPU_STATS(cp
, sys
.intr
[i
]);
3347 cso
->cpu_sysinfo
.syscall
= CPU_STATS(cp
, sys
.syscall
);
3348 cso
->cpu_sysinfo
.sysread
= CPU_STATS(cp
, sys
.sysread
);
3349 cso
->cpu_sysinfo
.syswrite
= CPU_STATS(cp
, sys
.syswrite
);
3350 cso
->cpu_sysinfo
.sysfork
= CPU_STATS(cp
, sys
.sysfork
);
3351 cso
->cpu_sysinfo
.sysvfork
= CPU_STATS(cp
, sys
.sysvfork
);
3352 cso
->cpu_sysinfo
.sysexec
= CPU_STATS(cp
, sys
.sysexec
);
3353 cso
->cpu_sysinfo
.readch
= CPU_STATS(cp
, sys
.readch
);
3354 cso
->cpu_sysinfo
.writech
= CPU_STATS(cp
, sys
.writech
);
3355 cso
->cpu_sysinfo
.rcvint
= CPU_STATS(cp
, sys
.rcvint
);
3356 cso
->cpu_sysinfo
.xmtint
= CPU_STATS(cp
, sys
.xmtint
);
3357 cso
->cpu_sysinfo
.mdmint
= CPU_STATS(cp
, sys
.mdmint
);
3358 cso
->cpu_sysinfo
.rawch
= CPU_STATS(cp
, sys
.rawch
);
3359 cso
->cpu_sysinfo
.canch
= CPU_STATS(cp
, sys
.canch
);
3360 cso
->cpu_sysinfo
.outch
= CPU_STATS(cp
, sys
.outch
);
3361 cso
->cpu_sysinfo
.msg
= CPU_STATS(cp
, sys
.msg
);
3362 cso
->cpu_sysinfo
.sema
= CPU_STATS(cp
, sys
.sema
);
3363 cso
->cpu_sysinfo
.namei
= CPU_STATS(cp
, sys
.namei
);
3364 cso
->cpu_sysinfo
.ufsiget
= CPU_STATS(cp
, sys
.ufsiget
);
3365 cso
->cpu_sysinfo
.ufsdirblk
= CPU_STATS(cp
, sys
.ufsdirblk
);
3366 cso
->cpu_sysinfo
.ufsipage
= CPU_STATS(cp
, sys
.ufsipage
);
3367 cso
->cpu_sysinfo
.ufsinopage
= CPU_STATS(cp
, sys
.ufsinopage
);
3368 cso
->cpu_sysinfo
.inodeovf
= 0;
3369 cso
->cpu_sysinfo
.fileovf
= 0;
3370 cso
->cpu_sysinfo
.procovf
= CPU_STATS(cp
, sys
.procovf
);
3371 cso
->cpu_sysinfo
.intrthread
= 0;
3372 for (i
= 0; i
< LOCK_LEVEL
- 1; i
++)
3373 cso
->cpu_sysinfo
.intrthread
+= CPU_STATS(cp
, sys
.intr
[i
]);
3374 cso
->cpu_sysinfo
.intrblk
= CPU_STATS(cp
, sys
.intrblk
);
3375 cso
->cpu_sysinfo
.idlethread
= CPU_STATS(cp
, sys
.idlethread
);
3376 cso
->cpu_sysinfo
.inv_swtch
= CPU_STATS(cp
, sys
.inv_swtch
);
3377 cso
->cpu_sysinfo
.nthreads
= CPU_STATS(cp
, sys
.nthreads
);
3378 cso
->cpu_sysinfo
.cpumigrate
= CPU_STATS(cp
, sys
.cpumigrate
);
3379 cso
->cpu_sysinfo
.xcalls
= CPU_STATS(cp
, sys
.xcalls
);
3380 cso
->cpu_sysinfo
.mutex_adenters
= CPU_STATS(cp
, sys
.mutex_adenters
);
3381 cso
->cpu_sysinfo
.rw_rdfails
= CPU_STATS(cp
, sys
.rw_rdfails
);
3382 cso
->cpu_sysinfo
.rw_wrfails
= CPU_STATS(cp
, sys
.rw_wrfails
);
3383 cso
->cpu_sysinfo
.modload
= CPU_STATS(cp
, sys
.modload
);
3384 cso
->cpu_sysinfo
.modunload
= CPU_STATS(cp
, sys
.modunload
);
3385 cso
->cpu_sysinfo
.bawrite
= CPU_STATS(cp
, sys
.bawrite
);
3386 cso
->cpu_sysinfo
.rw_enters
= 0;
3387 cso
->cpu_sysinfo
.win_uo_cnt
= 0;
3388 cso
->cpu_sysinfo
.win_uu_cnt
= 0;
3389 cso
->cpu_sysinfo
.win_so_cnt
= 0;
3390 cso
->cpu_sysinfo
.win_su_cnt
= 0;
3391 cso
->cpu_sysinfo
.win_suo_cnt
= 0;
3393 cso
->cpu_syswait
.iowait
= CPU_STATS(cp
, sys
.iowait
);
3394 cso
->cpu_syswait
.swap
= 0;
3395 cso
->cpu_syswait
.physio
= 0;
3397 cso
->cpu_vminfo
.pgrec
= CPU_STATS(cp
, vm
.pgrec
);
3398 cso
->cpu_vminfo
.pgfrec
= CPU_STATS(cp
, vm
.pgfrec
);
3399 cso
->cpu_vminfo
.pgin
= CPU_STATS(cp
, vm
.pgin
);
3400 cso
->cpu_vminfo
.pgpgin
= CPU_STATS(cp
, vm
.pgpgin
);
3401 cso
->cpu_vminfo
.pgout
= CPU_STATS(cp
, vm
.pgout
);
3402 cso
->cpu_vminfo
.pgpgout
= CPU_STATS(cp
, vm
.pgpgout
);
3403 cso
->cpu_vminfo
.zfod
= CPU_STATS(cp
, vm
.zfod
);
3404 cso
->cpu_vminfo
.dfree
= CPU_STATS(cp
, vm
.dfree
);
3405 cso
->cpu_vminfo
.scan
= CPU_STATS(cp
, vm
.scan
);
3406 cso
->cpu_vminfo
.rev
= CPU_STATS(cp
, vm
.rev
);
3407 cso
->cpu_vminfo
.hat_fault
= CPU_STATS(cp
, vm
.hat_fault
);
3408 cso
->cpu_vminfo
.as_fault
= CPU_STATS(cp
, vm
.as_fault
);
3409 cso
->cpu_vminfo
.maj_fault
= CPU_STATS(cp
, vm
.maj_fault
);
3410 cso
->cpu_vminfo
.cow_fault
= CPU_STATS(cp
, vm
.cow_fault
);
3411 cso
->cpu_vminfo
.prot_fault
= CPU_STATS(cp
, vm
.prot_fault
);
3412 cso
->cpu_vminfo
.softlock
= CPU_STATS(cp
, vm
.softlock
);
3413 cso
->cpu_vminfo
.kernel_asflt
= CPU_STATS(cp
, vm
.kernel_asflt
);
3414 cso
->cpu_vminfo
.pgrrun
= CPU_STATS(cp
, vm
.pgrrun
);
3415 cso
->cpu_vminfo
.execpgin
= CPU_STATS(cp
, vm
.execpgin
);
3416 cso
->cpu_vminfo
.execpgout
= CPU_STATS(cp
, vm
.execpgout
);
3417 cso
->cpu_vminfo
.execfree
= CPU_STATS(cp
, vm
.execfree
);
3418 cso
->cpu_vminfo
.anonpgin
= CPU_STATS(cp
, vm
.anonpgin
);
3419 cso
->cpu_vminfo
.anonpgout
= CPU_STATS(cp
, vm
.anonpgout
);
3420 cso
->cpu_vminfo
.anonfree
= CPU_STATS(cp
, vm
.anonfree
);
3421 cso
->cpu_vminfo
.fspgin
= CPU_STATS(cp
, vm
.fspgin
);
3422 cso
->cpu_vminfo
.fspgout
= CPU_STATS(cp
, vm
.fspgout
);
3423 cso
->cpu_vminfo
.fsfree
= CPU_STATS(cp
, vm
.fsfree
);