/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/*
 * Architecture-independent CPU control functions.
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/cpu_event.h>
#include <sys/kstat.h>
#include <sys/uadmin.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/procset.h>
#include <sys/processor.h>
#include <sys/debug.h>
#include <sys/cpupart.h>
#include <sys/kmem_impl.h>	/* to set per-cpu kmem_cache offset */
#include <sys/atomic.h>
#include <sys/callb.h>
#include <sys/vtrace.h>
#include <sys/cyclic.h>
#include <sys/bitmap.h>
#include <sys/nvpair.h>
#include <sys/pool_pset.h>
#include <sys/msacct.h>
#include <sys/archsystm.h>
#if defined(__x86) || defined(__amd64)
#include <sys/x86_archext.h>
#endif
#include <sys/callo.h>
extern int	mp_cpu_start(cpu_t *);
extern int	mp_cpu_stop(cpu_t *);
extern int	mp_cpu_poweron(cpu_t *);
extern int	mp_cpu_poweroff(cpu_t *);
extern int	mp_cpu_configure(int);
extern int	mp_cpu_unconfigure(int);
extern void	mp_cpu_faulted_enter(cpu_t *);
extern void	mp_cpu_faulted_exit(cpu_t *);

extern int cmp_cpu_to_chip(processorid_t cpuid);
static void	cpu_add_active_internal(cpu_t *cp);
static void	cpu_remove_active(cpu_t *cp);
static void	cpu_info_kstat_create(cpu_t *cp);
static void	cpu_info_kstat_destroy(cpu_t *cp);
static void	cpu_stats_kstat_create(cpu_t *cp);
static void	cpu_stats_kstat_destroy(cpu_t *cp);

static int	cpu_sys_stats_ks_update(kstat_t *ksp, int rw);
static int	cpu_vm_stats_ks_update(kstat_t *ksp, int rw);
static int	cpu_stat_ks_update(kstat_t *ksp, int rw);
static int	cpu_state_change_hooks(int, cpu_setup_t, cpu_setup_t);
/*
 * cpu_lock protects ncpus, ncpus_online, cpu_flag, cpu_list, cpu_active,
 * max_cpu_seqid_ever, and dispatch queue reallocations.  The lock ordering with
 * respect to related locks is:
 *
 *	cpu_lock --> thread_free_lock  --->  p_lock  --->  thread_lock()
 *
 * Warning: Certain sections of code do not use the cpu_lock when
 * traversing the cpu_list (e.g. mutex_vector_enter(), clock()).  Since
 * all cpus are paused during modifications to this list, a solution
 * to protect the list is to either disable kernel preemption while
 * walking the list, *or* recheck the cpu_next pointer at each
 * iteration in the loop.  Note that in no cases can any cached
 * copies of the cpu pointers be kept as they may become invalid.
 */
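
/*
 * Example (illustrative sketch of the rule above): a caller that walks
 * cpu_list without holding cpu_lock can disable kernel preemption for the
 * duration of the walk and avoid caching the pointers; do_something_percpu()
 * is a hypothetical, non-blocking helper:
 *
 *	cpu_t *c;
 *
 *	kpreempt_disable();
 *	c = cpu_list;
 *	do {
 *		do_something_percpu(c);
 *	} while ((c = c->cpu_next) != cpu_list);
 *	kpreempt_enable();
 */
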
cpu_t		*cpu_list;		/* list of all CPUs */
cpu_t		*clock_cpu_list;	/* used by clock to walk CPUs */
cpu_t		*cpu_active;		/* list of active CPUs */
static cpuset_t	cpu_available;		/* set of available CPUs */
cpuset_t	cpu_seqid_inuse;	/* which cpu_seqids are in use */

cpu_t		**cpu_seq;		/* ptrs to CPUs, indexed by seq_id */
/*
 * max_ncpus keeps the max cpus the system can have. Initially
 * it's NCPU, but since most archs scan the devtree for cpus
 * fairly early on during boot, the real max can be known before
 * ncpus is set (useful for early NCPU based allocations).
 */
int max_ncpus = NCPU;

/*
 * platforms that set max_ncpus to maximum number of cpus that can be
 * dynamically added will set boot_max_ncpus to the number of cpus found
 * at device tree scan time during boot.
 */
int boot_max_ncpus = -1;

/*
 * Maximum possible CPU id.  This can never be >= NCPU since NCPU is
 * used to size arrays that are indexed by CPU id.
 */
processorid_t max_cpuid = NCPU - 1;

/*
 * Maximum cpu_seqid was given. This number can only grow and never shrink. It
 * can be used to optimize NCPU loops to avoid going through CPUs which were
 * never on-line.
 */
processorid_t max_cpu_seqid_ever = 0;

int ncpus_online = 1;
/*
 * CPU that we're trying to offline.  Protected by cpu_lock.
 */
cpu_t *cpu_inmotion;

/*
 * Can be raised to suppress further weakbindings, which are instead
 * satisfied by disabling preemption.  Must be raised/lowered under cpu_lock,
 * while individual thread weakbinding synchronization is done under thread
 * lock.
 */
int weakbindingbarrier;
/*
 * Variables used in pause_cpus().
 */
static volatile char safe_list[NCPU];

static struct _cpu_pause_info {
	int		cp_spl;		/* spl saved in pause_cpus() */
	volatile int	cp_go;		/* Go signal sent after all ready */
	int		cp_count;	/* # of CPUs to pause */
	ksema_t		cp_sem;		/* synch pause_cpus & cpu_pause */
	kthread_id_t	cp_paused;
	void		*(*cp_func)(void *);
} cpu_pause_info;

static kmutex_t pause_free_mutex;
static kcondvar_t pause_free_cv;
static struct cpu_sys_stats_ks_data {
	kstat_named_t cpu_ticks_idle;
	kstat_named_t cpu_ticks_user;
	kstat_named_t cpu_ticks_kernel;
	kstat_named_t cpu_ticks_wait;
	kstat_named_t cpu_nsec_idle;
	kstat_named_t cpu_nsec_user;
	kstat_named_t cpu_nsec_kernel;
	kstat_named_t cpu_nsec_dtrace;
	kstat_named_t cpu_nsec_intr;
	kstat_named_t cpu_load_intr;
	kstat_named_t wait_ticks_io;
	kstat_named_t dtrace_probes;
	kstat_named_t bwrite;
	kstat_named_t lwrite;
	kstat_named_t phread;
	kstat_named_t phwrite;
	kstat_named_t pswitch;
	kstat_named_t syscall;
	kstat_named_t sysread;
	kstat_named_t syswrite;
	kstat_named_t sysfork;
	kstat_named_t sysvfork;
	kstat_named_t sysexec;
	kstat_named_t readch;
	kstat_named_t writech;
	kstat_named_t rcvint;
	kstat_named_t xmtint;
	kstat_named_t mdmint;
	kstat_named_t ufsiget;
	kstat_named_t ufsdirblk;
	kstat_named_t ufsipage;
	kstat_named_t ufsinopage;
	kstat_named_t procovf;
	kstat_named_t intrthread;
	kstat_named_t intrblk;
	kstat_named_t intrunpin;
	kstat_named_t idlethread;
	kstat_named_t inv_swtch;
	kstat_named_t nthreads;
	kstat_named_t cpumigrate;
	kstat_named_t xcalls;
	kstat_named_t mutex_adenters;
	kstat_named_t rw_rdfails;
	kstat_named_t rw_wrfails;
	kstat_named_t modload;
	kstat_named_t modunload;
	kstat_named_t bawrite;
	kstat_named_t iowait;
} cpu_sys_stats_ks_data_template = {
234 { "cpu_ticks_idle", KSTAT_DATA_UINT64
},
235 { "cpu_ticks_user", KSTAT_DATA_UINT64
},
236 { "cpu_ticks_kernel", KSTAT_DATA_UINT64
},
237 { "cpu_ticks_wait", KSTAT_DATA_UINT64
},
238 { "cpu_nsec_idle", KSTAT_DATA_UINT64
},
239 { "cpu_nsec_user", KSTAT_DATA_UINT64
},
240 { "cpu_nsec_kernel", KSTAT_DATA_UINT64
},
241 { "cpu_nsec_dtrace", KSTAT_DATA_UINT64
},
242 { "cpu_nsec_intr", KSTAT_DATA_UINT64
},
243 { "cpu_load_intr", KSTAT_DATA_UINT64
},
244 { "wait_ticks_io", KSTAT_DATA_UINT64
},
245 { "dtrace_probes", KSTAT_DATA_UINT64
},
246 { "bread", KSTAT_DATA_UINT64
},
247 { "bwrite", KSTAT_DATA_UINT64
},
248 { "lread", KSTAT_DATA_UINT64
},
249 { "lwrite", KSTAT_DATA_UINT64
},
250 { "phread", KSTAT_DATA_UINT64
},
251 { "phwrite", KSTAT_DATA_UINT64
},
252 { "pswitch", KSTAT_DATA_UINT64
},
253 { "trap", KSTAT_DATA_UINT64
},
254 { "intr", KSTAT_DATA_UINT64
},
255 { "syscall", KSTAT_DATA_UINT64
},
256 { "sysread", KSTAT_DATA_UINT64
},
257 { "syswrite", KSTAT_DATA_UINT64
},
258 { "sysfork", KSTAT_DATA_UINT64
},
259 { "sysvfork", KSTAT_DATA_UINT64
},
260 { "sysexec", KSTAT_DATA_UINT64
},
261 { "readch", KSTAT_DATA_UINT64
},
262 { "writech", KSTAT_DATA_UINT64
},
263 { "rcvint", KSTAT_DATA_UINT64
},
264 { "xmtint", KSTAT_DATA_UINT64
},
265 { "mdmint", KSTAT_DATA_UINT64
},
266 { "rawch", KSTAT_DATA_UINT64
},
267 { "canch", KSTAT_DATA_UINT64
},
268 { "outch", KSTAT_DATA_UINT64
},
269 { "msg", KSTAT_DATA_UINT64
},
270 { "sema", KSTAT_DATA_UINT64
},
271 { "namei", KSTAT_DATA_UINT64
},
272 { "ufsiget", KSTAT_DATA_UINT64
},
273 { "ufsdirblk", KSTAT_DATA_UINT64
},
274 { "ufsipage", KSTAT_DATA_UINT64
},
275 { "ufsinopage", KSTAT_DATA_UINT64
},
276 { "procovf", KSTAT_DATA_UINT64
},
277 { "intrthread", KSTAT_DATA_UINT64
},
278 { "intrblk", KSTAT_DATA_UINT64
},
279 { "intrunpin", KSTAT_DATA_UINT64
},
280 { "idlethread", KSTAT_DATA_UINT64
},
281 { "inv_swtch", KSTAT_DATA_UINT64
},
282 { "nthreads", KSTAT_DATA_UINT64
},
283 { "cpumigrate", KSTAT_DATA_UINT64
},
284 { "xcalls", KSTAT_DATA_UINT64
},
285 { "mutex_adenters", KSTAT_DATA_UINT64
},
286 { "rw_rdfails", KSTAT_DATA_UINT64
},
287 { "rw_wrfails", KSTAT_DATA_UINT64
},
288 { "modload", KSTAT_DATA_UINT64
},
289 { "modunload", KSTAT_DATA_UINT64
},
290 { "bawrite", KSTAT_DATA_UINT64
},
291 { "iowait", KSTAT_DATA_UINT64
},
static struct cpu_vm_stats_ks_data {
	kstat_named_t pgfrec;
	kstat_named_t pgpgin;
	kstat_named_t pgpgout;
	kstat_named_t hat_fault;
	kstat_named_t as_fault;
	kstat_named_t maj_fault;
	kstat_named_t cow_fault;
	kstat_named_t prot_fault;
	kstat_named_t softlock;
	kstat_named_t kernel_asflt;
	kstat_named_t pgrrun;
	kstat_named_t execpgin;
	kstat_named_t execpgout;
	kstat_named_t execfree;
	kstat_named_t anonpgin;
	kstat_named_t anonpgout;
	kstat_named_t anonfree;
	kstat_named_t fspgin;
	kstat_named_t fspgout;
	kstat_named_t fsfree;
} cpu_vm_stats_ks_data_template = {
	{ "pgrec",		KSTAT_DATA_UINT64 },
	{ "pgfrec",		KSTAT_DATA_UINT64 },
	{ "pgin",		KSTAT_DATA_UINT64 },
	{ "pgpgin",		KSTAT_DATA_UINT64 },
	{ "pgout",		KSTAT_DATA_UINT64 },
	{ "pgpgout",		KSTAT_DATA_UINT64 },
	{ "zfod",		KSTAT_DATA_UINT64 },
	{ "dfree",		KSTAT_DATA_UINT64 },
	{ "scan",		KSTAT_DATA_UINT64 },
	{ "rev",		KSTAT_DATA_UINT64 },
	{ "hat_fault",		KSTAT_DATA_UINT64 },
	{ "as_fault",		KSTAT_DATA_UINT64 },
	{ "maj_fault",		KSTAT_DATA_UINT64 },
	{ "cow_fault",		KSTAT_DATA_UINT64 },
	{ "prot_fault",		KSTAT_DATA_UINT64 },
	{ "softlock",		KSTAT_DATA_UINT64 },
	{ "kernel_asflt",	KSTAT_DATA_UINT64 },
	{ "pgrrun",		KSTAT_DATA_UINT64 },
	{ "execpgin",		KSTAT_DATA_UINT64 },
	{ "execpgout",		KSTAT_DATA_UINT64 },
	{ "execfree",		KSTAT_DATA_UINT64 },
	{ "anonpgin",		KSTAT_DATA_UINT64 },
	{ "anonpgout",		KSTAT_DATA_UINT64 },
	{ "anonfree",		KSTAT_DATA_UINT64 },
	{ "fspgin",		KSTAT_DATA_UINT64 },
	{ "fspgout",		KSTAT_DATA_UINT64 },
	{ "fsfree",		KSTAT_DATA_UINT64 },
};
/*
 * Force the specified thread to migrate to the appropriate processor.
 * Called with thread lock held, returns with it dropped.
 */
force_thread_migrate(kthread_id_t tp)
	ASSERT(THREAD_LOCK_HELD(tp));
	if (tp == curthread) {
		THREAD_TRANSITION(tp);
		thread_unlock_nopreempt(tp);
	if (tp->t_state == TS_ONPROC) {
	} else if (tp->t_state == TS_RUN) {
/*
 * Set affinity for a specified CPU.
 * A reference count is incremented and the affinity is held until the
 * reference count is decremented to zero by thread_affinity_clear().
 * This is so regions of code requiring affinity can be nested.
 * Caller needs to ensure that cpu_id remains valid, which can be
 * done by holding cpu_lock across this call, unless the caller
 * specifies CPU_CURRENT in which case the cpu_lock will be acquired
 * by thread_affinity_set and CPU->cpu_id will be the target CPU.
 */
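
/*
 * Example (illustrative sketch): code that must run on one particular CPU
 * for a while might bracket the region as follows; cpu_lock keeps
 * target_cpuid valid across the call (target_cpuid and do_on_that_cpu()
 * are hypothetical):
 *
 *	mutex_enter(&cpu_lock);
 *	thread_affinity_set(curthread, target_cpuid);
 *	mutex_exit(&cpu_lock);
 *	do_on_that_cpu();
 *	thread_affinity_clear(curthread);
 */
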
thread_affinity_set(kthread_id_t t, int cpu_id)
	ASSERT(!(t == curthread && t->t_weakbound_cpu != NULL));

	if ((c = cpu_id) == CPU_CURRENT) {
		mutex_enter(&cpu_lock);
		cpu_id = CPU->cpu_id;
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT((cpu_id >= 0) && (cpu_id < NCPU));
	ASSERT(cp != NULL);		/* user must provide a good cpu_id */
	/*
	 * If there is already a hard affinity requested, and this affinity
	 * conflicts with that, panic.
	 */
	if (t->t_affinitycnt > 0 && t->t_bound_cpu != cp) {
		panic("affinity_set: setting %p but already bound to %p",
		    (void *)cp, (void *)t->t_bound_cpu);
	/*
	 * Make sure we're running on the right CPU.
	 */
	if (cp != t->t_cpu || t != curthread) {
		force_thread_migrate(t);	/* drops thread lock */
	if (c == CPU_CURRENT)
		mutex_exit(&cpu_lock);
/*
 * Wrapper for backward compatibility.
 */
affinity_set(int cpu_id)
	thread_affinity_set(curthread, cpu_id);
/*
 * Decrement the affinity reservation count and if it becomes zero,
 * clear the CPU affinity for the current thread, or set it to the user's
 * software binding request.
 */
thread_affinity_clear(kthread_id_t t)
	register processorid_t binding;

	if (--t->t_affinitycnt == 0) {
		if ((binding = t->t_bind_cpu) == PBIND_NONE) {
			/*
			 * Adjust disp_max_unbound_pri if necessary.
			 */
			disp_adjust_unbound_pri(t);
			t->t_bound_cpu = NULL;
			if (t->t_cpu->cpu_part != t->t_cpupart) {
				force_thread_migrate(t);
			t->t_bound_cpu = cpu[binding];
			/*
			 * Make sure the thread is running on the bound CPU.
			 */
			if (t->t_cpu != t->t_bound_cpu) {
				force_thread_migrate(t);
				return;		/* already dropped lock */
/*
 * Wrapper for backward compatibility.
 */
affinity_clear(void)
	thread_affinity_clear(curthread);
/*
 * Weak cpu affinity.  Bind to the "current" cpu for short periods
 * of time during which the thread must not block (but may be preempted).
 * Use this instead of kpreempt_disable() when it is only "no migration"
 * rather than "no preemption" semantics that are required - disabling
 * preemption holds higher priority threads off of cpu and if the
 * operation that is protected is more than momentary this is not good.
 *
 * Weakly bound threads will not prevent a cpu from being offlined -
 * we'll only run them on the cpu to which they are weakly bound but
 * (because they do not block) we'll always be able to move them on to
 * another cpu at offline time if we give them just a short moment to
 * run during which they will unbind.  To give a cpu a chance of offlining,
 * however, we require a barrier to weak bindings that may be raised for a
 * given cpu (offline/move code may set this and then wait a short time for
 * existing weak bindings to drop); the cpu_inmotion pointer is that barrier.
 *
 * There are few restrictions on the calling context of thread_nomigrate.
 * The caller must not hold the thread lock.  Calls may be nested.
 *
 * After weakbinding a thread must not perform actions that may block.
 * In particular it must not call thread_affinity_set; calling that when
 * already weakbound is nonsensical anyway.
 *
 * If curthread is prevented from migrating for other reasons
 * (kernel preemption disabled; high pil; strongly bound; interrupt thread)
 * then the weak binding will succeed even if this cpu is the target of an
 * offline/move request.
 */
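
/*
 * Example (illustrative sketch): a caller updating per-CPU state that must
 * not be torn across a migration, but that can tolerate preemption, might
 * pair the calls as follows; update_my_percpu_counter() is a hypothetical
 * helper that never blocks:
 *
 *	thread_nomigrate();
 *	update_my_percpu_counter(CPU->cpu_seqid);
 *	thread_allowmigrate();
 *
 * No blocking operations may appear between the two calls.
 */
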
thread_nomigrate(void)
	kthread_id_t t = curthread;

	/*
	 * A highlevel interrupt must not modify t_nomigrate or
	 * t_weakbound_cpu of the thread it has interrupted.  A lowlevel
	 * interrupt thread cannot migrate and we can avoid the
	 * thread_lock call below by short-circuiting here.  In either
	 * case we can just return since no migration is possible and
	 * the condition will persist (ie, when we test for these again
	 * in thread_allowmigrate they can't have changed).  Migration
	 * is also impossible if we're at or above DISP_LEVEL pil.
	 */
	if (CPU_ON_INTR(cp) || t->t_flag & T_INTR_THREAD ||
	    getpil() >= DISP_LEVEL) {

	/*
	 * We must be consistent with existing weak bindings.  Since we
	 * may be interrupted between the increment of t_nomigrate and
	 * the store to t_weakbound_cpu below we cannot assume that
	 * t_weakbound_cpu will be set if t_nomigrate is.  Note that we
	 * cannot assert t_weakbound_cpu == t_bind_cpu since that is not
	 * the case.
	 */
	if (t->t_nomigrate && t->t_weakbound_cpu && t->t_weakbound_cpu != cp) {
		panic("thread_nomigrate: binding to %p but already "
		    "bound to %p", (void *)cp,
		    (void *)t->t_weakbound_cpu);

	/*
	 * At this point we have preemption disabled and we don't yet hold
	 * the thread lock.  So it's possible that somebody else could
	 * set t_bind_cpu here and not be able to force us across to the
	 * new cpu (since we have preemption disabled).
	 */
	thread_lock(curthread);

	/*
	 * If further weak bindings are being (temporarily) suppressed then
	 * we'll settle for disabling kernel preemption (which assures
	 * no migration provided the thread does not block which it is
	 * not allowed to if using thread_nomigrate).  We must remember
	 * this disposition so we can take appropriate action in
	 * thread_allowmigrate.  If this is a nested call and the
	 * thread is already weakbound then fall through as normal.
	 * We remember the decision to settle for kpreempt_disable through
	 * negative nesting counting in t_nomigrate.  Once a thread has had one
	 * weakbinding request satisfied in this way any further (nested)
	 * requests will continue to be satisfied in the same way,
	 * even if weak bindings have recommenced.
	 */
	if (t->t_nomigrate < 0 || weakbindingbarrier && t->t_nomigrate == 0) {
		thread_unlock(curthread);
		return;		/* with kpreempt_disable still active */

	/*
	 * We hold thread_lock so t_bind_cpu cannot change.  We could,
	 * however, be running on a different cpu to which we are t_bound_cpu
	 * to (as explained above).  If we grant the weak binding request
	 * in that case then the dispatcher must favour our weak binding
	 * over our strong (in which case, just as when preemption is
	 * disabled, we can continue to run on a cpu other than the one to
	 * which we are strongbound; the difference in this case is that
	 * this thread can be preempted and so can appear on the dispatch
	 * queues of a cpu other than the one it is strongbound to).
	 *
	 * If the cpu we are running on does not appear to be a current
	 * offline target (we check cpu_inmotion to determine this - since
	 * we don't hold cpu_lock we may not see a recent store to that,
	 * so it's possible that we at times can grant a weak binding to a
	 * cpu that is an offline target, but that one request will not
	 * prevent the offline from succeeding) then we will always grant
	 * the weak binding request.  This includes the case above where
	 * we grant a weakbinding not commensurate with our strong binding.
	 *
	 * If our cpu does appear to be an offline target then we're inclined
	 * not to grant the weakbinding request just yet - we'd prefer to
	 * migrate to another cpu and grant the request there.  The
	 * exceptions are those cases where going through preemption code
	 * will not result in us changing cpu:
	 *
	 *	.  interrupts have already bypassed this case (see above)
	 *	.  we are already weakbound to this cpu (dispatcher code will
	 *	   always return us to the weakbound cpu)
	 *	.  preemption was disabled even before we disabled it above
	 *	.  we are strongbound to this cpu (if we're strongbound to
	 *	   another and not yet running there the trip through the
	 *	   dispatcher will move us to the strongbound cpu and we
	 *	   will grant the weak binding there)
	 */
	if (cp != cpu_inmotion || t->t_nomigrate > 0 || t->t_preempt > 1 ||
	    t->t_bound_cpu == cp) {
		/*
		 * Don't be tempted to store to t_weakbound_cpu only on
		 * the first nested bind request - if we're interrupted
		 * after the increment of t_nomigrate and before the
		 * store to t_weakbound_cpu and the interrupt calls
		 * thread_nomigrate then the assertion in thread_allowmigrate
		 * will fail.
		 */
		t->t_weakbound_cpu = cp;
		thread_unlock(curthread);
		/*
		 * Now that we have dropped the thread_lock another thread
		 * can set our t_weakbound_cpu, and will try to migrate us
		 * to the strongbound cpu (which will not be prevented by
		 * preemption being disabled since we're about to enable
		 * preemption).  We have granted the weakbinding to the current
		 * cpu, so again we are in the position that it is possible
		 * that our weak and strong bindings differ.  Again this
		 * is catered for by dispatcher code which will favour our
		 * weak binding.
		 */
	} else {
		/*
		 * Move to another cpu before granting the request by
		 * forcing this thread through preemption code.  When we
		 * get to set{front,back}dq called from CL_PREEMPT()
		 * cpu_choose() will be used to select a cpu to queue
		 * us on - that will see cpu_inmotion and take
		 * steps to avoid returning us to this cpu.
		 */
		cp->cpu_kprunrun = 1;
		thread_unlock(curthread);
		kpreempt_enable();	/* will call preempt() */
thread_allowmigrate(void)
	kthread_id_t t = curthread;

	ASSERT(t->t_weakbound_cpu == CPU ||
	    (t->t_nomigrate < 0 && t->t_preempt > 0) ||
	    CPU_ON_INTR(CPU) || t->t_flag & T_INTR_THREAD ||
	    getpil() >= DISP_LEVEL);

	if (CPU_ON_INTR(CPU) || (t->t_flag & T_INTR_THREAD) ||
	    getpil() >= DISP_LEVEL)
		return;

	if (t->t_nomigrate < 0) {
		/*
		 * This thread was granted "weak binding" in the
		 * stronger form of kernel preemption disabling.
		 * Undo a level of nesting for both t_nomigrate
		 * and t_preempt.
		 */
	} else if (--t->t_nomigrate == 0) {
		/*
		 * Time to drop the weak binding.  We need to cater
		 * for the case where we're weakbound to a different
		 * cpu than that to which we're strongbound (a very
		 * temporary arrangement that must only persist until
		 * weak binding drops).  We don't acquire thread_lock
		 * here so even as this code executes t_bound_cpu
		 * may be changing.  So we disable preemption and
		 * a) in the case that t_bound_cpu changes while we
		 * have preemption disabled kprunrun will be set
		 * asynchronously, and b) if before disabling
		 * preemption we were already on a different cpu to
		 * our t_bound_cpu then we set kprunrun ourselves
		 * to force a trip through the dispatcher when
		 * preemption is enabled.
		 */
		if (t->t_bound_cpu &&
		    t->t_weakbound_cpu != t->t_bound_cpu)
			CPU->cpu_kprunrun = 1;
		t->t_weakbound_cpu = NULL;
/*
 * weakbinding_stop can be used to temporarily cause weakbindings made
 * with thread_nomigrate to be satisfied through the stronger action of
 * kpreempt_disable.  weakbinding_start recommences normal weakbinding.
 */
weakbinding_stop(void)
	ASSERT(MUTEX_HELD(&cpu_lock));
	weakbindingbarrier = 1;
	membar_producer();	/* make visible before subsequent thread_lock */

weakbinding_start(void)
	ASSERT(MUTEX_HELD(&cpu_lock));
	weakbindingbarrier = 0;
/*
 * This routine is called to place the CPUs in a safe place so that
 * one of them can be taken off line or placed on line.  What we are
 * trying to do here is prevent a thread from traversing the list
 * of active CPUs while we are changing it or from getting placed on
 * the run queue of a CPU that has just gone off line.  We do this by
 * creating a thread with the highest possible prio for each CPU and
 * having it call this routine.  The advantage of this method is that
 * we can eliminate all checks for CPU_ACTIVE in the disp routines.
 * This makes disp faster at the expense of making p_online() slower
 * which is a good trade off.
 */
cpu_pause(int index)
	struct _cpu_pause_info *cpi = &cpu_pause_info;
	volatile char *safe = &safe_list[index];

	ASSERT((curthread->t_bound_cpu != NULL) || (*safe == PAUSE_DIE));

	while (*safe != PAUSE_DIE) {
		membar_enter();		/* make sure stores are flushed */
		sema_v(&cpi->cp_sem);	/* signal requesting thread */

		/*
		 * Wait here until all pause threads are running.  That
		 * indicates that it's safe to do the spl.  Until
		 * cpu_pause_info.cp_go is set, we don't want to spl
		 * because that might block clock interrupts needed
		 * to preempt threads on other CPUs.
		 */
		while (cpi->cp_go == 0)

		/*
		 * Even though we are at the highest disp prio, we need
		 * to block out all interrupts below LOCK_LEVEL so that
		 * an intr doesn't come in, wake up a thread, and call
		 * setbackdq/setfrontdq.
		 */

		/*
		 * if cp_func has been set then call it using index as the
		 * argument, currently only used by cpr_suspend_cpus().
		 * This function is used as the code to execute on the
		 * "paused" cpu's when a machine comes out of a sleep state
		 * and CPU's were powered off.  (could also be used for
		 * hotplugging CPU's).
		 */
		if (cpi->cp_func != NULL)
			(*cpi->cp_func)((void *)lindex);

		mach_cpu_pause(safe);

		/*
		 * Waiting is at an end. Switch out of cpu_pause
		 * loop and resume useful work.
		 */

	mutex_enter(&pause_free_mutex);
	cv_broadcast(&pause_free_cv);
	mutex_exit(&pause_free_mutex);
/*
 * Allow the cpus to start running again.
 */
start_cpus()
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_pause_info.cp_paused);
	cpu_pause_info.cp_paused = NULL;
	for (i = 0; i < NCPU; i++)
		safe_list[i] = PAUSE_IDLE;
	membar_enter();			/* make sure stores are flushed */
	splx(cpu_pause_info.cp_spl);
/*
 * Allocate a pause thread for a CPU.
 */
cpu_pause_alloc(cpu_t *cp)
	long	cpun = cp->cpu_id;

	/*
	 * Note, v.v_nglobpris will not change value as long as I hold
	 * cpu_lock.
	 */
	t = thread_create(NULL, 0, cpu_pause, (void *)cpun,
	    0, &p0, TS_STOPPED, v.v_nglobpris - 1);
	t->t_disp_queue = cp->cpu_disp;
	t->t_affinitycnt = 1;
	cp->cpu_pause_thread = t;
	/*
	 * Registering a thread in the callback table is usually done
	 * in the initialization code of the thread.  In this
	 * case, we do it right after thread creation because the
	 * thread itself may never run, and we need to register the
	 * fact that it is safe for cpr suspend.
	 */
	CALLB_CPR_INIT_SAFE(t, "cpu_pause");
/*
 * Free a pause thread for a CPU.
 */
cpu_pause_free(cpu_t *cp)
	int	cpun = cp->cpu_id;

	ASSERT(MUTEX_HELD(&cpu_lock));
	/*
	 * We have to get the thread and tell it to die.
	 */
	if ((t = cp->cpu_pause_thread) == NULL) {
		ASSERT(safe_list[cpun] == PAUSE_IDLE);
	t->t_cpu = CPU;		/* disp gets upset if last cpu is quiesced. */
	t->t_bound_cpu = NULL;	/* Must un-bind; cpu may not be running. */
	t->t_pri = v.v_nglobpris - 1;
	ASSERT(safe_list[cpun] == PAUSE_IDLE);
	safe_list[cpun] = PAUSE_DIE;
	THREAD_TRANSITION(t);
	thread_unlock_nopreempt(t);

	/*
	 * If we don't wait for the thread to actually die, it may try to
	 * run on the wrong cpu as part of an actual call to pause_cpus().
	 */
	mutex_enter(&pause_free_mutex);
	while (safe_list[cpun] != PAUSE_DEAD) {
		cv_wait(&pause_free_cv, &pause_free_mutex);
	}
	mutex_exit(&pause_free_mutex);
	safe_list[cpun] = PAUSE_IDLE;

	cp->cpu_pause_thread = NULL;
/*
 * Initialize basic structures for pausing CPUs.
 */
cpu_pause_init()
	sema_init(&cpu_pause_info.cp_sem, 0, NULL, SEMA_DEFAULT, NULL);
	/*
	 * Create initial CPU pause thread.
	 */
	cpu_pause_alloc(CPU);
/*
 * Start the threads used to pause another CPU.
 */
cpu_pause_start(processorid_t cpu_id)
	for (i = 0; i < NCPU; i++) {
		if (!CPU_IN_SET(cpu_available, i) || (i == cpu_id)) {
			safe_list[i] = PAUSE_WAIT;
		/*
		 * Skip CPU if it is quiesced or not yet started.
		 */
		if ((cp->cpu_flags & (CPU_QUIESCED | CPU_READY)) != CPU_READY) {
			safe_list[i] = PAUSE_WAIT;
		/*
		 * Start this CPU's pause thread.
		 */
		t = cp->cpu_pause_thread;
		/*
		 * Reset the priority, since nglobpris may have
		 * changed since the thread was created, if someone
		 * has loaded the RT (or some other) scheduling
		 * class.
		 */
		t->t_pri = v.v_nglobpris - 1;
		THREAD_TRANSITION(t);
		thread_unlock_nopreempt(t);
/*
 * Pause all of the CPUs except the one we are on by creating a high
 * priority thread bound to those CPUs.
 *
 * Note that one must be extremely careful regarding code
 * executed while CPUs are paused.  Since a CPU may be paused
 * while a thread scheduling on that CPU is holding an adaptive
 * lock, code executed with CPUs paused must not acquire adaptive
 * (or low-level spin) locks.  Also, such code must not block,
 * since the thread that is supposed to initiate the wakeup may
 * never run.
 *
 * With a few exceptions, the restrictions on code executed with CPUs
 * paused match those for code executed at high-level interrupt
 * context.
 */
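
/*
 * Example (illustrative sketch): a caller reorganizing a CPU-indexed
 * structure that lockless readers may traverse might bracket the update
 * with a pause while holding cpu_lock; rebuild_my_cpu_table() is a
 * hypothetical helper that obeys the restrictions above (no blocking,
 * no adaptive or low-level spin locks):
 *
 *	mutex_enter(&cpu_lock);
 *	pause_cpus(NULL, NULL);
 *	rebuild_my_cpu_table();
 *	start_cpus();
 *	mutex_exit(&cpu_lock);
 */
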
pause_cpus(cpu_t *off_cp, void *(*func)(void *))
	processorid_t cpu_id;
	struct _cpu_pause_info *cpi = &cpu_pause_info;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpi->cp_paused == NULL);

	for (i = 0; i < NCPU; i++)
		safe_list[i] = PAUSE_IDLE;

	/*
	 * If running on the cpu that is going offline, get off it.
	 * This is so that it won't be necessary to rechoose a CPU
	 * when done.
	 */
		cpu_id = off_cp->cpu_next_part->cpu_id;
		cpu_id = CPU->cpu_id;
	affinity_set(cpu_id);

	/*
	 * Start the pause threads and record how many were started
	 */
	cpi->cp_count = cpu_pause_start(cpu_id);

	/*
	 * Now wait for all CPUs to be running the pause thread.
	 */
	while (cpi->cp_count > 0) {
		/*
		 * Spin reading the count without grabbing the disp
		 * lock to make sure we don't prevent the pause
		 * threads from getting the lock.
		 */
		while (sema_held(&cpi->cp_sem))
		if (sema_tryp(&cpi->cp_sem))
	cpi->cp_go = 1;			/* all have reached cpu_pause */

	/*
	 * Now wait for all CPUs to spl. (Transition from PAUSE_READY
	 * to PAUSE_WAIT.)
	 */
	for (i = 0; i < NCPU; i++) {
		while (safe_list[i] != PAUSE_WAIT)
	cpi->cp_spl = splhigh();	/* block dispatcher on this CPU */
	cpi->cp_paused = curthread;
/*
 * Check whether the current thread has CPUs paused
 */
	if (cpu_pause_info.cp_paused != NULL) {
		ASSERT(cpu_pause_info.cp_paused == curthread);
cpu_get_all(processorid_t cpun)
	ASSERT(MUTEX_HELD(&cpu_lock));

	if (cpun >= NCPU || cpun < 0 || !CPU_IN_SET(cpu_available, cpun))
/*
 * Check whether cpun is a valid processor id and whether it should be
 * visible from the current zone. If it is, return a pointer to the
 * associated CPU structure.
 */
cpu_get(processorid_t cpun)
	ASSERT(MUTEX_HELD(&cpu_lock));
	c = cpu_get_all(cpun);
	if (c != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
	    zone_pset_get(curproc->p_zone) != cpupart_query_cpu(c))
/*
 * The following functions should be used to check CPU states in the kernel.
 * They should be invoked with cpu_lock held.  Kernel subsystems interested
 * in CPU states should *not* use cpu_get_state() and various P_ONLINE/etc
 * states.  Those are for user-land (and system call) use only.
 */
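
/*
 * Example (illustrative sketch): a subsystem deciding whether a CPU is a
 * suitable target for new work might test its state under cpu_lock; the
 * surrounding bookkeeping is hypothetical:
 *
 *	mutex_enter(&cpu_lock);
 *	if (cpu_is_online(cp) && !cpu_is_nointr(cp))
 *		target = cp;
 *	mutex_exit(&cpu_lock);
 */
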
/*
 * Determine whether the CPU is online and handling interrupts.
 */
cpu_is_online(cpu_t *cpu)
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_online(cpu->cpu_flags));

/*
 * Determine whether the CPU is offline (this includes spare and faulted).
 */
cpu_is_offline(cpu_t *cpu)
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_offline(cpu->cpu_flags));

/*
 * Determine whether the CPU is powered off.
 */
cpu_is_poweredoff(cpu_t *cpu)
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_poweredoff(cpu->cpu_flags));

/*
 * Determine whether the CPU is active but not handling interrupts.
 */
cpu_is_nointr(cpu_t *cpu)
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_nointr(cpu->cpu_flags));

/*
 * Determine whether the CPU is active (scheduling threads).
 */
cpu_is_active(cpu_t *cpu)
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_active(cpu->cpu_flags));
/*
 * Same as above, but these require cpu_flags instead of cpu_t pointers.
 */
cpu_flagged_online(cpu_flag_t cpu_flags)
	return (cpu_flagged_active(cpu_flags) &&
	    (cpu_flags & CPU_ENABLE));

cpu_flagged_offline(cpu_flag_t cpu_flags)
	return (((cpu_flags & CPU_POWEROFF) == 0) &&
	    ((cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY));

cpu_flagged_poweredoff(cpu_flag_t cpu_flags)
	return ((cpu_flags & CPU_POWEROFF) == CPU_POWEROFF);

cpu_flagged_nointr(cpu_flag_t cpu_flags)
	return (cpu_flagged_active(cpu_flags) &&
	    (cpu_flags & CPU_ENABLE) == 0);

cpu_flagged_active(cpu_flag_t cpu_flags)
	return (((cpu_flags & (CPU_POWEROFF | CPU_FAULTED | CPU_SPARE)) == 0) &&
	    ((cpu_flags & (CPU_READY | CPU_OFFLINE)) == CPU_READY));
/*
 * Bring the indicated CPU online.
 */
cpu_online(cpu_t *cp)
	/*
	 * Handle on-line request.
	 *	This code must put the new CPU on the active list before
	 *	starting it because it will not be paused, and will start
	 *	using the active list immediately.  The real start occurs
	 *	when the CPU_QUIESCED flag is turned off.
	 */
	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Put all the cpus into a known safe place.
	 * No mutexes can be entered while CPUs are paused.
	 */
	error = mp_cpu_start(cp);	/* arch-dep hook */
		pg_cpupart_in(cp, cp->cpu_part);
		pause_cpus(NULL, NULL);
		cpu_add_active_internal(cp);
		if (cp->cpu_flags & CPU_FAULTED) {
			cp->cpu_flags &= ~CPU_FAULTED;
			mp_cpu_faulted_exit(cp);
		cp->cpu_flags &= ~(CPU_QUIESCED | CPU_OFFLINE | CPU_FROZEN |
		CPU_NEW_GENERATION(cp);
		cpu_stats_kstat_create(cp);
		cpu_create_intrstat(cp);
		lgrp_kstat_create(cp);
		cpu_state_change_notify(cp->cpu_id, CPU_ON);
		cpu_intr_enable(cp);	/* arch-dep hook */
		cpu_state_change_notify(cp->cpu_id, CPU_INTR_ON);
		/*
		 * This has to be called only after cyclic_online(). This
		 * function uses cyclics.
		 */
		callout_cpu_online(cp);
		poke_cpu(cp->cpu_id);
/*
 * Take the indicated CPU offline.
 */
cpu_offline(cpu_t *cp, int flags)
	int	callout_off = 0;
	int	(*bound_func)(struct cpu *, int);
	boolean_t unbind_all_threads = (flags & CPU_FORCED) != 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * If we're going from faulted or spare to offline, just
	 * clear these flags and update CPU state.
	 */
	if (cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) {
		if (cp->cpu_flags & CPU_FAULTED) {
			cp->cpu_flags &= ~CPU_FAULTED;
			mp_cpu_faulted_exit(cp);
		cp->cpu_flags &= ~CPU_SPARE;

	/*
	 * Handle off-line request.
	 */

	/*
	 * Don't offline last online CPU in partition
	 */
	if (ncpus_online <= 1 || pp->cp_ncpus <= 1 || cpu_intr_count(cp) < 2)
	/*
	 * Unbind all soft-bound threads bound to our CPU and hard bound
	 * threads if we were asked to.
	 */
	error = cpu_unbind(cp->cpu_id, unbind_all_threads);

	/*
	 * We shouldn't be bound to this CPU ourselves.
	 */
	if (curthread->t_bound_cpu == cp)

	/*
	 * Tell interested parties that this CPU is going offline.
	 */
	CPU_NEW_GENERATION(cp);
	cpu_state_change_notify(cp->cpu_id, CPU_OFF);

	/*
	 * Tell the PG subsystem that the CPU is leaving the partition
	 */
	pg_cpupart_out(cp, pp);

	/*
	 * Take the CPU out of interrupt participation so we won't find
	 * bound kernel threads.  If the architecture cannot completely
	 * shut off interrupts on the CPU, don't quiesce it, but don't
	 * run anything but interrupt thread... this is indicated by
	 * the CPU_OFFLINE flag being on but the CPU_QUIESCE flag being
	 * off.
	 */
	intr_enable = cp->cpu_flags & CPU_ENABLE;
	no_quiesce = cpu_intr_disable(cp);

	/*
	 * Record that we are aiming to offline this cpu.  This acts as
	 * a barrier to further weak binding requests in thread_nomigrate
	 * and also causes cpu_choose, disp_lowpri_cpu and setfrontdq to
	 * lean away from this cpu.  Further strong bindings are already
	 * avoided since we hold cpu_lock.  Since threads that are set
	 * runnable around now and others coming off the target cpu are
	 * directed away from the target, existing strong and weak bindings
	 * (especially the latter) to the target cpu stand maximum chance of
	 * being able to unbind during the short delay loop below (if other
	 * unbound threads compete they may not see cpu in time to unbind
	 * even if they would do so immediately).
	 */

	/*
	 * Check for kernel threads (strong or weak) bound to that CPU.
	 * Strongly bound threads may not unbind, and we'll have to return
	 * EBUSY.  Weakly bound threads should always disappear - we've
	 * stopped more weak binding with cpu_inmotion and existing
	 * bindings will drain imminently (they may not block).  Nonetheless
	 * we will wait for a fixed period for all bound threads to disappear.
	 * Inactive interrupt threads are OK (they'll be in TS_FREE
	 * state).  If test finds some bound threads, wait a few ticks
	 * to give short-lived threads (such as interrupts) chance to
	 * complete.  Note that if no_quiesce is set, i.e. this cpu
	 * is required to service interrupts, then we take the route
	 * that permits interrupt threads to be active (or bypassed).
	 */
	bound_func = no_quiesce ? disp_bound_threads : disp_bound_anythreads;

again:	for (loop_count = 0; (*bound_func)(cp, 0); loop_count++) {
		if (loop_count >= 5) {
			error = EBUSY;	/* some threads still bound */

		/*
		 * If some threads were assigned, give them
		 * a chance to complete or move.
		 *
		 * This assumes that the clock_thread is not bound
		 * to any CPU, because the clock_thread is needed to
		 * do the delay(hz/100).
		 *
		 * Note: we still hold the cpu_lock while waiting for
		 * the next clock tick.  This is OK since it isn't
		 * needed for anything else except processor_bind(2),
		 * and system initialization.  If we drop the lock,
		 * we would risk another p_online disabling the last
		 * CPU.
		 */

	if (error == 0 && callout_off == 0) {
		callout_cpu_offline(cp);

	if (error == 0 && cyclic_off == 0) {
		if (!cyclic_offline(cp)) {
			/*
			 * We must have bound cyclics...
			 */

	/*
	 * Call mp_cpu_stop() to perform any special operations
	 * needed for this machine architecture to offline a CPU.
	 */
		error = mp_cpu_stop(cp);	/* arch-dep hook */

	/*
	 * If that all worked, take the CPU offline and decrement
	 * ncpus_online.
	 */

		/*
		 * Put all the cpus into a known safe place.
		 * No mutexes can be entered while CPUs are paused.
		 */
		pause_cpus(cp, NULL);

		/*
		 * Repeat the operation, if necessary, to make sure that
		 * all outstanding low-level interrupts run to completion
		 * before we set the CPU_QUIESCED flag.  It's also possible
		 * that a thread has weak bound to the cpu despite our raising
		 * cpu_inmotion above since it may have loaded that
		 * value before the barrier became visible (this would have
		 * to be the thread that was on the target cpu at the time
		 * we raised the barrier).
		 */
		if ((!no_quiesce && cp->cpu_intr_actv != 0) ||
		    (*bound_func)(cp, 1)) {
			(void) mp_cpu_start(cp);

		ncp = cp->cpu_next_part;
		cpu_lpl = cp->cpu_lpl;
		ASSERT(cpu_lpl != NULL);

		/*
		 * Remove the CPU from the list of active CPUs.
		 */
		cpu_remove_active(cp);

		/*
		 * Walk the active process list and look for threads
		 * whose home lgroup needs to be updated, or
		 * the last CPU they run on is the one being offlined now.
		 */
		ASSERT(curthread->t_cpu != cp);
		for (p = practive; p != NULL; p = p->p_next) {
				ASSERT(t->t_lpl != NULL);
				/*
				 * Taking last CPU in lpl offline
				 * Rehome thread if it is in this lpl
				 * Otherwise, update the count of how many
				 * threads are in this CPU's lgroup but have
				 * a different lpl.
				 */
				if (cpu_lpl->lpl_ncpu == 0) {
					if (t->t_lpl == cpu_lpl)
					else if (t->t_lpl->lpl_lgrpid ==
					    cpu_lpl->lpl_lgrpid)
				ASSERT(t->t_lpl->lpl_ncpu > 0);
				/*
				 * Update CPU last ran on if it was this CPU
				 */
				if (t->t_cpu == cp && t->t_bound_cpu != cp)
					t->t_cpu = disp_lowpri_cpu(ncp,
					    t->t_lpl, t->t_pri, NULL);
				ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp ||
				    t->t_weakbound_cpu == cp);
			} while (t != p->p_tlist);

			/*
			 * Didn't find any threads in the same lgroup as this
			 * CPU with a different lpl, so remove the lgroup from
			 * the process lgroup bitmask.
			 */
			if (lgrp_diff_lpl == 0)
				klgrpset_del(p->p_lgrpset, cpu_lpl->lpl_lgrpid);

		/*
		 * Walk thread list looking for threads that need to be
		 * rehomed, since there are some threads that are not in
		 * their process's p_tlist.
		 */
			ASSERT(t != NULL && t->t_lpl != NULL);
			/*
			 * Rehome threads with same lpl as this CPU when this
			 * is the last CPU in the lpl.
			 */
			if ((cpu_lpl->lpl_ncpu == 0) && (t->t_lpl == cpu_lpl))
				    lgrp_choose(t, t->t_cpupart), 1);

			ASSERT(t->t_lpl->lpl_ncpu > 0);
			/*
			 * Update CPU last ran on if it was this CPU
			 */
			if (t->t_cpu == cp && t->t_bound_cpu != cp) {
				t->t_cpu = disp_lowpri_cpu(ncp,
				    t->t_lpl, t->t_pri, NULL);
			ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp ||
			    t->t_weakbound_cpu == cp);
		} while (t != curthread);
		ASSERT((cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) == 0);
		cp->cpu_flags |= CPU_OFFLINE;
		disp_cpu_inactive(cp);
		cp->cpu_flags |= CPU_QUIESCED;
		cpu_inmotion = NULL;
		cpu_stats_kstat_destroy(cp);
		cpu_delete_intrstat(cp);
		lgrp_kstat_destroy(cp);

	cpu_inmotion = NULL;

	/*
	 * If we failed, re-enable interrupts.
	 * Do this even if cpu_intr_disable returned an error, because
	 * it may have partially disabled interrupts.
	 */
	if (error && intr_enable)
		cpu_intr_enable(cp);

	/*
	 * If we failed, but managed to offline the cyclic subsystem on this
	 * CPU, bring it back online.
	 */
	if (error && cyclic_off)
		cyclic_online(cp);

	/*
	 * If we failed, but managed to offline callouts on this CPU,
	 * bring it back online.
	 */
	if (error && callout_off)
		callout_cpu_online(cp);

	/*
	 * If we failed, tell the PG subsystem that the CPU is back
	 * in the partition.
	 */
	pg_cpupart_in(cp, pp);

	/*
	 * If we failed, we need to notify everyone that this CPU is back on.
	 */
		CPU_NEW_GENERATION(cp);
		cpu_state_change_notify(cp->cpu_id, CPU_ON);
		cpu_state_change_notify(cp->cpu_id, CPU_INTR_ON);
/*
 * Mark the indicated CPU as faulted, taking it offline.
 */
cpu_faulted(cpu_t *cp, int flags)
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(!cpu_is_poweredoff(cp));

	if (cpu_is_offline(cp)) {
		cp->cpu_flags &= ~CPU_SPARE;
		cp->cpu_flags |= CPU_FAULTED;
		mp_cpu_faulted_enter(cp);

	if ((error = cpu_offline(cp, flags)) == 0) {
		cp->cpu_flags |= CPU_FAULTED;
		mp_cpu_faulted_enter(cp);
/*
 * Mark the indicated CPU as a spare, taking it offline.
 */
cpu_spare(cpu_t *cp, int flags)
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(!cpu_is_poweredoff(cp));

	if (cpu_is_offline(cp)) {
		if (cp->cpu_flags & CPU_FAULTED) {
			cp->cpu_flags &= ~CPU_FAULTED;
			mp_cpu_faulted_exit(cp);
		cp->cpu_flags |= CPU_SPARE;

	if ((error = cpu_offline(cp, flags)) == 0) {
		cp->cpu_flags |= CPU_SPARE;
/*
 * Take the indicated CPU from poweroff to offline.
 */
cpu_poweron(cpu_t *cp)
	int	error = ENOTSUP;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_is_poweredoff(cp));

	error = mp_cpu_poweron(cp);	/* arch-dep hook */
/*
 * Take the indicated CPU from any inactive state to powered off.
 */
cpu_poweroff(cpu_t *cp)
	int	error = ENOTSUP;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_is_offline(cp));

	if (!(cp->cpu_flags & CPU_QUIESCED))
		return (EBUSY);		/* not completely idle */

	error = mp_cpu_poweroff(cp);	/* arch-dep hook */
/*
 * Initialize the Sequential CPU id lookup table
 */
	tbl = kmem_zalloc(sizeof (struct cpu *) * max_ncpus, KM_SLEEP);
/*
 * Initialize the CPU lists for the first CPU.
 */
cpu_list_init(cpu_t *cp)
	clock_cpu_list = cp;

	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;
	CPUSET_ADD(cpu_seqid_inuse, 0);

	/*
	 * Bootstrap cpu_seq using cpu_list
	 * The cpu_seq[] table will be dynamically allocated
	 * when kmem later becomes available (but before going MP)
	 */
	cpu_seq = &cpu_list;

	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
	cp_default.cp_cpulist = cp;
	cp_default.cp_ncpus = 1;
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;
	cp->cpu_part = &cp_default;

	CPUSET_ADD(cpu_available, cp->cpu_id);
/*
 * Insert a CPU into the list of available CPUs.
 */
cpu_add_unit(cpu_t *cp)
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_list != NULL);	/* list started in cpu_list_init */

	lgrp_config(LGRP_CONFIG_CPU_ADD, (uintptr_t)cp, 0);

	/*
	 * Note: most users of the cpu_list will grab the
	 * cpu_lock to insure that it isn't modified.  However,
	 * certain users can't or won't do that.  To allow this
	 * we pause the other cpus.  Users who walk the list
	 * without cpu_lock, must disable kernel preemption
	 * to insure that the list isn't modified underneath
	 * them.  Also, any cached pointers to cpu structures
	 * must be revalidated by checking to see if the
	 * cpu_next pointer points to itself.  This check must
	 * be done with the cpu_lock held or kernel preemption
	 * disabled.  This check relies upon the fact that
	 * old cpu structures are not free'ed or cleared after
	 * they are removed from the cpu_list.
	 *
	 * Note that the clock code walks the cpu list dereferencing
	 * the cpu_part pointer, so we need to initialize it before
	 * adding the cpu to the list.
	 */
	cp->cpu_part = &cp_default;
	pause_cpus(NULL, NULL);
	cp->cpu_next = cpu_list;
	cp->cpu_prev = cpu_list->cpu_prev;
	cpu_list->cpu_prev->cpu_next = cp;
	cpu_list->cpu_prev = cp;

	for (seqid = 0; CPU_IN_SET(cpu_seqid_inuse, seqid); seqid++)
		continue;
	CPUSET_ADD(cpu_seqid_inuse, seqid);
	cp->cpu_seqid = seqid;

	if (seqid > max_cpu_seqid_ever)
		max_cpu_seqid_ever = seqid;

	ASSERT(ncpus < max_ncpus);
	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
	cpu[cp->cpu_id] = cp;
	CPUSET_ADD(cpu_available, cp->cpu_id);
	cpu_seq[cp->cpu_seqid] = cp;

	/*
	 * allocate a pause thread for this CPU.
	 */
	cpu_pause_alloc(cp);

	/*
	 * So that new CPUs won't have NULL prev_onln and next_onln pointers,
	 * link them into a list of just that CPU.
	 * This is so that disp_lowpri_cpu will work for thread_create in
	 * pause_cpus() when called from the startup thread in a new CPU.
	 */
	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;
	cpu_info_kstat_create(cp);
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;

	init_cpu_mstate(cp, CMS_SYSTEM);

	pool_pset_mod = gethrtime();
/*
 * Do the opposite of cpu_add_unit().
 */
cpu_del_unit(int cpuid)
	struct cpu	*cp, *cpnext;

	ASSERT(MUTEX_HELD(&cpu_lock));

	ASSERT(cp->cpu_next_onln == cp);
	ASSERT(cp->cpu_prev_onln == cp);
	ASSERT(cp->cpu_next_part == cp);
	ASSERT(cp->cpu_prev_part == cp);

	/*
	 * Tear down the CPU's physical ID cache, and update any
	 * processor groups
	 */
	pg_cpu_fini(cp, NULL);
	pghw_physid_destroy(cp);

	/*
	 * Destroy kstat stuff.
	 */
	cpu_info_kstat_destroy(cp);
	term_cpu_mstate(cp);
	/*
	 * Free up pause thread.
	 */
	CPUSET_DEL(cpu_available, cp->cpu_id);
	cpu[cp->cpu_id] = NULL;
	cpu_seq[cp->cpu_seqid] = NULL;

	/*
	 * The clock thread and mutex_vector_enter cannot hold the
	 * cpu_lock while traversing the cpu list, therefore we pause
	 * all other threads by pausing the other cpus. These, and any
	 * other routines holding cpu pointers while possibly sleeping
	 * must be sure to call kpreempt_disable before processing the
	 * list and be sure to check that the cpu has not been deleted
	 * after any sleeps (check cp->cpu_next != NULL). We guarantee
	 * to keep the deleted cpu structure around.
	 *
	 * Note that this MUST be done AFTER cpu_available
	 * has been updated so that we don't waste time
	 * trying to pause the cpu we're trying to delete.
	 */
	pause_cpus(NULL, NULL);

	cpnext = cp->cpu_next;
	cp->cpu_prev->cpu_next = cp->cpu_next;
	cp->cpu_next->cpu_prev = cp->cpu_prev;

	/*
	 * Signals that the cpu has been deleted (see above).
	 */
	cp->cpu_next = NULL;
	cp->cpu_prev = NULL;

	CPUSET_DEL(cpu_seqid_inuse, cp->cpu_seqid);

	lgrp_config(LGRP_CONFIG_CPU_DEL, (uintptr_t)cp, 0);

	pool_pset_mod = gethrtime();
/*
 * Add a CPU to the list of active CPUs.
 *	This routine must not get any locks, because other CPUs are paused.
 */
cpu_add_active_internal(cpu_t *cp)
	cpupart_t	*pp = cp->cpu_part;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_list != NULL);	/* list started in cpu_list_init */

	cp->cpu_next_onln = cpu_active;
	cp->cpu_prev_onln = cpu_active->cpu_prev_onln;
	cpu_active->cpu_prev_onln->cpu_next_onln = cp;
	cpu_active->cpu_prev_onln = cp;

	if (pp->cp_cpulist) {
		cp->cpu_next_part = pp->cp_cpulist;
		cp->cpu_prev_part = pp->cp_cpulist->cpu_prev_part;
		pp->cp_cpulist->cpu_prev_part->cpu_next_part = cp;
		pp->cp_cpulist->cpu_prev_part = cp;
	} else {
		ASSERT(pp->cp_ncpus == 0);
		pp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
	}
	if (pp->cp_ncpus == 1) {
		cp_numparts_nonempty++;
		ASSERT(cp_numparts_nonempty != 0);
	}

	lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)cp, 0);

	bzero(&cp->cpu_loadavg, sizeof (cp->cpu_loadavg));
/*
 * Add a CPU to the list of active CPUs.
 *	This is called from machine-dependent layers when a new CPU is started.
 */
cpu_add_active(cpu_t *cp)
	pg_cpupart_in(cp, cp->cpu_part);

	pause_cpus(NULL, NULL);
	cpu_add_active_internal(cp);

	cpu_stats_kstat_create(cp);
	cpu_create_intrstat(cp);
	lgrp_kstat_create(cp);
	cpu_state_change_notify(cp->cpu_id, CPU_INIT);
/*
 * Remove a CPU from the list of active CPUs.
 *	This routine must not get any locks, because other CPUs are paused.
 */
cpu_remove_active(cpu_t *cp)
	cpupart_t	*pp = cp->cpu_part;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cp->cpu_next_onln != cp);	/* not the last one */
	ASSERT(cp->cpu_prev_onln != cp);	/* not the last one */

	pg_cpu_inactive(cp);

	lgrp_config(LGRP_CONFIG_CPU_OFFLINE, (uintptr_t)cp, 0);

	if (cp == clock_cpu_list)
		clock_cpu_list = cp->cpu_next_onln;

	cp->cpu_prev_onln->cpu_next_onln = cp->cpu_next_onln;
	cp->cpu_next_onln->cpu_prev_onln = cp->cpu_prev_onln;
	if (cpu_active == cp) {
		cpu_active = cp->cpu_next_onln;
	}
	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;

	cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
	cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
	if (pp->cp_cpulist == cp) {
		pp->cp_cpulist = cp->cpu_next_part;
		ASSERT(pp->cp_cpulist != cp);
	}
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;
	if (pp->cp_ncpus == 0) {
		cp_numparts_nonempty--;
		ASSERT(cp_numparts_nonempty != 0);
	}
/*
 * Routine used to setup a newly inserted CPU in preparation for starting
 * it running code.
 */
cpu_configure(int cpuid)
	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Some structures are statically allocated based upon
	 * the maximum number of cpus the system supports.  Do not
	 * try to add anything beyond this limit.
	 */
	if (cpuid < 0 || cpuid >= NCPU) {
	if ((cpu[cpuid] != NULL) && (cpu[cpuid]->cpu_flags != 0)) {
	if ((retval = mp_cpu_configure(cpuid)) != 0) {
	cpu[cpuid]->cpu_flags = CPU_QUIESCED | CPU_OFFLINE | CPU_POWEROFF;
	cpu_set_state(cpu[cpuid]);
	retval = cpu_state_change_hooks(cpuid, CPU_CONFIG, CPU_UNCONFIG);
		(void) mp_cpu_unconfigure(cpuid);
/*
 * Routine used to cleanup a CPU that has been powered off.  This will
 * destroy all per-cpu information related to this cpu.
 */
cpu_unconfigure(int cpuid)
	ASSERT(MUTEX_HELD(&cpu_lock));

	if (cpu[cpuid] == NULL) {
	if (cpu[cpuid]->cpu_flags == 0) {
	if ((cpu[cpuid]->cpu_flags & CPU_POWEROFF) == 0) {
	if (cpu[cpuid]->cpu_props != NULL) {
		(void) nvlist_free(cpu[cpuid]->cpu_props);
		cpu[cpuid]->cpu_props = NULL;
	}

	error = cpu_state_change_hooks(cpuid, CPU_UNCONFIG, CPU_CONFIG);

	return (mp_cpu_unconfigure(cpuid));
/*
 * Routines for registering and de-registering cpu_setup callback functions.
 *
 * These routines must not be called from a driver's attach(9E) or
 * detach(9E) entry point.
 *
 * NOTE: CPU callbacks should not block. They are called with cpu_lock held.
 */

/*
 * Ideally, these would be dynamically allocated and put into a linked
 * list; however that is not feasible because the registration routine
 * has to be available before the kmem allocator is working (in fact,
 * it is called by the kmem allocator init code).  In any case, there
 * are quite a few extra entries for future users.
 */
#define	NCPU_SETUPS	20

static struct cpu_setup {
	cpu_setup_func_t *func;
	void *arg;
} cpu_setups[NCPU_SETUPS];
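
/*
 * Example (illustrative sketch): a subsystem that keeps per-CPU state can
 * register a callback to learn when CPUs are configured or change state;
 * my_cpu_setup() and my_cookie are hypothetical, and the callback must not
 * block since it runs with cpu_lock held:
 *
 *	mutex_enter(&cpu_lock);
 *	register_cpu_setup_func(my_cpu_setup, my_cookie);
 *	mutex_exit(&cpu_lock);
 */
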
register_cpu_setup_func(cpu_setup_func_t *func, void *arg)
	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; i < NCPU_SETUPS; i++)
		if (cpu_setups[i].func == NULL)
			break;
	if (i >= NCPU_SETUPS)
		cmn_err(CE_PANIC, "Ran out of cpu_setup callback entries");

	cpu_setups[i].func = func;
	cpu_setups[i].arg = arg;
unregister_cpu_setup_func(cpu_setup_func_t *func, void *arg)
	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; i < NCPU_SETUPS; i++)
		if ((cpu_setups[i].func == func) &&
		    (cpu_setups[i].arg == arg))
			break;
	if (i >= NCPU_SETUPS)
		cmn_err(CE_PANIC, "Could not find cpu_setup callback to "
		    "deregister");

	cpu_setups[i].func = NULL;
	cpu_setups[i].arg = 0;
/*
 * Call any state change hooks for this CPU, ignore any errors.
 */
cpu_state_change_notify(int id, cpu_setup_t what)
	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; i < NCPU_SETUPS; i++) {
		if (cpu_setups[i].func != NULL) {
			cpu_setups[i].func(what, id, cpu_setups[i].arg);
		}
	}
/*
 * Call any state change hooks for this CPU, undo it if error found.
 */
cpu_state_change_hooks(int id, cpu_setup_t what, cpu_setup_t undo)
	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; i < NCPU_SETUPS; i++) {
		if (cpu_setups[i].func != NULL) {
			retval = cpu_setups[i].func(what, id,
			    cpu_setups[i].arg);
				for (i--; i >= 0; i--) {
					if (cpu_setups[i].func != NULL)
						cpu_setups[i].func(undo,
						    id, cpu_setups[i].arg);
/*
 * Export information about this CPU via the kstat mechanism.
 */
static struct {
	kstat_named_t ci_state;
	kstat_named_t ci_state_begin;
	kstat_named_t ci_cpu_type;
	kstat_named_t ci_fpu_type;
	kstat_named_t ci_clock_MHz;
	kstat_named_t ci_chip_id;
	kstat_named_t ci_implementation;
	kstat_named_t ci_brandstr;
	kstat_named_t ci_core_id;
	kstat_named_t ci_curr_clock_Hz;
	kstat_named_t ci_supp_freq_Hz;
	kstat_named_t ci_pg_id;
	kstat_named_t ci_vendorstr;
	kstat_named_t ci_family;
	kstat_named_t ci_model;
	kstat_named_t ci_step;
	kstat_named_t ci_clogid;
	kstat_named_t ci_pkg_core_id;
	kstat_named_t ci_ncpuperchip;
	kstat_named_t ci_ncoreperchip;
	kstat_named_t ci_max_cstates;
	kstat_named_t ci_curr_cstate;
	kstat_named_t ci_cacheid;
	kstat_named_t ci_sktstr;
} cpu_info_template = {
	{ "state",			KSTAT_DATA_CHAR },
	{ "state_begin",		KSTAT_DATA_LONG },
	{ "cpu_type",			KSTAT_DATA_CHAR },
	{ "fpu_type",			KSTAT_DATA_CHAR },
	{ "clock_MHz",			KSTAT_DATA_LONG },
	{ "chip_id",			KSTAT_DATA_LONG },
	{ "implementation",		KSTAT_DATA_STRING },
	{ "brand",			KSTAT_DATA_STRING },
	{ "core_id",			KSTAT_DATA_LONG },
	{ "current_clock_Hz",		KSTAT_DATA_UINT64 },
	{ "supported_frequencies_Hz",	KSTAT_DATA_STRING },
	{ "pg_id",			KSTAT_DATA_LONG },
	{ "vendor_id",			KSTAT_DATA_STRING },
	{ "family",			KSTAT_DATA_INT32 },
	{ "model",			KSTAT_DATA_INT32 },
	{ "stepping",			KSTAT_DATA_INT32 },
	{ "clog_id",			KSTAT_DATA_INT32 },
	{ "pkg_core_id",		KSTAT_DATA_LONG },
	{ "ncpu_per_chip",		KSTAT_DATA_INT32 },
	{ "ncore_per_chip",		KSTAT_DATA_INT32 },
	{ "supported_max_cstates",	KSTAT_DATA_INT32 },
	{ "current_cstate",		KSTAT_DATA_INT32 },
	{ "cache_id",			KSTAT_DATA_INT32 },
	{ "socket_type",		KSTAT_DATA_STRING },
};

static kmutex_t cpu_info_template_lock;
cpu_info_kstat_update(kstat_t *ksp, int rw)
	cpu_t	*cp = ksp->ks_private;
	const char *pi_state;

	if (rw == KSTAT_WRITE)

	/* Is the cpu still initialising itself? */
	if (cpuid_checkpass(cp, 1) == 0)

	switch (cp->cpu_type_info.pi_state) {
		pi_state = PS_ONLINE;
		pi_state = PS_POWEROFF;
		pi_state = PS_NOINTR;
		pi_state = PS_FAULTED;
		pi_state = PS_SPARE;
		pi_state = PS_OFFLINE;
		pi_state = "unknown";
	(void) strcpy(cpu_info_template.ci_state.value.c, pi_state);
	cpu_info_template.ci_state_begin.value.l = cp->cpu_state_begin;
	(void) strncpy(cpu_info_template.ci_cpu_type.value.c,
	    cp->cpu_type_info.pi_processor_type, 15);
	(void) strncpy(cpu_info_template.ci_fpu_type.value.c,
	    cp->cpu_type_info.pi_fputypes, 15);
	cpu_info_template.ci_clock_MHz.value.l = cp->cpu_type_info.pi_clock;
	cpu_info_template.ci_chip_id.value.l =
	    pg_plat_hw_instance_id(cp, PGHW_CHIP);
	kstat_named_setstr(&cpu_info_template.ci_implementation,
	    cp->cpu_idstr);
	kstat_named_setstr(&cpu_info_template.ci_brandstr, cp->cpu_brandstr);
	cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp);
	cpu_info_template.ci_curr_clock_Hz.value.ui64 =
	cpu_info_template.ci_pg_id.value.l =
	    cp->cpu_pg && cp->cpu_pg->cmt_lineage ?
	    cp->cpu_pg->cmt_lineage->pg_id : -1;
	kstat_named_setstr(&cpu_info_template.ci_supp_freq_Hz,
	    cp->cpu_supp_freqs);

	kstat_named_setstr(&cpu_info_template.ci_vendorstr,
	    cpuid_getvendorstr(cp));
	cpu_info_template.ci_family.value.l = cpuid_getfamily(cp);
	cpu_info_template.ci_model.value.l = cpuid_getmodel(cp);
	cpu_info_template.ci_step.value.l = cpuid_getstep(cp);
	cpu_info_template.ci_clogid.value.l = cpuid_get_clogid(cp);
	cpu_info_template.ci_ncpuperchip.value.l = cpuid_get_ncpu_per_chip(cp);
	cpu_info_template.ci_ncoreperchip.value.l =
	    cpuid_get_ncore_per_chip(cp);
	cpu_info_template.ci_pkg_core_id.value.l = cpuid_get_pkgcoreid(cp);
	cpu_info_template.ci_max_cstates.value.l = cp->cpu_m.max_cstates;
	cpu_info_template.ci_curr_cstate.value.l = cpu_idle_get_cpu_state(cp);
	cpu_info_template.ci_cacheid.value.i32 = cpuid_get_cacheid(cp);
	kstat_named_setstr(&cpu_info_template.ci_sktstr,
	    cpuid_getsocketstr(cp));
static void
cpu_info_kstat_create(cpu_t *cp)
{
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;
	if ((cp->cpu_info_kstat = kstat_create_zone("cpu_info", cp->cpu_id,
	    NULL, "misc", KSTAT_TYPE_NAMED,
	    sizeof (cpu_info_template) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE, zoneid)) != NULL) {
		cp->cpu_info_kstat->ks_data_size += 2 * CPU_IDSTRLEN;
		cp->cpu_info_kstat->ks_data_size += X86_VENDOR_STRLEN;
		if (cp->cpu_supp_freqs != NULL)
			cp->cpu_info_kstat->ks_data_size +=
			    strlen(cp->cpu_supp_freqs) + 1;
		cp->cpu_info_kstat->ks_lock = &cpu_info_template_lock;
		cp->cpu_info_kstat->ks_data = &cpu_info_template;
		cp->cpu_info_kstat->ks_private = cp;
		cp->cpu_info_kstat->ks_update = cpu_info_kstat_update;
		kstat_install(cp->cpu_info_kstat);
	}
}
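/*
 * Pattern note (sketch, not part of the original source): the function above
 * follows the usual kstat_create(9F)/kstat_install(9F) idiom for a "virtual"
 * named kstat, where ks_data points at a template owned by the module rather
 * than at storage allocated by the kstat framework.  A minimal stand-alone
 * version of the same idiom, with hypothetical names, looks like:
 *
 *	static kstat_named_t my_template[] = {
 *		{ "foo_count", KSTAT_DATA_UINT64 },
 *	};
 *	static kmutex_t my_template_lock;
 *
 *	kstat_t *ksp = kstat_create("mymod", 0, "my_info", "misc",
 *	    KSTAT_TYPE_NAMED, sizeof (my_template) / sizeof (kstat_named_t),
 *	    KSTAT_FLAG_VIRTUAL);
 *	if (ksp != NULL) {
 *		ksp->ks_lock = &my_template_lock;
 *		ksp->ks_data = my_template;
 *		ksp->ks_update = my_update_func;
 *		kstat_install(ksp);
 *	}
 */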
static void
cpu_info_kstat_destroy(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	kstat_delete(cp->cpu_info_kstat);
	cp->cpu_info_kstat = NULL;
}
/*
 * Create and install kstats for the boot CPU.
 */
void
cpu_kstat_init(cpu_t *cp)
{
	mutex_enter(&cpu_lock);
	cpu_info_kstat_create(cp);
	cpu_stats_kstat_create(cp);
	cpu_create_intrstat(cp);
	cpu_set_state(cp);
	mutex_exit(&cpu_lock);
}
/*
 * Make visible to the zone that subset of the cpu information that would be
 * initialized when a cpu is configured (but still offline).
 */
void
cpu_visibility_configure(cpu_t *cp, zone_t *zone)
{
	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(pool_pset_enabled());

	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
		zone->zone_ncpus++;
		ASSERT(zone->zone_ncpus <= ncpus);
	}
	if (cp->cpu_info_kstat != NULL)
		kstat_zone_add(cp->cpu_info_kstat, zoneid);
}
/*
 * Make visible to the zone that subset of the cpu information that would be
 * initialized when a previously configured cpu is onlined.
 */
void
cpu_visibility_online(cpu_t *cp, zone_t *zone)
{
	kstat_t *ksp;
	char name[sizeof ("cpu_stat") + 10];	/* enough for 32-bit cpuids */
	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
	processorid_t cpun;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(pool_pset_enabled());
	ASSERT(cpu_is_active(cp));

	cpun = cp->cpu_id;
	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
		zone->zone_ncpus_online++;
		ASSERT(zone->zone_ncpus_online <= ncpus_online);
	}
	(void) snprintf(name, sizeof (name), "cpu_stat%d", cpun);
	if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES))
	    != NULL) {
		kstat_zone_add(ksp, zoneid);
		kstat_rele(ksp);
	}
	if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) {
		kstat_zone_add(ksp, zoneid);
		kstat_rele(ksp);
	}
	if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) {
		kstat_zone_add(ksp, zoneid);
		kstat_rele(ksp);
	}
	if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) !=
	    NULL) {
		kstat_zone_add(ksp, zoneid);
		kstat_rele(ksp);
	}
}
/*
 * Update relevant kstats such that cpu is now visible to processes
 * executing in specified zone.
 */
void
cpu_visibility_add(cpu_t *cp, zone_t *zone)
{
	cpu_visibility_configure(cp, zone);
	if (cpu_is_active(cp))
		cpu_visibility_online(cp, zone);
}
/*
 * Make invisible to the zone that subset of the cpu information that would be
 * torn down when a previously offlined cpu is unconfigured.
 */
void
cpu_visibility_unconfigure(cpu_t *cp, zone_t *zone)
{
	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(pool_pset_enabled());

	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
		ASSERT(zone->zone_ncpus != 0);
		zone->zone_ncpus--;
	}
	if (cp->cpu_info_kstat)
		kstat_zone_remove(cp->cpu_info_kstat, zoneid);
}
/*
 * Make invisible to the zone that subset of the cpu information that would be
 * torn down when a cpu is offlined (but still configured).
 */
void
cpu_visibility_offline(cpu_t *cp, zone_t *zone)
{
	kstat_t *ksp;
	char name[sizeof ("cpu_stat") + 10];	/* enough for 32-bit cpuids */
	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
	processorid_t cpun;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(pool_pset_enabled());
	ASSERT(cpu_is_active(cp));

	cpun = cp->cpu_id;
	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
		ASSERT(zone->zone_ncpus_online != 0);
		zone->zone_ncpus_online--;
	}
	if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) !=
	    NULL) {
		kstat_zone_remove(ksp, zoneid);
		kstat_rele(ksp);
	}
	if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) {
		kstat_zone_remove(ksp, zoneid);
		kstat_rele(ksp);
	}
	if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) {
		kstat_zone_remove(ksp, zoneid);
		kstat_rele(ksp);
	}
	(void) snprintf(name, sizeof (name), "cpu_stat%d", cpun);
	if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES))
	    != NULL) {
		kstat_zone_remove(ksp, zoneid);
		kstat_rele(ksp);
	}
}
/*
 * Update relevant kstats such that cpu is no longer visible to processes
 * executing in specified zone.
 */
void
cpu_visibility_remove(cpu_t *cp, zone_t *zone)
{
	if (cpu_is_active(cp))
		cpu_visibility_offline(cp, zone);
	cpu_visibility_unconfigure(cp, zone);
}
/*
 * Bind a thread to a CPU as requested.
 */
int
cpu_bind_thread(kthread_id_t tp, processorid_t bind, processorid_t *obind,
    int *error)
{
	processorid_t	binding;
	cpu_t		*cp = NULL;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));

	thread_lock(tp);

	/*
	 * Record old binding, but change the obind, which was initialized
	 * to PBIND_NONE, only if this thread has a binding.  This avoids
	 * reporting PBIND_NONE for a process when some LWPs are bound.
	 */
	binding = tp->t_bind_cpu;
	if (binding != PBIND_NONE)
		*obind = binding;	/* record old binding */

	switch (bind) {
	case PBIND_QUERY:
		/* Just return the old binding */
		thread_unlock(tp);
		return (0);

	case PBIND_QUERY_TYPE:
		/* Return the binding type */
		*obind = TB_CPU_IS_SOFT(tp) ? PBIND_SOFT : PBIND_HARD;
		thread_unlock(tp);
		return (0);

	case PBIND_SOFT:
		/*
		 *  Set soft binding for this thread and return the actual
		 *  binding
		 */
		TB_CPU_SOFT_SET(tp);
		thread_unlock(tp);
		return (0);

	case PBIND_HARD:
		/*
		 *  Set hard binding for this thread and return the actual
		 *  binding
		 */
		TB_CPU_HARD_SET(tp);
		thread_unlock(tp);
		return (0);

	default:
		break;
	}

	/*
	 * If this thread/LWP cannot be bound because of permission
	 * problems, just note that and return success so that the
	 * other threads/LWPs will be bound.  This is the way
	 * processor_bind() is defined to work.
	 *
	 * Binding will get EPERM if the thread is of system class
	 * or hasprocperm() fails.
	 */
	if (tp->t_cid == 0 || !hasprocperm(tp->t_cred, CRED())) {
		*error = EPERM;
		thread_unlock(tp);
		return (0);
	}

	binding = bind;
	if (binding != PBIND_NONE) {
		cp = cpu_get((processorid_t)binding);
		/*
		 * Make sure binding is valid and is in right partition.
		 */
		if (cp == NULL || tp->t_cpupart != cp->cpu_part) {
			*error = EINVAL;
			thread_unlock(tp);
			return (0);
		}
	}
	tp->t_bind_cpu = binding;	/* set new binding */

	/*
	 * If there is no system-set reason for affinity, set
	 * the t_bound_cpu field to reflect the binding.
	 */
	if (tp->t_affinitycnt == 0) {
		if (binding == PBIND_NONE) {
			/*
			 * We may need to adjust disp_max_unbound_pri
			 * since we're becoming unbound.
			 */
			disp_adjust_unbound_pri(tp);

			tp->t_bound_cpu = NULL;	/* set new binding */

			/*
			 * Move thread to lgroup with strongest affinity
			 * after unbinding
			 */
			if (tp->t_lgrp_affinity)
				lgrp_move_thread(tp,
				    lgrp_choose(tp, tp->t_cpupart), 1);

			if (tp->t_state == TS_ONPROC &&
			    tp->t_cpu->cpu_part != tp->t_cpupart)
				cpu_surrender(tp);
		} else {
			lpl_t	*lpl;

			tp->t_bound_cpu = cp;
			ASSERT(cp->cpu_lpl != NULL);

			/*
			 * Set home to lgroup with most affinity containing CPU
			 * that thread is being bound or minimum bounding
			 * lgroup if no affinities set
			 */
			if (tp->t_lgrp_affinity)
				lpl = lgrp_affinity_best(tp, tp->t_cpupart,
				    LGRP_NONE, B_FALSE);
			else
				lpl = cp->cpu_lpl;

			if (tp->t_lpl != lpl) {
				/* can't grab cpu_lock */
				lgrp_move_thread(tp, lpl, 1);
			}

			/*
			 * Make the thread switch to the bound CPU.
			 * If the thread is runnable, we need to
			 * requeue it even if t_cpu is already set
			 * to the right CPU, since it may be on a
			 * kpreempt queue and need to move to a local
			 * queue.  We could check t_disp_queue to
			 * avoid unnecessary overhead if it's already
			 * on the right queue, but since this isn't
			 * a performance-critical operation it doesn't
			 * seem worth the extra code and complexity.
			 *
			 * If the thread is weakbound to the cpu then it will
			 * resist the new binding request until the weak
			 * binding drops.  The cpu_surrender or requeueing
			 * below could be skipped in such cases (since it
			 * will have no effect), but that would require
			 * thread_allowmigrate to acquire thread_lock so
			 * we'll take the very occasional hit here instead.
			 */
			if (tp->t_state == TS_ONPROC) {
				cpu_surrender(tp);
			} else if (tp->t_state == TS_RUN) {
				cpu_t *ocp = tp->t_cpu;

				(void) dispdeq(tp);
				setbackdq(tp);

				/*
				 * On the bound CPU's disp queue now.
				 */
				ASSERT(tp->t_disp_queue == cp->cpu_disp ||
				    tp->t_weakbound_cpu == ocp);
			}
		}
	}

	/*
	 * Our binding has changed; set TP_CHANGEBIND.
	 */
	tp->t_proc_flag |= TP_CHANGEBIND;
	aston(tp);

	thread_unlock(tp);

	return (0);
}
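/*
 * Userland view (sketch, not part of the original source): cpu_bind_thread()
 * is the kernel side of processor_bind(2).  A process binding one of its own
 * LWPs to a CPU looks roughly like the following; the CPU id 3 is an
 * arbitrary example value.
 *
 *	#include <sys/types.h>
 *	#include <sys/processor.h>
 *	#include <sys/procset.h>
 *
 *	processorid_t obind;
 *	if (processor_bind(P_LWPID, P_MYID, 3, &obind) != 0)
 *		perror("processor_bind");
 *	-- obind now holds the previous binding, or PBIND_NONE
 */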
#if CPUSET_WORDS > 1

/*
 * Functions for implementing cpuset operations when a cpuset is more
 * than one word.  On platforms where a cpuset is a single word these
 * are implemented as macros in cpuvar.h.
 */

void
cpuset_all(cpuset_t *s)
{
	int i;

	for (i = 0; i < CPUSET_WORDS; i++)
		s->cpub[i] = ~0UL;
}

void
cpuset_all_but(cpuset_t *s, uint_t cpu)
{
	cpuset_all(s);
	CPUSET_DEL(*s, cpu);
}

void
cpuset_only(cpuset_t *s, uint_t cpu)
{
	CPUSET_ZERO(*s);
	CPUSET_ADD(*s, cpu);
}

int
cpuset_isnull(cpuset_t *s)
{
	int i;

	for (i = 0; i < CPUSET_WORDS; i++)
		if (s->cpub[i] != 0)
			return (0);
	return (1);
}

int
cpuset_cmp(cpuset_t *s1, cpuset_t *s2)
{
	int i;

	for (i = 0; i < CPUSET_WORDS; i++)
		if (s1->cpub[i] != s2->cpub[i])
			return (0);
	return (1);
}

uint_t
cpuset_find(cpuset_t *s)
{
	uint_t	i;
	uint_t	cpu = (uint_t)-1;

	/*
	 * Find a cpu in the cpuset
	 */
	for (i = 0; i < CPUSET_WORDS; i++) {
		cpu = (uint_t)(lowbit(s->cpub[i]) - 1);
		if (cpu != (uint_t)-1) {
			cpu += i * BT_NBIPUL;
			break;
		}
	}
	return (cpu);
}

void
cpuset_bounds(cpuset_t *s, uint_t *smallestid, uint_t *largestid)
{
	int	i, j;
	uint_t	bit;

	/*
	 * First, find the smallest cpu id in the set.
	 */
	for (i = 0; i < CPUSET_WORDS; i++) {
		if (s->cpub[i] != 0) {
			bit = (uint_t)(lowbit(s->cpub[i]) - 1);
			ASSERT(bit != (uint_t)-1);
			*smallestid = bit + (i * BT_NBIPUL);

			/*
			 * Now find the largest cpu id in
			 * the set and return immediately.
			 * Done in an inner loop to avoid
			 * having to break out of the first
			 * loop.
			 */
			for (j = CPUSET_WORDS - 1; j >= i; j--) {
				if (s->cpub[j] != 0) {
					bit = (uint_t)(highbit(s->cpub[j]) - 1);
					ASSERT(bit != (uint_t)-1);
					*largestid = bit + (j * BT_NBIPUL);
					ASSERT(*largestid >= *smallestid);
					return;
				}
			}

			/*
			 * If this code is reached, a
			 * smallestid was found, but not a
			 * largestid. The cpuset must have
			 * been changed during the course
			 * of this function call.
			 */
			ASSERT(0);
		}
	}
	*smallestid = *largestid = CPUSET_NOTINSET;
}

#endif	/* CPUSET_WORDS */
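/*
 * Example (sketch, not part of the original source): the multi-word helpers
 * above mirror the single-word CPUSET_* macros in cpuvar.h, so callers can
 * treat a cpuset_t uniformly.  Using the forms defined above (single-word
 * platforms provide equivalent macros):
 *
 *	cpuset_t set;
 *	uint_t small, large;
 *
 *	CPUSET_ZERO(set);
 *	CPUSET_ADD(set, 3);
 *	CPUSET_ADD(set, 5);
 *	cpuset_bounds(&set, &small, &large);
 *	-- small == 3, large == 5
 *	-- cpuset_find(&set) returns the lowest member, 3
 *
 * The specific CPU ids are illustrative only.
 */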
/*
 * Unbind threads bound to specified CPU.
 *
 * If `unbind_all_threads' is true, unbind all user threads bound to a given
 * CPU. Otherwise unbind all soft-bound user threads.
 */
int
cpu_unbind(processorid_t cpu, boolean_t unbind_all_threads)
{
	processorid_t obind;
	kthread_t *tp;
	int ret = 0;
	proc_t *pp;
	int err, berr = 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	mutex_enter(&pidlock);
	for (pp = practive; pp != NULL; pp = pp->p_next) {
		mutex_enter(&pp->p_lock);
		tp = pp->p_tlist;
		/*
		 * Skip zombies, kernel processes, and processes in
		 * other zones, if called from a non-global zone.
		 */
		if (tp == NULL || (pp->p_flag & SSYS) ||
		    !HASZONEACCESS(curproc, pp->p_zone->zone_id)) {
			mutex_exit(&pp->p_lock);
			continue;
		}
		do {
			if (tp->t_bind_cpu != cpu)
				continue;
			/*
			 * Skip threads with hard binding when
			 * `unbind_all_threads' is not specified.
			 */
			if (!unbind_all_threads && TB_CPU_IS_HARD(tp))
				continue;
			err = cpu_bind_thread(tp, PBIND_NONE, &obind, &berr);
			if (ret == 0)
				ret = err;
		} while ((tp = tp->t_forw) != pp->p_tlist);
		mutex_exit(&pp->p_lock);
	}
	mutex_exit(&pidlock);
	if (ret == 0)
		ret = berr;
	return (ret);
}
/*
 * Destroy all remaining bound threads on a cpu.
 */
void
cpu_destroy_bound_threads(cpu_t *cp)
{
	register kthread_id_t	t, tlist, tnext;

	/*
	 * Destroy all remaining bound threads on the cpu.  This
	 * should include both the interrupt threads and the idle thread.
	 * This requires some care, since we need to traverse the
	 * thread list with the pidlock mutex locked, but thread_free
	 * also locks the pidlock mutex.  So, we collect the threads
	 * we're going to reap in a list headed by "tlist", then we
	 * unlock the pidlock mutex and traverse the tlist list,
	 * doing thread_free's on the thread's.  Simple, n'est pas?
	 * Also, this depends on thread_free not mucking with the
	 * t_next and t_prev links of the thread.
	 */

	tlist = NULL;
	if ((t = curthread) != NULL) {

		mutex_enter(&pidlock);
		do {
			tnext = t->t_next;
			if (t->t_bound_cpu == cp) {

				/*
				 * We've found a bound thread, carefully unlink
				 * it out of the thread list, and add it to
				 * our "tlist".  We "know" we don't have to
				 * worry about unlinking curthread (the thread
				 * that is executing this code).
				 */
				t->t_next->t_prev = t->t_prev;
				t->t_prev->t_next = t->t_next;
				t->t_next = tlist;
				tlist = t;
				ASSERT(t->t_cid == syscid);
				/* wake up anyone blocked in thread_join */
				cv_broadcast(&t->t_joincv);
				/*
				 * t_lwp set by interrupt threads and not
				 * cleared.
				 */
				t->t_lwp = NULL;
				/*
				 * Pause and idle threads always have
				 * t_state set to TS_ONPROC.
				 */
				t->t_state = TS_FREE;
				t->t_prev = NULL;	/* Just in case */
			}

		} while ((t = tnext) != curthread);

		mutex_exit(&pidlock);

		for (t = tlist; t != NULL; t = tnext) {
			tnext = t->t_next;
			thread_free(t);
		}
	}
}
/*
 * Update the cpu_supp_freqs of this cpu. This information is returned
 * as part of cpu_info kstats. If the cpu_info_kstat exists already, then
 * maintain the kstat data size.
 */
void
cpu_set_supp_freqs(cpu_t *cp, const char *freqs)
{
	char clkstr[sizeof ("18446744073709551615") + 1];	/* ui64 MAX */
	const char *lfreqs = clkstr;
	boolean_t kstat_exists = B_FALSE;
	kstat_t *ksp;
	size_t len;

	/*
	 * A NULL pointer means we only support one speed.
	 */
	if (freqs == NULL)
		(void) snprintf(clkstr, sizeof (clkstr), "%"PRIu64,
		    cp->cpu_curr_clock);
	else
		lfreqs = freqs;

	/*
	 * Make sure the frequency doesn't change while a snapshot is
	 * going on. Of course, we only need to worry about this if
	 * the kstat exists.
	 */
	if ((ksp = cp->cpu_info_kstat) != NULL) {
		mutex_enter(ksp->ks_lock);
		kstat_exists = B_TRUE;
	}

	/*
	 * Free any previously allocated string and if the kstat
	 * already exists, then update its data size.
	 */
	if (cp->cpu_supp_freqs != NULL) {
		len = strlen(cp->cpu_supp_freqs) + 1;
		kmem_free(cp->cpu_supp_freqs, len);
		if (kstat_exists)
			ksp->ks_data_size -= len;
	}

	/*
	 * Allocate the new string and set the pointer.
	 */
	len = strlen(lfreqs) + 1;
	cp->cpu_supp_freqs = kmem_alloc(len, KM_SLEEP);
	(void) strcpy(cp->cpu_supp_freqs, lfreqs);

	/*
	 * If the kstat already exists then update the data size and
	 * free the lock.
	 */
	if (kstat_exists) {
		ksp->ks_data_size += len;
		mutex_exit(ksp->ks_lock);
	}
}
/*
 * Indicate the current CPU's clock frequency (in Hz).
 * The calling context must be such that CPU references are safe.
 */
void
cpu_set_curr_clock(uint64_t new_clk)
{
	uint64_t old_clk;

	old_clk = CPU->cpu_curr_clock;
	CPU->cpu_curr_clock = new_clk;

	/*
	 * The cpu-change-speed DTrace probe exports the frequency in Hz
	 */
	DTRACE_PROBE3(cpu__change__speed, processorid_t, CPU->cpu_id,
	    uint64_t, old_clk, uint64_t, new_clk);
}
/*
 * processor_info(2) and p_online(2) status support functions
 *   The constants returned by the cpu_get_state() and cpu_get_state_str() are
 *   for use in communicating processor state information to userland. Kernel
 *   subsystems should only be using the cpu_flags value directly. Subsystems
 *   modifying cpu_flags should record the state change via a call to
 *   cpu_set_state().
 */

/*
 * Update the pi_state of this CPU. This function provides the CPU status for
 * the information returned by processor_info(2).
 */
void
cpu_set_state(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	cpu->cpu_type_info.pi_state = cpu_get_state(cpu);
	cpu->cpu_state_begin = gethrestime_sec();
	pool_cpu_mod = gethrtime();
}
/*
 * Return offline/online/other status for the indicated CPU.  Use only for
 * communication with user applications; cpu_flags provides the in-kernel
 * interface.
 */
int
cpu_get_state(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	if (cpu->cpu_flags & CPU_POWEROFF)
		return (P_POWEROFF);
	else if (cpu->cpu_flags & CPU_FAULTED)
		return (P_FAULTED);
	else if (cpu->cpu_flags & CPU_SPARE)
		return (P_SPARE);
	else if ((cpu->cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY)
		return (P_OFFLINE);
	else if (cpu->cpu_flags & CPU_ENABLE)
		return (P_ONLINE);
	else
		return (P_NOINTR);
}
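/*
 * Userland view (sketch, not part of the original source): the P_* values
 * returned by cpu_get_state() are the same constants a process sees when
 * querying a CPU with p_online(2), e.g.
 *
 *	#include <sys/types.h>
 *	#include <sys/processor.h>
 *
 *	int status = p_online(cpuid, P_STATUS);
 *	if (status == P_ONLINE)
 *		-- the CPU is online and taking interrupts
 *
 * The variable name cpuid is an arbitrary example.
 */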
/*
 * Return processor_info(2) state as a string.
 */
const char *
cpu_get_state_str(cpu_t *cpu)
{
	const char *string;

	switch (cpu_get_state(cpu)) {
	case P_ONLINE:
		string = PS_ONLINE;
		break;
	case P_POWEROFF:
		string = PS_POWEROFF;
		break;
	case P_NOINTR:
		string = PS_NOINTR;
		break;
	case P_SPARE:
		string = PS_SPARE;
		break;
	case P_FAULTED:
		string = PS_FAULTED;
		break;
	case P_OFFLINE:
		string = PS_OFFLINE;
		break;
	default:
		string = "unknown";
		break;
	}
	return (string);
}
/*
 * Export this CPU's statistics (cpu_stat_t and cpu_stats_t) as raw and named
 * kstats, respectively.  This is done when a CPU is initialized or placed
 * online via p_online(2).
 */
static void
cpu_stats_kstat_create(cpu_t *cp)
{
	int instance = cp->cpu_id;
	char *module = "cpu";
	char *class = "misc";
	kstat_t *ksp;
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;
	/*
	 * Create named kstats
	 */
#define	CPU_STATS_KS_CREATE(name, tsize, update_func)                  \
	ksp = kstat_create_zone(module, instance, (name), class,         \
	    KSTAT_TYPE_NAMED, (tsize) / sizeof (kstat_named_t), 0,        \
	    zoneid);                                                      \
	if (ksp != NULL) {                                                \
		ksp->ks_private = cp;                                     \
		ksp->ks_update = (update_func);                           \
		kstat_install(ksp);                                       \
	} else {                                                          \
		cmn_err(CE_WARN, "cpu: unable to create %s:%d:%s kstat",  \
		    module, instance, (name));                            \
	}

	CPU_STATS_KS_CREATE("sys", sizeof (cpu_sys_stats_ks_data_template),
	    cpu_sys_stats_ks_update);
	CPU_STATS_KS_CREATE("vm", sizeof (cpu_vm_stats_ks_data_template),
	    cpu_vm_stats_ks_update);

	/*
	 * Export the familiar cpu_stat_t KSTAT_TYPE_RAW kstat.
	 */
	ksp = kstat_create_zone("cpu_stat", cp->cpu_id, NULL,
	    "misc", KSTAT_TYPE_RAW, sizeof (cpu_stat_t), 0, zoneid);
	if (ksp != NULL) {
		ksp->ks_update = cpu_stat_ks_update;
		ksp->ks_private = cp;
		kstat_install(ksp);
	}
}
static void
cpu_stats_kstat_destroy(cpu_t *cp)
{
	char ks_name[KSTAT_STRLEN];

	(void) sprintf(ks_name, "cpu_stat%d", cp->cpu_id);
	kstat_delete_byname("cpu_stat", cp->cpu_id, ks_name);

	kstat_delete_byname("cpu", cp->cpu_id, "sys");
	kstat_delete_byname("cpu", cp->cpu_id, "vm");
}
static int
cpu_sys_stats_ks_update(kstat_t *ksp, int rw)
{
	cpu_t *cp = (cpu_t *)ksp->ks_private;
	struct cpu_sys_stats_ks_data *csskd;
	cpu_sys_stats_t *css;
	hrtime_t msnsecs[NCMSTATES];
	int	i;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	csskd = ksp->ks_data;
	css = &cp->cpu_stats.sys;

	/*
	 * Read CPU mstate, but compare with the last values we
	 * received to make sure that the returned kstats never
	 * decrease.
	 */

	get_cpu_mstate(cp, msnsecs);
	if (csskd->cpu_nsec_idle.value.ui64 > msnsecs[CMS_IDLE])
		msnsecs[CMS_IDLE] = csskd->cpu_nsec_idle.value.ui64;
	if (csskd->cpu_nsec_user.value.ui64 > msnsecs[CMS_USER])
		msnsecs[CMS_USER] = csskd->cpu_nsec_user.value.ui64;
	if (csskd->cpu_nsec_kernel.value.ui64 > msnsecs[CMS_SYSTEM])
		msnsecs[CMS_SYSTEM] = csskd->cpu_nsec_kernel.value.ui64;

	bcopy(&cpu_sys_stats_ks_data_template, ksp->ks_data,
	    sizeof (cpu_sys_stats_ks_data_template));

	csskd->cpu_ticks_wait.value.ui64 = 0;
	csskd->wait_ticks_io.value.ui64 = 0;

	csskd->cpu_nsec_idle.value.ui64 = msnsecs[CMS_IDLE];
	csskd->cpu_nsec_user.value.ui64 = msnsecs[CMS_USER];
	csskd->cpu_nsec_kernel.value.ui64 = msnsecs[CMS_SYSTEM];
	csskd->cpu_ticks_idle.value.ui64 =
	    NSEC_TO_TICK(csskd->cpu_nsec_idle.value.ui64);
	csskd->cpu_ticks_user.value.ui64 =
	    NSEC_TO_TICK(csskd->cpu_nsec_user.value.ui64);
	csskd->cpu_ticks_kernel.value.ui64 =
	    NSEC_TO_TICK(csskd->cpu_nsec_kernel.value.ui64);
	csskd->cpu_nsec_dtrace.value.ui64 = cp->cpu_dtrace_nsec;
	csskd->dtrace_probes.value.ui64 = cp->cpu_dtrace_probes;
	csskd->cpu_nsec_intr.value.ui64 = cp->cpu_intrlast;
	csskd->cpu_load_intr.value.ui64 = cp->cpu_intrload;
	csskd->bread.value.ui64 = css->bread;
	csskd->bwrite.value.ui64 = css->bwrite;
	csskd->lread.value.ui64 = css->lread;
	csskd->lwrite.value.ui64 = css->lwrite;
	csskd->phread.value.ui64 = css->phread;
	csskd->phwrite.value.ui64 = css->phwrite;
	csskd->pswitch.value.ui64 = css->pswitch;
	csskd->trap.value.ui64 = css->trap;
	csskd->intr.value.ui64 = 0;
	for (i = 0; i < PIL_MAX; i++)
		csskd->intr.value.ui64 += css->intr[i];
	csskd->syscall.value.ui64 = css->syscall;
	csskd->sysread.value.ui64 = css->sysread;
	csskd->syswrite.value.ui64 = css->syswrite;
	csskd->sysfork.value.ui64 = css->sysfork;
	csskd->sysvfork.value.ui64 = css->sysvfork;
	csskd->sysexec.value.ui64 = css->sysexec;
	csskd->readch.value.ui64 = css->readch;
	csskd->writech.value.ui64 = css->writech;
	csskd->rcvint.value.ui64 = css->rcvint;
	csskd->xmtint.value.ui64 = css->xmtint;
	csskd->mdmint.value.ui64 = css->mdmint;
	csskd->rawch.value.ui64 = css->rawch;
	csskd->canch.value.ui64 = css->canch;
	csskd->outch.value.ui64 = css->outch;
	csskd->msg.value.ui64 = css->msg;
	csskd->sema.value.ui64 = css->sema;
	csskd->namei.value.ui64 = css->namei;
	csskd->ufsiget.value.ui64 = css->ufsiget;
	csskd->ufsdirblk.value.ui64 = css->ufsdirblk;
	csskd->ufsipage.value.ui64 = css->ufsipage;
	csskd->ufsinopage.value.ui64 = css->ufsinopage;
	csskd->procovf.value.ui64 = css->procovf;
	csskd->intrthread.value.ui64 = 0;
	for (i = 0; i < LOCK_LEVEL - 1; i++)
		csskd->intrthread.value.ui64 += css->intr[i];
	csskd->intrblk.value.ui64 = css->intrblk;
	csskd->intrunpin.value.ui64 = css->intrunpin;
	csskd->idlethread.value.ui64 = css->idlethread;
	csskd->inv_swtch.value.ui64 = css->inv_swtch;
	csskd->nthreads.value.ui64 = css->nthreads;
	csskd->cpumigrate.value.ui64 = css->cpumigrate;
	csskd->xcalls.value.ui64 = css->xcalls;
	csskd->mutex_adenters.value.ui64 = css->mutex_adenters;
	csskd->rw_rdfails.value.ui64 = css->rw_rdfails;
	csskd->rw_wrfails.value.ui64 = css->rw_wrfails;
	csskd->modload.value.ui64 = css->modload;
	csskd->modunload.value.ui64 = css->modunload;
	csskd->bawrite.value.ui64 = css->bawrite;
	csskd->iowait.value.ui64 = css->iowait;

	return (0);
}
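/*
 * Usage note (sketch, not part of the original source): the "sys" and "vm"
 * named kstats filled in by cpu_sys_stats_ks_update() above and
 * cpu_vm_stats_ks_update() below are what kstat(1M) prints for each CPU,
 * e.g.
 *
 *	$ kstat -p cpu:0:sys:pswitch cpu:0:vm:maj_fault
 *	cpu:0:sys:pswitch	123456
 *	cpu:0:vm:maj_fault	789
 *
 * The numeric values are illustrative only.
 */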
static int
cpu_vm_stats_ks_update(kstat_t *ksp, int rw)
{
	cpu_t *cp = (cpu_t *)ksp->ks_private;
	struct cpu_vm_stats_ks_data *cvskd;
	cpu_vm_stats_t *cvs;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	cvs = &cp->cpu_stats.vm;
	cvskd = ksp->ks_data;

	bcopy(&cpu_vm_stats_ks_data_template, ksp->ks_data,
	    sizeof (cpu_vm_stats_ks_data_template));
	cvskd->pgrec.value.ui64 = cvs->pgrec;
	cvskd->pgfrec.value.ui64 = cvs->pgfrec;
	cvskd->pgin.value.ui64 = cvs->pgin;
	cvskd->pgpgin.value.ui64 = cvs->pgpgin;
	cvskd->pgout.value.ui64 = cvs->pgout;
	cvskd->pgpgout.value.ui64 = cvs->pgpgout;
	cvskd->zfod.value.ui64 = cvs->zfod;
	cvskd->dfree.value.ui64 = cvs->dfree;
	cvskd->scan.value.ui64 = cvs->scan;
	cvskd->rev.value.ui64 = cvs->rev;
	cvskd->hat_fault.value.ui64 = cvs->hat_fault;
	cvskd->as_fault.value.ui64 = cvs->as_fault;
	cvskd->maj_fault.value.ui64 = cvs->maj_fault;
	cvskd->cow_fault.value.ui64 = cvs->cow_fault;
	cvskd->prot_fault.value.ui64 = cvs->prot_fault;
	cvskd->softlock.value.ui64 = cvs->softlock;
	cvskd->kernel_asflt.value.ui64 = cvs->kernel_asflt;
	cvskd->pgrrun.value.ui64 = cvs->pgrrun;
	cvskd->execpgin.value.ui64 = cvs->execpgin;
	cvskd->execpgout.value.ui64 = cvs->execpgout;
	cvskd->execfree.value.ui64 = cvs->execfree;
	cvskd->anonpgin.value.ui64 = cvs->anonpgin;
	cvskd->anonpgout.value.ui64 = cvs->anonpgout;
	cvskd->anonfree.value.ui64 = cvs->anonfree;
	cvskd->fspgin.value.ui64 = cvs->fspgin;
	cvskd->fspgout.value.ui64 = cvs->fspgout;
	cvskd->fsfree.value.ui64 = cvs->fsfree;

	return (0);
}
static int
cpu_stat_ks_update(kstat_t *ksp, int rw)
{
	cpu_stat_t *cso;
	cpu_t *cp;
	int i;
	hrtime_t msnsecs[NCMSTATES];

	cso = (cpu_stat_t *)ksp->ks_data;
	cp = (cpu_t *)ksp->ks_private;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	/*
	 * Read CPU mstate, but compare with the last values we
	 * received to make sure that the returned kstats never
	 * decrease.
	 */

	get_cpu_mstate(cp, msnsecs);
	msnsecs[CMS_IDLE] = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
	msnsecs[CMS_USER] = NSEC_TO_TICK(msnsecs[CMS_USER]);
	msnsecs[CMS_SYSTEM] = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
	if (cso->cpu_sysinfo.cpu[CPU_IDLE] < msnsecs[CMS_IDLE])
		cso->cpu_sysinfo.cpu[CPU_IDLE] = msnsecs[CMS_IDLE];
	if (cso->cpu_sysinfo.cpu[CPU_USER] < msnsecs[CMS_USER])
		cso->cpu_sysinfo.cpu[CPU_USER] = msnsecs[CMS_USER];
	if (cso->cpu_sysinfo.cpu[CPU_KERNEL] < msnsecs[CMS_SYSTEM])
		cso->cpu_sysinfo.cpu[CPU_KERNEL] = msnsecs[CMS_SYSTEM];
	cso->cpu_sysinfo.cpu[CPU_WAIT] = 0;
	cso->cpu_sysinfo.wait[W_IO] = 0;
	cso->cpu_sysinfo.wait[W_SWAP] = 0;
	cso->cpu_sysinfo.wait[W_PIO] = 0;
	cso->cpu_sysinfo.bread = CPU_STATS(cp, sys.bread);
	cso->cpu_sysinfo.bwrite = CPU_STATS(cp, sys.bwrite);
	cso->cpu_sysinfo.lread = CPU_STATS(cp, sys.lread);
	cso->cpu_sysinfo.lwrite = CPU_STATS(cp, sys.lwrite);
	cso->cpu_sysinfo.phread = CPU_STATS(cp, sys.phread);
	cso->cpu_sysinfo.phwrite = CPU_STATS(cp, sys.phwrite);
	cso->cpu_sysinfo.pswitch = CPU_STATS(cp, sys.pswitch);
	cso->cpu_sysinfo.trap = CPU_STATS(cp, sys.trap);
	cso->cpu_sysinfo.intr = 0;
	for (i = 0; i < PIL_MAX; i++)
		cso->cpu_sysinfo.intr += CPU_STATS(cp, sys.intr[i]);
	cso->cpu_sysinfo.syscall = CPU_STATS(cp, sys.syscall);
	cso->cpu_sysinfo.sysread = CPU_STATS(cp, sys.sysread);
	cso->cpu_sysinfo.syswrite = CPU_STATS(cp, sys.syswrite);
	cso->cpu_sysinfo.sysfork = CPU_STATS(cp, sys.sysfork);
	cso->cpu_sysinfo.sysvfork = CPU_STATS(cp, sys.sysvfork);
	cso->cpu_sysinfo.sysexec = CPU_STATS(cp, sys.sysexec);
	cso->cpu_sysinfo.readch = CPU_STATS(cp, sys.readch);
	cso->cpu_sysinfo.writech = CPU_STATS(cp, sys.writech);
	cso->cpu_sysinfo.rcvint = CPU_STATS(cp, sys.rcvint);
	cso->cpu_sysinfo.xmtint = CPU_STATS(cp, sys.xmtint);
	cso->cpu_sysinfo.mdmint = CPU_STATS(cp, sys.mdmint);
	cso->cpu_sysinfo.rawch = CPU_STATS(cp, sys.rawch);
	cso->cpu_sysinfo.canch = CPU_STATS(cp, sys.canch);
	cso->cpu_sysinfo.outch = CPU_STATS(cp, sys.outch);
	cso->cpu_sysinfo.msg = CPU_STATS(cp, sys.msg);
	cso->cpu_sysinfo.sema = CPU_STATS(cp, sys.sema);
	cso->cpu_sysinfo.namei = CPU_STATS(cp, sys.namei);
	cso->cpu_sysinfo.ufsiget = CPU_STATS(cp, sys.ufsiget);
	cso->cpu_sysinfo.ufsdirblk = CPU_STATS(cp, sys.ufsdirblk);
	cso->cpu_sysinfo.ufsipage = CPU_STATS(cp, sys.ufsipage);
	cso->cpu_sysinfo.ufsinopage = CPU_STATS(cp, sys.ufsinopage);
	cso->cpu_sysinfo.inodeovf = 0;
	cso->cpu_sysinfo.fileovf = 0;
	cso->cpu_sysinfo.procovf = CPU_STATS(cp, sys.procovf);
	cso->cpu_sysinfo.intrthread = 0;
	for (i = 0; i < LOCK_LEVEL - 1; i++)
		cso->cpu_sysinfo.intrthread += CPU_STATS(cp, sys.intr[i]);
	cso->cpu_sysinfo.intrblk = CPU_STATS(cp, sys.intrblk);
	cso->cpu_sysinfo.idlethread = CPU_STATS(cp, sys.idlethread);
	cso->cpu_sysinfo.inv_swtch = CPU_STATS(cp, sys.inv_swtch);
	cso->cpu_sysinfo.nthreads = CPU_STATS(cp, sys.nthreads);
	cso->cpu_sysinfo.cpumigrate = CPU_STATS(cp, sys.cpumigrate);
	cso->cpu_sysinfo.xcalls = CPU_STATS(cp, sys.xcalls);
	cso->cpu_sysinfo.mutex_adenters = CPU_STATS(cp, sys.mutex_adenters);
	cso->cpu_sysinfo.rw_rdfails = CPU_STATS(cp, sys.rw_rdfails);
	cso->cpu_sysinfo.rw_wrfails = CPU_STATS(cp, sys.rw_wrfails);
	cso->cpu_sysinfo.modload = CPU_STATS(cp, sys.modload);
	cso->cpu_sysinfo.modunload = CPU_STATS(cp, sys.modunload);
	cso->cpu_sysinfo.bawrite = CPU_STATS(cp, sys.bawrite);
	cso->cpu_sysinfo.rw_enters = 0;
	cso->cpu_sysinfo.win_uo_cnt = 0;
	cso->cpu_sysinfo.win_uu_cnt = 0;
	cso->cpu_sysinfo.win_so_cnt = 0;
	cso->cpu_sysinfo.win_su_cnt = 0;
	cso->cpu_sysinfo.win_suo_cnt = 0;

	cso->cpu_syswait.iowait = CPU_STATS(cp, sys.iowait);
	cso->cpu_syswait.swap = 0;
	cso->cpu_syswait.physio = 0;

	cso->cpu_vminfo.pgrec = CPU_STATS(cp, vm.pgrec);
	cso->cpu_vminfo.pgfrec = CPU_STATS(cp, vm.pgfrec);
	cso->cpu_vminfo.pgin = CPU_STATS(cp, vm.pgin);
	cso->cpu_vminfo.pgpgin = CPU_STATS(cp, vm.pgpgin);
	cso->cpu_vminfo.pgout = CPU_STATS(cp, vm.pgout);
	cso->cpu_vminfo.pgpgout = CPU_STATS(cp, vm.pgpgout);
	cso->cpu_vminfo.zfod = CPU_STATS(cp, vm.zfod);
	cso->cpu_vminfo.dfree = CPU_STATS(cp, vm.dfree);
	cso->cpu_vminfo.scan = CPU_STATS(cp, vm.scan);
	cso->cpu_vminfo.rev = CPU_STATS(cp, vm.rev);
	cso->cpu_vminfo.hat_fault = CPU_STATS(cp, vm.hat_fault);
	cso->cpu_vminfo.as_fault = CPU_STATS(cp, vm.as_fault);
	cso->cpu_vminfo.maj_fault = CPU_STATS(cp, vm.maj_fault);
	cso->cpu_vminfo.cow_fault = CPU_STATS(cp, vm.cow_fault);
	cso->cpu_vminfo.prot_fault = CPU_STATS(cp, vm.prot_fault);
	cso->cpu_vminfo.softlock = CPU_STATS(cp, vm.softlock);
	cso->cpu_vminfo.kernel_asflt = CPU_STATS(cp, vm.kernel_asflt);
	cso->cpu_vminfo.pgrrun = CPU_STATS(cp, vm.pgrrun);
	cso->cpu_vminfo.execpgin = CPU_STATS(cp, vm.execpgin);
	cso->cpu_vminfo.execpgout = CPU_STATS(cp, vm.execpgout);
	cso->cpu_vminfo.execfree = CPU_STATS(cp, vm.execfree);
	cso->cpu_vminfo.anonpgin = CPU_STATS(cp, vm.anonpgin);
	cso->cpu_vminfo.anonpgout = CPU_STATS(cp, vm.anonpgout);
	cso->cpu_vminfo.anonfree = CPU_STATS(cp, vm.anonfree);
	cso->cpu_vminfo.fspgin = CPU_STATS(cp, vm.fspgin);
	cso->cpu_vminfo.fspgout = CPU_STATS(cp, vm.fspgout);
	cso->cpu_vminfo.fsfree = CPU_STATS(cp, vm.fsfree);