/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */
/*
 * Architecture-independent CPU control functions.
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/cpu_event.h>
#include <sys/kstat.h>
#include <sys/uadmin.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/procset.h>
#include <sys/processor.h>
#include <sys/debug.h>
#include <sys/cpupart.h>
#include <sys/kmem_impl.h>	/* to set per-cpu kmem_cache offset */
#include <sys/atomic.h>
#include <sys/callb.h>
#include <sys/vtrace.h>
#include <sys/cyclic.h>
#include <sys/bitmap.h>
#include <sys/nvpair.h>
#include <sys/pool_pset.h>
#include <sys/msacct.h>
#include <sys/archsystm.h>

#if defined(__x86) || defined(__amd64)
#include <sys/x86_archext.h>
#endif

#include <sys/callo.h>
extern int mp_cpu_start(cpu_t *);
extern int mp_cpu_stop(cpu_t *);
extern int mp_cpu_poweron(cpu_t *);
extern int mp_cpu_poweroff(cpu_t *);
extern int mp_cpu_configure(int);
extern int mp_cpu_unconfigure(int);
extern void mp_cpu_faulted_enter(cpu_t *);
extern void mp_cpu_faulted_exit(cpu_t *);

extern int cmp_cpu_to_chip(processorid_t cpuid);

extern char *cpu_fru_fmri(cpu_t *cp);

static void cpu_add_active_internal(cpu_t *cp);
static void cpu_remove_active(cpu_t *cp);
static void cpu_info_kstat_create(cpu_t *cp);
static void cpu_info_kstat_destroy(cpu_t *cp);
static void cpu_stats_kstat_create(cpu_t *cp);
static void cpu_stats_kstat_destroy(cpu_t *cp);

static int cpu_sys_stats_ks_update(kstat_t *ksp, int rw);
static int cpu_vm_stats_ks_update(kstat_t *ksp, int rw);
static int cpu_stat_ks_update(kstat_t *ksp, int rw);
static int cpu_state_change_hooks(int, cpu_setup_t, cpu_setup_t);
/*
 * cpu_lock protects ncpus, ncpus_online, cpu_flag, cpu_list, cpu_active,
 * max_cpu_seqid_ever, and dispatch queue reallocations.  The lock ordering
 * with respect to related locks is:
 *
 *	cpu_lock --> thread_free_lock  --->  p_lock  --->  thread_lock()
 *
 * Warning: Certain sections of code do not use the cpu_lock when
 * traversing the cpu_list (e.g. mutex_vector_enter(), clock()).  Since
 * all cpus are paused during modifications to this list, a solution
 * to protect the list is to either disable kernel preemption while
 * walking the list, *or* recheck the cpu_next pointer at each
 * iteration in the loop.  Note that in no case can any cached
 * copies of the cpu pointers be kept as they may become invalid.
 */
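/*
 * A minimal illustration of the first option above (not taken from any
 * particular caller; do_something() is a hypothetical placeholder), a
 * lock-free walk of cpu_list might look like:
 *
 *	kpreempt_disable();
 *	cp = cpu_list;
 *	do {
 *		do_something(cp);
 *		cp = cp->cpu_next;
 *	} while (cp != cpu_list);
 *	kpreempt_enable();
 */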
cpu_t		*cpu_list;		/* list of all CPUs */
cpu_t		*clock_cpu_list;	/* used by clock to walk CPUs */
cpu_t		*cpu_active;		/* list of active CPUs */
static cpuset_t	cpu_available;		/* set of available CPUs */
cpuset_t	cpu_seqid_inuse;	/* which cpu_seqids are in use */

cpu_t		**cpu_seq;		/* ptrs to CPUs, indexed by seq_id */
/*
 * max_ncpus keeps the max cpus the system can have.  Initially
 * it's NCPU, but since most archs scan the devtree for cpus
 * fairly early on during boot, the real max can be known before
 * ncpus is set (useful for early NCPU based allocations).
 */
int max_ncpus = NCPU;

/*
 * platforms that set max_ncpus to the maximum number of cpus that can be
 * dynamically added will set boot_max_ncpus to the number of cpus found
 * at device tree scan time during boot.
 */
int boot_max_ncpus = -1;

/*
 * Maximum possible CPU id.  This can never be >= NCPU since NCPU is
 * used to size arrays that are indexed by CPU id.
 */
processorid_t max_cpuid = NCPU - 1;

/*
 * Maximum cpu_seqid that was ever given out.  This number can only grow
 * and never shrink.  It can be used to optimize NCPU loops to avoid going
 * through CPUs which were never on-line.
 */
processorid_t max_cpu_seqid_ever = 0;

int ncpus_online = 1;

/*
 * CPU that we're trying to offline.  Protected by cpu_lock.
 */
cpu_t *cpu_inmotion;

/*
 * Can be raised to suppress further weakbindings, which are instead
 * satisfied by disabling preemption.  Must be raised/lowered under cpu_lock,
 * while individual thread weakbinding synchronization is done under thread
 * lock.
 */
int weakbindingbarrier;
/*
 * Variables used in pause_cpus().
 */
static volatile char safe_list[NCPU];

static struct _cpu_pause_info {
	int		cp_spl;		/* spl saved in pause_cpus() */
	volatile int	cp_go;		/* Go signal sent after all ready */
	int		cp_count;	/* # of CPUs to pause */
	ksema_t		cp_sem;		/* synch pause_cpus & cpu_pause */
	kthread_id_t	cp_paused;
	void		*(*cp_func)(void *);
} cpu_pause_info;

static kmutex_t pause_free_mutex;
static kcondvar_t pause_free_cv;
static struct cpu_sys_stats_ks_data {
	kstat_named_t cpu_ticks_idle;
	kstat_named_t cpu_ticks_user;
	kstat_named_t cpu_ticks_kernel;
	kstat_named_t cpu_ticks_wait;
	kstat_named_t cpu_nsec_idle;
	kstat_named_t cpu_nsec_user;
	kstat_named_t cpu_nsec_kernel;
	kstat_named_t cpu_nsec_dtrace;
	kstat_named_t cpu_nsec_intr;
	kstat_named_t cpu_load_intr;
	kstat_named_t wait_ticks_io;
	kstat_named_t dtrace_probes;
	kstat_named_t bread;
	kstat_named_t bwrite;
	kstat_named_t lread;
	kstat_named_t lwrite;
	kstat_named_t phread;
	kstat_named_t phwrite;
	kstat_named_t pswitch;
	kstat_named_t trap;
	kstat_named_t intr;
	kstat_named_t syscall;
	kstat_named_t sysread;
	kstat_named_t syswrite;
	kstat_named_t sysfork;
	kstat_named_t sysvfork;
	kstat_named_t sysexec;
	kstat_named_t readch;
	kstat_named_t writech;
	kstat_named_t rcvint;
	kstat_named_t xmtint;
	kstat_named_t mdmint;
	kstat_named_t rawch;
	kstat_named_t canch;
	kstat_named_t outch;
	kstat_named_t msg;
	kstat_named_t sema;
	kstat_named_t namei;
	kstat_named_t ufsiget;
	kstat_named_t ufsdirblk;
	kstat_named_t ufsipage;
	kstat_named_t ufsinopage;
	kstat_named_t procovf;
	kstat_named_t intrthread;
	kstat_named_t intrblk;
	kstat_named_t intrunpin;
	kstat_named_t idlethread;
	kstat_named_t inv_swtch;
	kstat_named_t nthreads;
	kstat_named_t cpumigrate;
	kstat_named_t xcalls;
	kstat_named_t mutex_adenters;
	kstat_named_t rw_rdfails;
	kstat_named_t rw_wrfails;
	kstat_named_t modload;
	kstat_named_t modunload;
	kstat_named_t bawrite;
	kstat_named_t iowait;
} cpu_sys_stats_ks_data_template = {
	{ "cpu_ticks_idle", KSTAT_DATA_UINT64 },
	{ "cpu_ticks_user", KSTAT_DATA_UINT64 },
	{ "cpu_ticks_kernel", KSTAT_DATA_UINT64 },
	{ "cpu_ticks_wait", KSTAT_DATA_UINT64 },
	{ "cpu_nsec_idle", KSTAT_DATA_UINT64 },
	{ "cpu_nsec_user", KSTAT_DATA_UINT64 },
	{ "cpu_nsec_kernel", KSTAT_DATA_UINT64 },
	{ "cpu_nsec_dtrace", KSTAT_DATA_UINT64 },
	{ "cpu_nsec_intr", KSTAT_DATA_UINT64 },
	{ "cpu_load_intr", KSTAT_DATA_UINT64 },
	{ "wait_ticks_io", KSTAT_DATA_UINT64 },
	{ "dtrace_probes", KSTAT_DATA_UINT64 },
	{ "bread", KSTAT_DATA_UINT64 },
	{ "bwrite", KSTAT_DATA_UINT64 },
	{ "lread", KSTAT_DATA_UINT64 },
	{ "lwrite", KSTAT_DATA_UINT64 },
	{ "phread", KSTAT_DATA_UINT64 },
	{ "phwrite", KSTAT_DATA_UINT64 },
	{ "pswitch", KSTAT_DATA_UINT64 },
	{ "trap", KSTAT_DATA_UINT64 },
	{ "intr", KSTAT_DATA_UINT64 },
	{ "syscall", KSTAT_DATA_UINT64 },
	{ "sysread", KSTAT_DATA_UINT64 },
	{ "syswrite", KSTAT_DATA_UINT64 },
	{ "sysfork", KSTAT_DATA_UINT64 },
	{ "sysvfork", KSTAT_DATA_UINT64 },
	{ "sysexec", KSTAT_DATA_UINT64 },
	{ "readch", KSTAT_DATA_UINT64 },
	{ "writech", KSTAT_DATA_UINT64 },
	{ "rcvint", KSTAT_DATA_UINT64 },
	{ "xmtint", KSTAT_DATA_UINT64 },
	{ "mdmint", KSTAT_DATA_UINT64 },
	{ "rawch", KSTAT_DATA_UINT64 },
	{ "canch", KSTAT_DATA_UINT64 },
	{ "outch", KSTAT_DATA_UINT64 },
	{ "msg", KSTAT_DATA_UINT64 },
	{ "sema", KSTAT_DATA_UINT64 },
	{ "namei", KSTAT_DATA_UINT64 },
	{ "ufsiget", KSTAT_DATA_UINT64 },
	{ "ufsdirblk", KSTAT_DATA_UINT64 },
	{ "ufsipage", KSTAT_DATA_UINT64 },
	{ "ufsinopage", KSTAT_DATA_UINT64 },
	{ "procovf", KSTAT_DATA_UINT64 },
	{ "intrthread", KSTAT_DATA_UINT64 },
	{ "intrblk", KSTAT_DATA_UINT64 },
	{ "intrunpin", KSTAT_DATA_UINT64 },
	{ "idlethread", KSTAT_DATA_UINT64 },
	{ "inv_swtch", KSTAT_DATA_UINT64 },
	{ "nthreads", KSTAT_DATA_UINT64 },
	{ "cpumigrate", KSTAT_DATA_UINT64 },
	{ "xcalls", KSTAT_DATA_UINT64 },
	{ "mutex_adenters", KSTAT_DATA_UINT64 },
	{ "rw_rdfails", KSTAT_DATA_UINT64 },
	{ "rw_wrfails", KSTAT_DATA_UINT64 },
	{ "modload", KSTAT_DATA_UINT64 },
	{ "modunload", KSTAT_DATA_UINT64 },
	{ "bawrite", KSTAT_DATA_UINT64 },
	{ "iowait", KSTAT_DATA_UINT64 },
};
static struct cpu_vm_stats_ks_data {
	kstat_named_t pgrec;
	kstat_named_t pgfrec;
	kstat_named_t pgin;
	kstat_named_t pgpgin;
	kstat_named_t pgout;
	kstat_named_t pgpgout;
	kstat_named_t swapin;
	kstat_named_t pgswapin;
	kstat_named_t swapout;
	kstat_named_t pgswapout;
	kstat_named_t zfod;
	kstat_named_t dfree;
	kstat_named_t scan;
	kstat_named_t rev;
	kstat_named_t hat_fault;
	kstat_named_t as_fault;
	kstat_named_t maj_fault;
	kstat_named_t cow_fault;
	kstat_named_t prot_fault;
	kstat_named_t softlock;
	kstat_named_t kernel_asflt;
	kstat_named_t pgrrun;
	kstat_named_t execpgin;
	kstat_named_t execpgout;
	kstat_named_t execfree;
	kstat_named_t anonpgin;
	kstat_named_t anonpgout;
	kstat_named_t anonfree;
	kstat_named_t fspgin;
	kstat_named_t fspgout;
	kstat_named_t fsfree;
} cpu_vm_stats_ks_data_template = {
	{ "pgrec", KSTAT_DATA_UINT64 },
	{ "pgfrec", KSTAT_DATA_UINT64 },
	{ "pgin", KSTAT_DATA_UINT64 },
	{ "pgpgin", KSTAT_DATA_UINT64 },
	{ "pgout", KSTAT_DATA_UINT64 },
	{ "pgpgout", KSTAT_DATA_UINT64 },
	{ "swapin", KSTAT_DATA_UINT64 },
	{ "pgswapin", KSTAT_DATA_UINT64 },
	{ "swapout", KSTAT_DATA_UINT64 },
	{ "pgswapout", KSTAT_DATA_UINT64 },
	{ "zfod", KSTAT_DATA_UINT64 },
	{ "dfree", KSTAT_DATA_UINT64 },
	{ "scan", KSTAT_DATA_UINT64 },
	{ "rev", KSTAT_DATA_UINT64 },
	{ "hat_fault", KSTAT_DATA_UINT64 },
	{ "as_fault", KSTAT_DATA_UINT64 },
	{ "maj_fault", KSTAT_DATA_UINT64 },
	{ "cow_fault", KSTAT_DATA_UINT64 },
	{ "prot_fault", KSTAT_DATA_UINT64 },
	{ "softlock", KSTAT_DATA_UINT64 },
	{ "kernel_asflt", KSTAT_DATA_UINT64 },
	{ "pgrrun", KSTAT_DATA_UINT64 },
	{ "execpgin", KSTAT_DATA_UINT64 },
	{ "execpgout", KSTAT_DATA_UINT64 },
	{ "execfree", KSTAT_DATA_UINT64 },
	{ "anonpgin", KSTAT_DATA_UINT64 },
	{ "anonpgout", KSTAT_DATA_UINT64 },
	{ "anonfree", KSTAT_DATA_UINT64 },
	{ "fspgin", KSTAT_DATA_UINT64 },
	{ "fspgout", KSTAT_DATA_UINT64 },
	{ "fsfree", KSTAT_DATA_UINT64 },
};
/*
 * Force the specified thread to migrate to the appropriate processor.
 * Called with thread lock held, returns with it dropped.
 */
force_thread_migrate(kthread_id_t tp)
{
	ASSERT(THREAD_LOCK_HELD(tp));
	if (tp == curthread) {
		THREAD_TRANSITION(tp);
		thread_unlock_nopreempt(tp);
	} else {
		if (tp->t_state == TS_ONPROC) {
		} else if (tp->t_state == TS_RUN) {
		}
	}
}
/*
 * Set affinity for a specified CPU.
 * A reference count is incremented and the affinity is held until the
 * reference count is decremented to zero by thread_affinity_clear().
 * This is so regions of code requiring affinity can be nested.
 * Caller needs to ensure that cpu_id remains valid, which can be
 * done by holding cpu_lock across this call, unless the caller
 * specifies CPU_CURRENT in which case the cpu_lock will be acquired
 * by thread_affinity_set and CPU->cpu_id will be the target CPU.
 */
thread_affinity_set(kthread_id_t t, int cpu_id)
{
	ASSERT(!(t == curthread && t->t_weakbound_cpu != NULL));

	if ((c = cpu_id) == CPU_CURRENT) {
		mutex_enter(&cpu_lock);
		cpu_id = CPU->cpu_id;
	}

	/*
	 * We should be asserting that cpu_lock is held here, but
	 * the NCA code doesn't acquire it.  The following assert
	 * should be uncommented when the NCA code is fixed.
	 *
	 * ASSERT(MUTEX_HELD(&cpu_lock));
	 */
	ASSERT((cpu_id >= 0) && (cpu_id < NCPU));

	ASSERT(cp != NULL);	/* user must provide a good cpu_id */

	/*
	 * If there is already a hard affinity requested, and this affinity
	 * conflicts with that, panic.
	 */
	if (t->t_affinitycnt > 0 && t->t_bound_cpu != cp) {
		panic("affinity_set: setting %p but already bound to %p",
		    (void *)cp, (void *)t->t_bound_cpu);
	}

	/*
	 * Make sure we're running on the right CPU.
	 */
	if (cp != t->t_cpu || t != curthread) {
		force_thread_migrate(t);	/* drops thread lock */
	}

	if (c == CPU_CURRENT)
		mutex_exit(&cpu_lock);
}

/*
 * Wrapper for backward compatibility.
 */
affinity_set(int cpu_id)
{
	thread_affinity_set(curthread, cpu_id);
}
/*
 * Decrement the affinity reservation count and if it becomes zero,
 * clear the CPU affinity for the current thread, or set it to the user's
 * software binding request.
 */
thread_affinity_clear(kthread_id_t t)
{
	register processorid_t binding;

	if (--t->t_affinitycnt == 0) {
		if ((binding = t->t_bind_cpu) == PBIND_NONE) {
			/*
			 * Adjust disp_max_unbound_pri if necessary.
			 */
			disp_adjust_unbound_pri(t);
			t->t_bound_cpu = NULL;
			if (t->t_cpu->cpu_part != t->t_cpupart) {
				force_thread_migrate(t);
			}
		} else {
			t->t_bound_cpu = cpu[binding];
			/*
			 * Make sure the thread is running on the bound CPU.
			 */
			if (t->t_cpu != t->t_bound_cpu) {
				force_thread_migrate(t);
				return;		/* already dropped lock */
			}
		}
	}
}

/*
 * Wrapper for backward compatibility.
 */
affinity_clear(void)
{
	thread_affinity_clear(curthread);
}
/*
 * Weak cpu affinity.  Bind to the "current" cpu for short periods
 * of time during which the thread must not block (but may be preempted).
 * Use this instead of kpreempt_disable() when it is only "no migration"
 * rather than "no preemption" semantics that are required - disabling
 * preemption holds higher priority threads off of cpu and if the
 * operation that is protected is more than momentary, this is not good.
 *
 * Weakly bound threads will not prevent a cpu from being offlined -
 * we'll only run them on the cpu to which they are weakly bound but
 * (because they do not block) we'll always be able to move them on to
 * another cpu at offline time if we give them just a short moment to
 * run during which they will unbind.  To give a cpu a chance of offlining,
 * however, we require a barrier to weak bindings that may be raised for a
 * given cpu (offline/move code may set this and then wait a short time for
 * existing weak bindings to drop); the cpu_inmotion pointer is that barrier.
 *
 * There are few restrictions on the calling context of thread_nomigrate.
 * The caller must not hold the thread lock.  Calls may be nested.
 *
 * After weakbinding a thread must not perform actions that may block.
 * In particular it must not call thread_affinity_set; calling that when
 * already weakbound is nonsensical anyway.
 *
 * If curthread is prevented from migrating for other reasons
 * (kernel preemption disabled; high pil; strongly bound; interrupt thread)
 * then the weak binding will succeed even if this cpu is the target of an
 * offline/move request.
 */
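/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * a short, non-blocking sequence that must not migrate, but is happy to
 * be preempted, brackets itself like this:
 *
 *	thread_nomigrate();
 *	c = CPU->cpu_id;	(any short per-CPU access goes here)
 *	thread_allowmigrate();
 */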
thread_nomigrate(void)
{
	kthread_id_t t = curthread;

	/*
	 * A highlevel interrupt must not modify t_nomigrate or
	 * t_weakbound_cpu of the thread it has interrupted.  A lowlevel
	 * interrupt thread cannot migrate and we can avoid the
	 * thread_lock call below by short-circuiting here.  In either
	 * case we can just return since no migration is possible and
	 * the condition will persist (ie, when we test for these again
	 * in thread_allowmigrate they can't have changed).  Migration
	 * is also impossible if we're at or above DISP_LEVEL pil.
	 */
	if (CPU_ON_INTR(cp) || t->t_flag & T_INTR_THREAD ||
	    getpil() >= DISP_LEVEL) {
		return;
	}

	/*
	 * We must be consistent with existing weak bindings.  Since we
	 * may be interrupted between the increment of t_nomigrate and
	 * the store to t_weakbound_cpu below we cannot assume that
	 * t_weakbound_cpu will be set if t_nomigrate is.  Note that we
	 * cannot assert t_weakbound_cpu == t_bind_cpu since that is not
	 * the case.
	 */
	if (t->t_nomigrate && t->t_weakbound_cpu && t->t_weakbound_cpu != cp) {
		panic("thread_nomigrate: binding to %p but already "
		    "bound to %p", (void *)cp,
		    (void *)t->t_weakbound_cpu);
	}

	/*
	 * At this point we have preemption disabled and we don't yet hold
	 * the thread lock.  So it's possible that somebody else could
	 * set t_bind_cpu here and not be able to force us across to the
	 * new cpu (since we have preemption disabled).
	 */
	thread_lock(curthread);

	/*
	 * If further weak bindings are being (temporarily) suppressed then
	 * we'll settle for disabling kernel preemption (which assures
	 * no migration provided the thread does not block which it is
	 * not allowed to if using thread_nomigrate).  We must remember
	 * this disposition so we can take appropriate action in
	 * thread_allowmigrate.  If this is a nested call and the
	 * thread is already weakbound then fall through as normal.
	 * We remember the decision to settle for kpreempt_disable through
	 * negative nesting counting in t_nomigrate.  Once a thread has had one
	 * weakbinding request satisfied in this way any further (nested)
	 * requests will continue to be satisfied in the same way,
	 * even if weak bindings have recommenced.
	 */
	if (t->t_nomigrate < 0 || weakbindingbarrier && t->t_nomigrate == 0) {
		thread_unlock(curthread);
		return;		/* with kpreempt_disable still active */
	}

	/*
	 * We hold thread_lock so t_bind_cpu cannot change.  We could,
	 * however, be running on a different cpu to which we are t_bound_cpu
	 * to (as explained above).  If we grant the weak binding request
	 * in that case then the dispatcher must favour our weak binding
	 * over our strong (in which case, just as when preemption is
	 * disabled, we can continue to run on a cpu other than the one to
	 * which we are strongbound; the difference in this case is that
	 * this thread can be preempted and so can appear on the dispatch
	 * queues of a cpu other than the one it is strongbound to).
	 *
	 * If the cpu we are running on does not appear to be a current
	 * offline target (we check cpu_inmotion to determine this - since
	 * we don't hold cpu_lock we may not see a recent store to that,
	 * so it's possible that we at times can grant a weak binding to a
	 * cpu that is an offline target, but that one request will not
	 * prevent the offline from succeeding) then we will always grant
	 * the weak binding request.  This includes the case above where
	 * we grant a weakbinding not commensurate with our strong binding.
	 *
	 * If our cpu does appear to be an offline target then we're inclined
	 * not to grant the weakbinding request just yet - we'd prefer to
	 * migrate to another cpu and grant the request there.  The
	 * exceptions are those cases where going through preemption code
	 * will not result in us changing cpu:
	 *
	 * . interrupts have already bypassed this case (see above)
	 * . we are already weakbound to this cpu (dispatcher code will
	 *   always return us to the weakbound cpu)
	 * . preemption was disabled even before we disabled it above
	 * . we are strongbound to this cpu (if we're strongbound to
	 *   another and not yet running there the trip through the
	 *   dispatcher will move us to the strongbound cpu and we
	 *   will grant the weak binding there)
	 */
	if (cp != cpu_inmotion || t->t_nomigrate > 0 || t->t_preempt > 1 ||
	    t->t_bound_cpu == cp) {
		/*
		 * Don't be tempted to store to t_weakbound_cpu only on
		 * the first nested bind request - if we're interrupted
		 * after the increment of t_nomigrate and before the
		 * store to t_weakbound_cpu and the interrupt calls
		 * thread_nomigrate then the assertion in thread_allowmigrate
		 * would fail.
		 */
		t->t_weakbound_cpu = cp;

		thread_unlock(curthread);
		/*
		 * Now that we have dropped the thread_lock another thread
		 * can set our t_weakbound_cpu, and will try to migrate us
		 * to the strongbound cpu (which will not be prevented by
		 * preemption being disabled since we're about to enable
		 * preemption).  We have granted the weakbinding to the current
		 * cpu, so again we are in the position that it is possible
		 * that our weak and strong bindings differ.  Again this
		 * is catered for by dispatcher code which will favour our
		 * weak binding.
		 */
	} else {
		/*
		 * Move to another cpu before granting the request by
		 * forcing this thread through preemption code.  When we
		 * get to set{front,back}dq called from CL_PREEMPT()
		 * cpu_choose() will be used to select a cpu to queue
		 * us on - that will see cpu_inmotion and take
		 * steps to avoid returning us to this cpu.
		 */
		cp->cpu_kprunrun = 1;
		thread_unlock(curthread);
		kpreempt_enable();	/* will call preempt() */
	}
}
thread_allowmigrate(void)
{
	kthread_id_t t = curthread;

	ASSERT(t->t_weakbound_cpu == CPU ||
	    (t->t_nomigrate < 0 && t->t_preempt > 0) ||
	    CPU_ON_INTR(CPU) || t->t_flag & T_INTR_THREAD ||
	    getpil() >= DISP_LEVEL);

	if (CPU_ON_INTR(CPU) || (t->t_flag & T_INTR_THREAD) ||
	    getpil() >= DISP_LEVEL)
		return;

	if (t->t_nomigrate < 0) {
		/*
		 * This thread was granted "weak binding" in the
		 * stronger form of kernel preemption disabling.
		 * Undo a level of nesting for both t_nomigrate
		 * and t_preempt.
		 */
	} else if (--t->t_nomigrate == 0) {
		/*
		 * Time to drop the weak binding.  We need to cater
		 * for the case where we're weakbound to a different
		 * cpu than that to which we're strongbound (a very
		 * temporary arrangement that must only persist until
		 * weak binding drops).  We don't acquire thread_lock
		 * here so even as this code executes t_bound_cpu
		 * may be changing.  So we disable preemption and
		 * a) in the case that t_bound_cpu changes while we
		 * have preemption disabled kprunrun will be set
		 * asynchronously, and b) if before disabling
		 * preemption we were already on a different cpu to
		 * our t_bound_cpu then we set kprunrun ourselves
		 * to force a trip through the dispatcher when
		 * preemption is enabled.
		 */
		if (t->t_bound_cpu &&
		    t->t_weakbound_cpu != t->t_bound_cpu)
			CPU->cpu_kprunrun = 1;
		t->t_weakbound_cpu = NULL;
	}
}
/*
 * weakbinding_stop can be used to temporarily cause weakbindings made
 * with thread_nomigrate to be satisfied through the stronger action of
 * kpreempt_disable.  weakbinding_start recommences normal weakbinding.
 */
weakbinding_stop(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	weakbindingbarrier = 1;
	membar_producer();	/* make visible before subsequent thread_lock */
}

weakbinding_start(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	weakbindingbarrier = 0;
}
/*
 * This routine is called to place the CPUs in a safe place so that
 * one of them can be taken off line or placed on line.  What we are
 * trying to do here is prevent a thread from traversing the list
 * of active CPUs while we are changing it or from getting placed on
 * the run queue of a CPU that has just gone off line.  We do this by
 * creating a thread with the highest possible prio for each CPU and
 * having it call this routine.  The advantage of this method is that
 * we can eliminate all checks for CPU_ACTIVE in the disp routines.
 * This makes disp faster at the expense of making p_online() slower
 * which is a good trade off.
 */
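/*
 * In outline (a paraphrase of the handshake, not a quote of the code
 * below): pause_cpus() marks every slot in safe_list[] and makes the
 * pause threads runnable; each pause thread entering cpu_pause() signals
 * cp_sem, spins until cp_go is set, then raises its spl and parks in
 * mach_cpu_pause() until start_cpus() resets its safe_list[] entry.
 */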
cpu_pause(int index)
{
	struct _cpu_pause_info *cpi = &cpu_pause_info;
	volatile char *safe = &safe_list[index];

	ASSERT((curthread->t_bound_cpu != NULL) || (*safe == PAUSE_DIE));

	while (*safe != PAUSE_DIE) {
		membar_enter();		/* make sure stores are flushed */
		sema_v(&cpi->cp_sem);	/* signal requesting thread */

		/*
		 * Wait here until all pause threads are running.  That
		 * indicates that it's safe to do the spl.  Until
		 * cpu_pause_info.cp_go is set, we don't want to spl
		 * because that might block clock interrupts needed
		 * to preempt threads on other CPUs.
		 */
		while (cpi->cp_go == 0)
			;
		/*
		 * Even though we are at the highest disp prio, we need
		 * to block out all interrupts below LOCK_LEVEL so that
		 * an intr doesn't come in, wake up a thread, and call
		 * setbackdq/setfrontdq.
		 */

		/*
		 * if cp_func has been set then call it using index as the
		 * argument, currently only used by cpr_suspend_cpus().
		 * This function is used as the code to execute on the
		 * "paused" cpu's when a machine comes out of a sleep state
		 * and CPU's were powered off.  (could also be used for
		 * hotplugging CPU's).
		 */
		if (cpi->cp_func != NULL)
			(*cpi->cp_func)((void *)lindex);

		mach_cpu_pause(safe);

		/*
		 * Waiting is at an end.  Switch out of cpu_pause
		 * loop and resume useful work.
		 */
	}

	mutex_enter(&pause_free_mutex);
	cv_broadcast(&pause_free_cv);
	mutex_exit(&pause_free_mutex);
}
/*
 * Allow the cpus to start running again.
 */
start_cpus(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_pause_info.cp_paused);
	cpu_pause_info.cp_paused = NULL;
	for (i = 0; i < NCPU; i++)
		safe_list[i] = PAUSE_IDLE;
	membar_enter();			/* make sure stores are flushed */
	splx(cpu_pause_info.cp_spl);
}
/*
 * Allocate a pause thread for a CPU.
 */
cpu_pause_alloc(cpu_t *cp)
{
	long cpun = cp->cpu_id;

	/*
	 * Note, v.v_nglobpris will not change value as long as I hold
	 * cpu_lock.
	 */
	t = thread_create(NULL, 0, cpu_pause, (void *)cpun,
	    0, &p0, TS_STOPPED, v.v_nglobpris - 1);
	t->t_disp_queue = cp->cpu_disp;
	t->t_affinitycnt = 1;
	cp->cpu_pause_thread = t;

	/*
	 * Registering a thread in the callback table is usually done
	 * in the initialization code of the thread.  In this
	 * case, we do it right after thread creation because the
	 * thread itself may never run, and we need to register the
	 * fact that it is safe for cpr suspend.
	 */
	CALLB_CPR_INIT_SAFE(t, "cpu_pause");
}
/*
 * Free a pause thread for a CPU.
 */
cpu_pause_free(cpu_t *cp)
{
	int cpun = cp->cpu_id;

	ASSERT(MUTEX_HELD(&cpu_lock));
	/*
	 * We have to get the thread and tell him to die.
	 */
	if ((t = cp->cpu_pause_thread) == NULL) {
		ASSERT(safe_list[cpun] == PAUSE_IDLE);
		return;
	}
	t->t_cpu = CPU;		/* disp gets upset if last cpu is quiesced. */
	t->t_bound_cpu = NULL;	/* Must un-bind; cpu may not be running. */
	t->t_pri = v.v_nglobpris - 1;
	ASSERT(safe_list[cpun] == PAUSE_IDLE);
	safe_list[cpun] = PAUSE_DIE;
	THREAD_TRANSITION(t);
	thread_unlock_nopreempt(t);

	/*
	 * If we don't wait for the thread to actually die, it may try to
	 * run on the wrong cpu as part of an actual call to pause_cpus().
	 */
	mutex_enter(&pause_free_mutex);
	while (safe_list[cpun] != PAUSE_DEAD) {
		cv_wait(&pause_free_cv, &pause_free_mutex);
	}
	mutex_exit(&pause_free_mutex);
	safe_list[cpun] = PAUSE_IDLE;

	cp->cpu_pause_thread = NULL;
}
/*
 * Initialize basic structures for pausing CPUs.
 */
cpu_pause_init()
{
	sema_init(&cpu_pause_info.cp_sem, 0, NULL, SEMA_DEFAULT, NULL);
	/*
	 * Create initial CPU pause thread.
	 */
	cpu_pause_alloc(CPU);
}

/*
 * Start the threads used to pause another CPU.
 */
cpu_pause_start(processorid_t cpu_id)
{
	for (i = 0; i < NCPU; i++) {
		if (!CPU_IN_SET(cpu_available, i) || (i == cpu_id)) {
			safe_list[i] = PAUSE_WAIT;
			continue;
		}

		/*
		 * Skip CPU if it is quiesced or not yet started.
		 */
		if ((cp->cpu_flags & (CPU_QUIESCED | CPU_READY)) !=
		    CPU_READY) {
			safe_list[i] = PAUSE_WAIT;
			continue;
		}

		/*
		 * Start this CPU's pause thread.
		 */
		t = cp->cpu_pause_thread;
		/*
		 * Reset the priority, since nglobpris may have
		 * changed since the thread was created, if someone
		 * has loaded the RT (or some other) scheduling
		 * class.
		 */
		t->t_pri = v.v_nglobpris - 1;
		THREAD_TRANSITION(t);
		thread_unlock_nopreempt(t);
	}
}
/*
 * Pause all of the CPUs except the one we are on by creating a high
 * priority thread bound to those CPUs.
 *
 * Note that one must be extremely careful regarding code
 * executed while CPUs are paused.  Since a CPU may be paused
 * while a thread scheduling on that CPU is holding an adaptive
 * lock, code executed with CPUs paused must not acquire adaptive
 * (or low-level spin) locks.  Also, such code must not block,
 * since the thread that is supposed to initiate the wakeup may
 * never run.
 *
 * With a few exceptions, the restrictions on code executed with CPUs
 * paused match those for code executed at high-level interrupt
 * context.
 */
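/*
 * A minimal usage sketch (hypothetical caller, not from this file): a
 * caller that must atomically swap a structure that lock-free cpu_list
 * walkers may be reading would do so with every other CPU parked:
 *
 *	mutex_enter(&cpu_lock);
 *	pause_cpus(NULL, NULL);
 *	(update the structure here; no blocking, no adaptive locks)
 *	start_cpus();
 *	mutex_exit(&cpu_lock);
 */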
pause_cpus(cpu_t *off_cp, void *(*func)(void *))
{
	processorid_t cpu_id;
	struct _cpu_pause_info *cpi = &cpu_pause_info;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpi->cp_paused == NULL);
	for (i = 0; i < NCPU; i++)
		safe_list[i] = PAUSE_IDLE;

	cpi->cp_func = func;

	/*
	 * If running on the cpu that is going offline, get off it.
	 * This is so that it won't be necessary to rechoose a CPU
	 * when done.
	 */
	if (CPU == off_cp)
		cpu_id = off_cp->cpu_next_part->cpu_id;
	else
		cpu_id = CPU->cpu_id;
	affinity_set(cpu_id);

	/*
	 * Start the pause threads and record how many were started
	 */
	cpi->cp_count = cpu_pause_start(cpu_id);

	/*
	 * Now wait for all CPUs to be running the pause thread.
	 */
	while (cpi->cp_count > 0) {
		/*
		 * Spin reading the count without grabbing the disp
		 * lock to make sure we don't prevent the pause
		 * threads from getting the lock.
		 */
		while (sema_held(&cpi->cp_sem))
			;
		if (sema_tryp(&cpi->cp_sem))
			--cpi->cp_count;
	}
	cpi->cp_go = 1;			/* all have reached cpu_pause */

	/*
	 * Now wait for all CPUs to spl.  (Transition from PAUSE_READY
	 * to PAUSE_WAIT.)
	 */
	for (i = 0; i < NCPU; i++) {
		while (safe_list[i] != PAUSE_WAIT)
			;
	}
	cpi->cp_spl = splhigh();	/* block dispatcher on this CPU */
	cpi->cp_paused = curthread;
}

/*
 * Check whether the current thread has CPUs paused
 */
cpus_paused(void)
{
	if (cpu_pause_info.cp_paused != NULL) {
		ASSERT(cpu_pause_info.cp_paused == curthread);
		return (1);
	}
	return (0);
}

cpu_get_all(processorid_t cpun)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	if (cpun >= NCPU || cpun < 0 || !CPU_IN_SET(cpu_available, cpun))
		return (NULL);
	return (cpu[cpun]);
}

/*
 * Check whether cpun is a valid processor id and whether it should be
 * visible from the current zone.  If it is, return a pointer to the
 * associated CPU structure.
 */
cpu_get(processorid_t cpun)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	c = cpu_get_all(cpun);
	if (c != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
	    zone_pset_get(curproc->p_zone) != cpupart_query_cpu(c))
		return (NULL);
	return (c);
}
/*
 * The following functions should be used to check CPU states in the kernel.
 * They should be invoked with cpu_lock held.  Kernel subsystems interested
 * in CPU states should *not* use cpu_get_state() and various P_ONLINE/etc
 * states.  Those are for user-land (and system call) use only.
 */
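/*
 * A minimal usage sketch (hypothetical caller, not from this file),
 * where cp and online are the caller's own variables:
 *
 *	mutex_enter(&cpu_lock);
 *	online = cpu_is_online(cp);
 *	mutex_exit(&cpu_lock);
 */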
/*
 * Determine whether the CPU is online and handling interrupts.
 */
cpu_is_online(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_online(cpu->cpu_flags));
}

/*
 * Determine whether the CPU is offline (this includes spare and faulted).
 */
cpu_is_offline(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_offline(cpu->cpu_flags));
}

/*
 * Determine whether the CPU is powered off.
 */
cpu_is_poweredoff(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_poweredoff(cpu->cpu_flags));
}

/*
 * Determine whether the CPU is active but not handling interrupts.
 */
cpu_is_nointr(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_nointr(cpu->cpu_flags));
}

/*
 * Determine whether the CPU is active (scheduling threads).
 */
cpu_is_active(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_active(cpu->cpu_flags));
}

/*
 * Same as above, but these require cpu_flags instead of cpu_t pointers.
 */
cpu_flagged_online(cpu_flag_t cpu_flags)
{
	return (cpu_flagged_active(cpu_flags) &&
	    (cpu_flags & CPU_ENABLE));
}

cpu_flagged_offline(cpu_flag_t cpu_flags)
{
	return (((cpu_flags & CPU_POWEROFF) == 0) &&
	    ((cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY));
}

cpu_flagged_poweredoff(cpu_flag_t cpu_flags)
{
	return ((cpu_flags & CPU_POWEROFF) == CPU_POWEROFF);
}

cpu_flagged_nointr(cpu_flag_t cpu_flags)
{
	return (cpu_flagged_active(cpu_flags) &&
	    (cpu_flags & CPU_ENABLE) == 0);
}

cpu_flagged_active(cpu_flag_t cpu_flags)
{
	return (((cpu_flags & (CPU_POWEROFF | CPU_FAULTED | CPU_SPARE)) == 0) &&
	    ((cpu_flags & (CPU_READY | CPU_OFFLINE)) == CPU_READY));
}
/*
 * Bring the indicated CPU online.
 */
cpu_online(cpu_t *cp)
{
	/*
	 * Handle on-line request.
	 *	This code must put the new CPU on the active list before
	 *	starting it because it will not be paused, and will start
	 *	using the active list immediately.  The real start occurs
	 *	when the CPU_QUIESCED flag is turned off.
	 */
	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Put all the cpus into a known safe place.
	 * No mutexes can be entered while CPUs are paused.
	 */
	error = mp_cpu_start(cp);	/* arch-dep hook */

	pg_cpupart_in(cp, cp->cpu_part);
	pause_cpus(NULL, NULL);
	cpu_add_active_internal(cp);
	if (cp->cpu_flags & CPU_FAULTED) {
		cp->cpu_flags &= ~CPU_FAULTED;
		mp_cpu_faulted_exit(cp);
	}
	cp->cpu_flags &= ~(CPU_QUIESCED | CPU_OFFLINE | CPU_FROZEN |
	    CPU_SPARE | CPU_FAULTED);
	CPU_NEW_GENERATION(cp);
	cpu_stats_kstat_create(cp);
	cpu_create_intrstat(cp);
	lgrp_kstat_create(cp);
	cpu_state_change_notify(cp->cpu_id, CPU_ON);
	cpu_intr_enable(cp);		/* arch-dep hook */
	cpu_state_change_notify(cp->cpu_id, CPU_INTR_ON);

	/*
	 * This has to be called only after cyclic_online().  This
	 * function uses cyclics.
	 */
	callout_cpu_online(cp);
	poke_cpu(cp->cpu_id);
}
/*
 * Take the indicated CPU offline.
 */
cpu_offline(cpu_t *cp, int flags)
{
	int	callout_off = 0;
	int	(*bound_func)(struct cpu *, int);
	boolean_t unbind_all_threads = (flags & CPU_FORCED) != 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * If we're going from faulted or spare to offline, just
	 * clear these flags and update CPU state.
	 */
	if (cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) {
		if (cp->cpu_flags & CPU_FAULTED) {
			cp->cpu_flags &= ~CPU_FAULTED;
			mp_cpu_faulted_exit(cp);
		}
		cp->cpu_flags &= ~CPU_SPARE;
	}

	/*
	 * Handle off-line request.
	 */

	/*
	 * Don't offline last online CPU in partition
	 */
	if (ncpus_online <= 1 || pp->cp_ncpus <= 1 || cpu_intr_count(cp) < 2)
		return (EBUSY);

	/*
	 * Unbind all soft-bound threads bound to our CPU and hard bound
	 * threads if we were asked to.
	 */
	error = cpu_unbind(cp->cpu_id, unbind_all_threads);

	/*
	 * We shouldn't be bound to this CPU ourselves.
	 */
	if (curthread->t_bound_cpu == cp)
		return (EBUSY);

	/*
	 * Tell interested parties that this CPU is going offline.
	 */
	CPU_NEW_GENERATION(cp);
	cpu_state_change_notify(cp->cpu_id, CPU_OFF);

	/*
	 * Tell the PG subsystem that the CPU is leaving the partition
	 */
	pg_cpupart_out(cp, pp);

	/*
	 * Take the CPU out of interrupt participation so we won't find
	 * bound kernel threads.  If the architecture cannot completely
	 * shut off interrupts on the CPU, don't quiesce it, but don't
	 * run anything but interrupt thread... this is indicated by
	 * the CPU_OFFLINE flag being on but the CPU_QUIESCE flag being
	 * off.
	 */
	intr_enable = cp->cpu_flags & CPU_ENABLE;
	no_quiesce = cpu_intr_disable(cp);

	/*
	 * Record that we are aiming to offline this cpu.  This acts as
	 * a barrier to further weak binding requests in thread_nomigrate
	 * and also causes cpu_choose, disp_lowpri_cpu and setfrontdq to
	 * lean away from this cpu.  Further strong bindings are already
	 * avoided since we hold cpu_lock.  Since threads that are set
	 * runnable around now and others coming off the target cpu are
	 * directed away from the target, existing strong and weak bindings
	 * (especially the latter) to the target cpu stand maximum chance of
	 * being able to unbind during the short delay loop below (if other
	 * unbound threads compete they may not see cpu in time to unbind
	 * even if they would do so immediately).
	 */

	/*
	 * Check for kernel threads (strong or weak) bound to that CPU.
	 * Strongly bound threads may not unbind, and we'll have to return
	 * EBUSY.  Weakly bound threads should always disappear - we've
	 * stopped more weak binding with cpu_inmotion and existing
	 * bindings will drain imminently (they may not block).  Nonetheless
	 * we will wait for a fixed period for all bound threads to disappear.
	 * Inactive interrupt threads are OK (they'll be in TS_FREE
	 * state).  If test finds some bound threads, wait a few ticks
	 * to give short-lived threads (such as interrupts) chance to
	 * complete.  Note that if no_quiesce is set, i.e. this cpu
	 * is required to service interrupts, then we take the route
	 * that permits interrupt threads to be active (or bypassed).
	 */
	bound_func = no_quiesce ? disp_bound_threads : disp_bound_anythreads;

again:	for (loop_count = 0; (*bound_func)(cp, 0); loop_count++) {
		if (loop_count >= 5) {
			error = EBUSY;	/* some threads still bound */
			break;
		}

		/*
		 * If some threads were assigned, give them
		 * a chance to complete or move.
		 *
		 * This assumes that the clock_thread is not bound
		 * to any CPU, because the clock_thread is needed to
		 * do the delay(hz/100).
		 *
		 * Note: we still hold the cpu_lock while waiting for
		 * the next clock tick.  This is OK since it isn't
		 * needed for anything else except processor_bind(2),
		 * and system initialization.  If we drop the lock,
		 * we would risk another p_online disabling the last
		 * interrupt-enabled CPU.
		 */
		delay(hz / 100);
	}

	if (error == 0 && callout_off == 0) {
		callout_cpu_offline(cp);
	}

	if (error == 0 && cyclic_off == 0) {
		if (!cyclic_offline(cp)) {
			/*
			 * We must have bound cyclics...
			 */
		}
	}

	/*
	 * Call mp_cpu_stop() to perform any special operations
	 * needed for this machine architecture to offline a CPU.
	 */
	error = mp_cpu_stop(cp);	/* arch-dep hook */

	/*
	 * If that all worked, take the CPU offline and decrement
	 * ncpus_online.
	 */

	/*
	 * Put all the cpus into a known safe place.
	 * No mutexes can be entered while CPUs are paused.
	 */
	pause_cpus(cp, NULL);

	/*
	 * Repeat the operation, if necessary, to make sure that
	 * all outstanding low-level interrupts run to completion
	 * before we set the CPU_QUIESCED flag.  It's also possible
	 * that a thread has weak bound to the cpu despite our raising
	 * cpu_inmotion above since it may have loaded that
	 * value before the barrier became visible (this would have
	 * to be the thread that was on the target cpu at the time
	 * we raised the barrier).
	 */
	if ((!no_quiesce && cp->cpu_intr_actv != 0) ||
	    (*bound_func)(cp, 1)) {
		(void) mp_cpu_start(cp);
		goto again;
	}
	ncp = cp->cpu_next_part;
	cpu_lpl = cp->cpu_lpl;
	ASSERT(cpu_lpl != NULL);

	/*
	 * Remove the CPU from the list of active CPUs.
	 */
	cpu_remove_active(cp);

	/*
	 * Walk the active process list and look for threads
	 * whose home lgroup needs to be updated, or
	 * the last CPU they run on is the one being offlined now.
	 */
	ASSERT(curthread->t_cpu != cp);
	for (p = practive; p != NULL; p = p->p_next) {
		do {
			ASSERT(t->t_lpl != NULL);
			/*
			 * Taking last CPU in lpl offline
			 * Rehome thread if it is in this lpl
			 * Otherwise, update the count of how many
			 * threads are in this CPU's lgroup but have
			 * a different lpl.
			 */
			if (cpu_lpl->lpl_ncpu == 0) {
				if (t->t_lpl == cpu_lpl)
					lgrp_move_thread(t,
					    lgrp_choose(t, t->t_cpupart), 0);
				else if (t->t_lpl->lpl_lgrpid ==
				    cpu_lpl->lpl_lgrpid)
					lgrp_diff_lpl++;
			}
			ASSERT(t->t_lpl->lpl_ncpu > 0);

			/*
			 * Update CPU last ran on if it was this CPU
			 */
			if (t->t_cpu == cp && t->t_bound_cpu != cp)
				t->t_cpu = disp_lowpri_cpu(ncp,
				    t->t_lpl, t->t_pri, NULL);
			ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp ||
			    t->t_weakbound_cpu == cp);
		} while (t != p->p_tlist);

		/*
		 * Didn't find any threads in the same lgroup as this
		 * CPU with a different lpl, so remove the lgroup from
		 * the process lgroup bitmask.
		 */
		if (lgrp_diff_lpl == 0)
			klgrpset_del(p->p_lgrpset, cpu_lpl->lpl_lgrpid);
	}

	/*
	 * Walk thread list looking for threads that need to be
	 * rehomed, since there are some threads that are not in
	 * their process's p_tlist.
	 */
	do {
		ASSERT(t != NULL && t->t_lpl != NULL);

		/*
		 * Rehome threads with same lpl as this CPU when this
		 * is the last CPU in the lpl.
		 */
		if ((cpu_lpl->lpl_ncpu == 0) && (t->t_lpl == cpu_lpl))
			lgrp_move_thread(t,
			    lgrp_choose(t, t->t_cpupart), 1);

		ASSERT(t->t_lpl->lpl_ncpu > 0);

		/*
		 * Update CPU last ran on if it was this CPU
		 */
		if (t->t_cpu == cp && t->t_bound_cpu != cp) {
			t->t_cpu = disp_lowpri_cpu(ncp,
			    t->t_lpl, t->t_pri, NULL);
		}
		ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp ||
		    t->t_weakbound_cpu == cp);
	} while (t != curthread);
	ASSERT((cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) == 0);
	cp->cpu_flags |= CPU_OFFLINE;
	disp_cpu_inactive(cp);
	cp->cpu_flags |= CPU_QUIESCED;
	cpu_inmotion = NULL;

	cpu_stats_kstat_destroy(cp);
	cpu_delete_intrstat(cp);
	lgrp_kstat_destroy(cp);

	cpu_inmotion = NULL;

	/*
	 * If we failed, re-enable interrupts.
	 * Do this even if cpu_intr_disable returned an error, because
	 * it may have partially disabled interrupts.
	 */
	if (error && intr_enable)
		cpu_intr_enable(cp);

	/*
	 * If we failed, but managed to offline the cyclic subsystem on this
	 * CPU, bring it back online.
	 */
	if (error && cyclic_off)
		cyclic_online(cp);

	/*
	 * If we failed, but managed to offline callouts on this CPU,
	 * bring it back online.
	 */
	if (error && callout_off)
		callout_cpu_online(cp);

	/*
	 * If we failed, tell the PG subsystem that the CPU is back
	 * in the partition.
	 */
	pg_cpupart_in(cp, pp);

	/*
	 * If we failed, we need to notify everyone that this CPU is back on.
	 */
	CPU_NEW_GENERATION(cp);
	cpu_state_change_notify(cp->cpu_id, CPU_ON);
	cpu_state_change_notify(cp->cpu_id, CPU_INTR_ON);

	return (error);
}
/*
 * Mark the indicated CPU as faulted, taking it offline.
 */
cpu_faulted(cpu_t *cp, int flags)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(!cpu_is_poweredoff(cp));

	if (cpu_is_offline(cp)) {
		cp->cpu_flags &= ~CPU_SPARE;
		cp->cpu_flags |= CPU_FAULTED;
		mp_cpu_faulted_enter(cp);
		return (0);
	}

	if ((error = cpu_offline(cp, flags)) == 0) {
		cp->cpu_flags |= CPU_FAULTED;
		mp_cpu_faulted_enter(cp);
	}

	return (error);
}

/*
 * Mark the indicated CPU as a spare, taking it offline.
 */
cpu_spare(cpu_t *cp, int flags)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(!cpu_is_poweredoff(cp));

	if (cpu_is_offline(cp)) {
		if (cp->cpu_flags & CPU_FAULTED) {
			cp->cpu_flags &= ~CPU_FAULTED;
			mp_cpu_faulted_exit(cp);
		}
		cp->cpu_flags |= CPU_SPARE;
		return (0);
	}

	if ((error = cpu_offline(cp, flags)) == 0) {
		cp->cpu_flags |= CPU_SPARE;
	}

	return (error);
}
/*
 * Take the indicated CPU from poweroff to offline.
 */
cpu_poweron(cpu_t *cp)
{
	int	error = ENOTSUP;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_is_poweredoff(cp));

	error = mp_cpu_poweron(cp);	/* arch-dep hook */

	return (error);
}

/*
 * Take the indicated CPU from any inactive state to powered off.
 */
cpu_poweroff(cpu_t *cp)
{
	int	error = ENOTSUP;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_is_offline(cp));

	if (!(cp->cpu_flags & CPU_QUIESCED))
		return (EBUSY);		/* not completely idle */

	error = mp_cpu_poweroff(cp);	/* arch-dep hook */

	return (error);
}

/*
 * Initialize the Sequential CPU id lookup table
 */
	tbl = kmem_zalloc(sizeof (struct cpu *) * max_ncpus, KM_SLEEP);
/*
 * Initialize the CPU lists for the first CPU.
 */
cpu_list_init(cpu_t *cp)
{
	clock_cpu_list = cp;

	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;

	CPUSET_ADD(cpu_seqid_inuse, 0);

	/*
	 * Bootstrap cpu_seq using cpu_list
	 * The cpu_seq[] table will be dynamically allocated
	 * when kmem later becomes available (but before going MP)
	 */
	cpu_seq = &cpu_list;

	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
	cp_default.cp_cpulist = cp;
	cp_default.cp_ncpus = 1;
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;
	cp->cpu_part = &cp_default;

	CPUSET_ADD(cpu_available, cp->cpu_id);
}
/*
 * Insert a CPU into the list of available CPUs.
 */
cpu_add_unit(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_list != NULL);	/* list started in cpu_list_init */

	lgrp_config(LGRP_CONFIG_CPU_ADD, (uintptr_t)cp, 0);

	/*
	 * Note: most users of the cpu_list will grab the
	 * cpu_lock to insure that it isn't modified.  However,
	 * certain users can't or won't do that.  To allow this
	 * we pause the other cpus.  Users who walk the list
	 * without cpu_lock, must disable kernel preemption
	 * to insure that the list isn't modified underneath
	 * them.  Also, any cached pointers to cpu structures
	 * must be revalidated by checking to see if the
	 * cpu_next pointer points to itself.  This check must
	 * be done with the cpu_lock held or kernel preemption
	 * disabled.  This check relies upon the fact that
	 * old cpu structures are not free'ed or cleared after
	 * they are removed from the cpu_list.
	 *
	 * Note that the clock code walks the cpu list dereferencing
	 * the cpu_part pointer, so we need to initialize it before
	 * adding the cpu to the list.
	 */
	cp->cpu_part = &cp_default;
	pause_cpus(NULL, NULL);
	cp->cpu_next = cpu_list;
	cp->cpu_prev = cpu_list->cpu_prev;
	cpu_list->cpu_prev->cpu_next = cp;
	cpu_list->cpu_prev = cp;

	for (seqid = 0; CPU_IN_SET(cpu_seqid_inuse, seqid); seqid++)
		continue;
	CPUSET_ADD(cpu_seqid_inuse, seqid);
	cp->cpu_seqid = seqid;

	if (seqid > max_cpu_seqid_ever)
		max_cpu_seqid_ever = seqid;

	ASSERT(ncpus < max_ncpus);
	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
	cpu[cp->cpu_id] = cp;
	CPUSET_ADD(cpu_available, cp->cpu_id);
	cpu_seq[cp->cpu_seqid] = cp;

	/*
	 * allocate a pause thread for this CPU.
	 */
	cpu_pause_alloc(cp);

	/*
	 * So that new CPUs won't have NULL prev_onln and next_onln pointers,
	 * link them into a list of just that CPU.
	 * This is so that disp_lowpri_cpu will work for thread_create in
	 * pause_cpus() when called from the startup thread in a new CPU.
	 */
	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;
	cpu_info_kstat_create(cp);
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;

	init_cpu_mstate(cp, CMS_SYSTEM);

	pool_pset_mod = gethrtime();
}
/*
 * Do the opposite of cpu_add_unit().
 */
cpu_del_unit(int cpuid)
{
	struct cpu	*cp, *cpnext;

	ASSERT(MUTEX_HELD(&cpu_lock));

	cp = cpu[cpuid];
	ASSERT(cp->cpu_next_onln == cp);
	ASSERT(cp->cpu_prev_onln == cp);
	ASSERT(cp->cpu_next_part == cp);
	ASSERT(cp->cpu_prev_part == cp);

	/*
	 * Tear down the CPU's physical ID cache, and update any
	 * processor groups.
	 */
	pg_cpu_fini(cp, NULL);
	pghw_physid_destroy(cp);

	/*
	 * Destroy kstat stuff.
	 */
	cpu_info_kstat_destroy(cp);
	term_cpu_mstate(cp);
	/*
	 * Free up pause thread.
	 */
	cpu_pause_free(cp);
	CPUSET_DEL(cpu_available, cp->cpu_id);
	cpu[cp->cpu_id] = NULL;
	cpu_seq[cp->cpu_seqid] = NULL;

	/*
	 * The clock thread and mutex_vector_enter cannot hold the
	 * cpu_lock while traversing the cpu list, therefore we pause
	 * all other threads by pausing the other cpus.  These, and any
	 * other routines holding cpu pointers while possibly sleeping
	 * must be sure to call kpreempt_disable before processing the
	 * list and be sure to check that the cpu has not been deleted
	 * after any sleeps (check cp->cpu_next != NULL).  We guarantee
	 * to keep the deleted cpu structure around.
	 *
	 * Note that this MUST be done AFTER cpu_available
	 * has been updated so that we don't waste time
	 * trying to pause the cpu we're trying to delete.
	 */
	pause_cpus(NULL, NULL);

	cpnext = cp->cpu_next;
	cp->cpu_prev->cpu_next = cp->cpu_next;
	cp->cpu_next->cpu_prev = cp->cpu_prev;

	/*
	 * Signals that the cpu has been deleted (see above).
	 */
	cp->cpu_next = NULL;
	cp->cpu_prev = NULL;

	CPUSET_DEL(cpu_seqid_inuse, cp->cpu_seqid);

	lgrp_config(LGRP_CONFIG_CPU_DEL, (uintptr_t)cp, 0);

	pool_pset_mod = gethrtime();
}
/*
 * Add a CPU to the list of active CPUs.
 *	This routine must not get any locks, because other CPUs are paused.
 */
cpu_add_active_internal(cpu_t *cp)
{
	cpupart_t	*pp = cp->cpu_part;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_list != NULL);	/* list started in cpu_list_init */

	cp->cpu_next_onln = cpu_active;
	cp->cpu_prev_onln = cpu_active->cpu_prev_onln;
	cpu_active->cpu_prev_onln->cpu_next_onln = cp;
	cpu_active->cpu_prev_onln = cp;

	if (pp->cp_cpulist) {
		cp->cpu_next_part = pp->cp_cpulist;
		cp->cpu_prev_part = pp->cp_cpulist->cpu_prev_part;
		pp->cp_cpulist->cpu_prev_part->cpu_next_part = cp;
		pp->cp_cpulist->cpu_prev_part = cp;
	} else {
		ASSERT(pp->cp_ncpus == 0);
		pp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
	}
	if (pp->cp_ncpus == 1) {
		cp_numparts_nonempty++;
		ASSERT(cp_numparts_nonempty != 0);
	}

	lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)cp, 0);

	bzero(&cp->cpu_loadavg, sizeof (cp->cpu_loadavg));
}

/*
 * Add a CPU to the list of active CPUs.
 *	This is called from machine-dependent layers when a new CPU is started.
 */
cpu_add_active(cpu_t *cp)
{
	pg_cpupart_in(cp, cp->cpu_part);

	pause_cpus(NULL, NULL);
	cpu_add_active_internal(cp);

	cpu_stats_kstat_create(cp);
	cpu_create_intrstat(cp);
	lgrp_kstat_create(cp);
	cpu_state_change_notify(cp->cpu_id, CPU_INIT);
}
/*
 * Remove a CPU from the list of active CPUs.
 *	This routine must not get any locks, because other CPUs are paused.
 */
cpu_remove_active(cpu_t *cp)
{
	cpupart_t	*pp = cp->cpu_part;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cp->cpu_next_onln != cp);	/* not the last one */
	ASSERT(cp->cpu_prev_onln != cp);	/* not the last one */

	pg_cpu_inactive(cp);

	lgrp_config(LGRP_CONFIG_CPU_OFFLINE, (uintptr_t)cp, 0);

	if (cp == clock_cpu_list)
		clock_cpu_list = cp->cpu_next_onln;

	cp->cpu_prev_onln->cpu_next_onln = cp->cpu_next_onln;
	cp->cpu_next_onln->cpu_prev_onln = cp->cpu_prev_onln;
	if (cpu_active == cp) {
		cpu_active = cp->cpu_next_onln;
	}
	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;

	cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
	cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
	if (pp->cp_cpulist == cp) {
		pp->cp_cpulist = cp->cpu_next_part;
		ASSERT(pp->cp_cpulist != cp);
	}
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;
	if (pp->cp_ncpus == 0) {
		cp_numparts_nonempty--;
		ASSERT(cp_numparts_nonempty != 0);
	}
}
/*
 * Routine used to setup a newly inserted CPU in preparation for starting
 * it running code.
 */
cpu_configure(int cpuid)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Some structures are statically allocated based upon
	 * the maximum number of cpus the system supports.  Do not
	 * try to add anything beyond this limit.
	 */
	if (cpuid < 0 || cpuid >= NCPU) {
		return (EINVAL);
	}

	if ((cpu[cpuid] != NULL) && (cpu[cpuid]->cpu_flags != 0)) {
		return (EALREADY);
	}

	if ((retval = mp_cpu_configure(cpuid)) != 0) {
		return (retval);
	}

	cpu[cpuid]->cpu_flags = CPU_QUIESCED | CPU_OFFLINE | CPU_POWEROFF;
	cpu_set_state(cpu[cpuid]);
	retval = cpu_state_change_hooks(cpuid, CPU_CONFIG, CPU_UNCONFIG);
	if (retval != 0)
		(void) mp_cpu_unconfigure(cpuid);

	return (retval);
}

/*
 * Routine used to cleanup a CPU that has been powered off.  This will
 * destroy all per-cpu information related to this cpu.
 */
cpu_unconfigure(int cpuid)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	if (cpu[cpuid] == NULL) {
		return (ENODEV);
	}

	if (cpu[cpuid]->cpu_flags == 0) {
		return (EALREADY);
	}

	if ((cpu[cpuid]->cpu_flags & CPU_POWEROFF) == 0) {
		return (EBUSY);
	}

	if (cpu[cpuid]->cpu_props != NULL) {
		(void) nvlist_free(cpu[cpuid]->cpu_props);
		cpu[cpuid]->cpu_props = NULL;
	}

	error = cpu_state_change_hooks(cpuid, CPU_UNCONFIG, CPU_CONFIG);

	return (mp_cpu_unconfigure(cpuid));
}
/*
 * Routines for registering and de-registering cpu_setup callback functions.
 *
 * These routines must not be called from a driver's attach(9E) or
 * detach(9E) entry point.
 *
 * NOTE: CPU callbacks should not block.  They are called with cpu_lock held.
 */

/*
 * Ideally, these would be dynamically allocated and put into a linked
 * list; however that is not feasible because the registration routine
 * has to be available before the kmem allocator is working (in fact,
 * it is called by the kmem allocator init code).  In any case, there
 * are quite a few extra entries for future users.
 */
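/*
 * A minimal usage sketch (hypothetical subsystem, not part of this file;
 * mysub_cpu_setup is a placeholder name):
 *
 *	static int
 *	mysub_cpu_setup(cpu_setup_t what, int id, void *arg)
 *	{
 *		if (what == CPU_CONFIG)
 *			(allocate per-CPU state for cpu id)
 *		else if (what == CPU_UNCONFIG)
 *			(tear that state down)
 *		return (0);
 *	}
 *
 *	mutex_enter(&cpu_lock);
 *	register_cpu_setup_func(mysub_cpu_setup, NULL);
 *	mutex_exit(&cpu_lock);
 */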
#define	NCPU_SETUPS	20

static struct {
	cpu_setup_func_t	*func;
	void			*arg;
} cpu_setups[NCPU_SETUPS];

register_cpu_setup_func(cpu_setup_func_t *func, void *arg)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; i < NCPU_SETUPS; i++)
		if (cpu_setups[i].func == NULL)
			break;
	if (i >= NCPU_SETUPS)
		cmn_err(CE_PANIC, "Ran out of cpu_setup callback entries");

	cpu_setups[i].func = func;
	cpu_setups[i].arg = arg;
}

unregister_cpu_setup_func(cpu_setup_func_t *func, void *arg)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; i < NCPU_SETUPS; i++)
		if ((cpu_setups[i].func == func) &&
		    (cpu_setups[i].arg == arg))
			break;
	if (i >= NCPU_SETUPS)
		cmn_err(CE_PANIC, "Could not find cpu_setup callback to "
		    "deregister");

	cpu_setups[i].func = NULL;
	cpu_setups[i].arg = 0;
}
/*
 * Call any state change hooks for this CPU, ignore any errors.
 */
cpu_state_change_notify(int id, cpu_setup_t what)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; i < NCPU_SETUPS; i++) {
		if (cpu_setups[i].func != NULL) {
			cpu_setups[i].func(what, id, cpu_setups[i].arg);
		}
	}
}

/*
 * Call any state change hooks for this CPU, undo it if error found.
 */
cpu_state_change_hooks(int id, cpu_setup_t what, cpu_setup_t undo)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; i < NCPU_SETUPS; i++) {
		if (cpu_setups[i].func != NULL) {
			retval = cpu_setups[i].func(what, id,
			    cpu_setups[i].arg);
			if (retval) {
				for (i--; i >= 0; i--) {
					if (cpu_setups[i].func != NULL)
						cpu_setups[i].func(undo,
						    id, cpu_setups[i].arg);
				}
				break;
			}
		}
	}
	return (retval);
}
/*
 * Export information about this CPU via the kstat mechanism.
 */
static struct {
	kstat_named_t ci_state;
	kstat_named_t ci_state_begin;
	kstat_named_t ci_cpu_type;
	kstat_named_t ci_fpu_type;
	kstat_named_t ci_clock_MHz;
	kstat_named_t ci_chip_id;
	kstat_named_t ci_implementation;
	kstat_named_t ci_brandstr;
	kstat_named_t ci_core_id;
	kstat_named_t ci_curr_clock_Hz;
	kstat_named_t ci_supp_freq_Hz;
	kstat_named_t ci_pg_id;
#if defined(__sparcv9)
	kstat_named_t ci_device_ID;
	kstat_named_t ci_cpu_fru;
#endif
#if defined(__x86)
	kstat_named_t ci_vendorstr;
	kstat_named_t ci_family;
	kstat_named_t ci_model;
	kstat_named_t ci_step;
	kstat_named_t ci_clogid;
	kstat_named_t ci_pkg_core_id;
	kstat_named_t ci_ncpuperchip;
	kstat_named_t ci_ncoreperchip;
	kstat_named_t ci_max_cstates;
	kstat_named_t ci_curr_cstate;
	kstat_named_t ci_cacheid;
	kstat_named_t ci_sktstr;
#endif
} cpu_info_template = {
	{ "state",			KSTAT_DATA_CHAR },
	{ "state_begin",		KSTAT_DATA_LONG },
	{ "cpu_type",			KSTAT_DATA_CHAR },
	{ "fpu_type",			KSTAT_DATA_CHAR },
	{ "clock_MHz",			KSTAT_DATA_LONG },
	{ "chip_id",			KSTAT_DATA_LONG },
	{ "implementation",		KSTAT_DATA_STRING },
	{ "brand",			KSTAT_DATA_STRING },
	{ "core_id",			KSTAT_DATA_LONG },
	{ "current_clock_Hz",		KSTAT_DATA_UINT64 },
	{ "supported_frequencies_Hz",	KSTAT_DATA_STRING },
	{ "pg_id",			KSTAT_DATA_LONG },
#if defined(__sparcv9)
	{ "device_ID",			KSTAT_DATA_UINT64 },
	{ "cpu_fru",			KSTAT_DATA_STRING },
#endif
#if defined(__x86)
	{ "vendor_id",			KSTAT_DATA_STRING },
	{ "family",			KSTAT_DATA_INT32 },
	{ "model",			KSTAT_DATA_INT32 },
	{ "stepping",			KSTAT_DATA_INT32 },
	{ "clog_id",			KSTAT_DATA_INT32 },
	{ "pkg_core_id",		KSTAT_DATA_LONG },
	{ "ncpu_per_chip",		KSTAT_DATA_INT32 },
	{ "ncore_per_chip",		KSTAT_DATA_INT32 },
	{ "supported_max_cstates",	KSTAT_DATA_INT32 },
	{ "current_cstate",		KSTAT_DATA_INT32 },
	{ "cache_id",			KSTAT_DATA_INT32 },
	{ "socket_type",		KSTAT_DATA_STRING },
#endif
};

static kmutex_t cpu_info_template_lock;
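/*
 * For reference: the template above surfaces to userland through libkstat
 * under the "cpu_info" module, one instance per CPU id.  A sketch of how it
 * is typically inspected (the instance and values shown are illustrative
 * only):
 *
 *	$ kstat -p cpu_info:0::state cpu_info:0::clock_MHz
 *	cpu_info:0:cpu_info0:state	on-line
 *	cpu_info:0:cpu_info0:clock_MHz	2400
 */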
static int
cpu_info_kstat_update(kstat_t *ksp, int rw)
{
	cpu_t	*cp = ksp->ks_private;
	const char *pi_state;

	if (rw == KSTAT_WRITE)
		return (EACCES);

#if defined(__x86)
	/* Is the cpu still initialising itself? */
	if (cpuid_checkpass(cp, 1) == 0)
		return (ENXIO);
#endif
	switch (cp->cpu_type_info.pi_state) {
	case P_ONLINE:
		pi_state = PS_ONLINE;
		break;
	case P_POWEROFF:
		pi_state = PS_POWEROFF;
		break;
	case P_NOINTR:
		pi_state = PS_NOINTR;
		break;
	case P_FAULTED:
		pi_state = PS_FAULTED;
		break;
	case P_SPARE:
		pi_state = PS_SPARE;
		break;
	case P_OFFLINE:
		pi_state = PS_OFFLINE;
		break;
	default:
		pi_state = "unknown";
	}
	(void) strcpy(cpu_info_template.ci_state.value.c, pi_state);
	cpu_info_template.ci_state_begin.value.l = cp->cpu_state_begin;
	(void) strncpy(cpu_info_template.ci_cpu_type.value.c,
	    cp->cpu_type_info.pi_processor_type, 15);
	(void) strncpy(cpu_info_template.ci_fpu_type.value.c,
	    cp->cpu_type_info.pi_fputypes, 15);
	cpu_info_template.ci_clock_MHz.value.l = cp->cpu_type_info.pi_clock;
	cpu_info_template.ci_chip_id.value.l =
	    pg_plat_hw_instance_id(cp, PGHW_CHIP);
	kstat_named_setstr(&cpu_info_template.ci_implementation,
	    cp->cpu_idstr);
	kstat_named_setstr(&cpu_info_template.ci_brandstr, cp->cpu_brandstr);
	cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp);
	cpu_info_template.ci_curr_clock_Hz.value.ui64 =
	    cp->cpu_curr_clock;
	cpu_info_template.ci_pg_id.value.l =
	    cp->cpu_pg && cp->cpu_pg->cmt_lineage ?
	    cp->cpu_pg->cmt_lineage->pg_id : -1;
	kstat_named_setstr(&cpu_info_template.ci_supp_freq_Hz,
	    cp->cpu_supp_freqs);
#if defined(__sparcv9)
	cpu_info_template.ci_device_ID.value.ui64 =
	    cpunodes[cp->cpu_id].device_id;
	kstat_named_setstr(&cpu_info_template.ci_cpu_fru, cpu_fru_fmri(cp));
#endif
#if defined(__x86)
	kstat_named_setstr(&cpu_info_template.ci_vendorstr,
	    cpuid_getvendorstr(cp));
	cpu_info_template.ci_family.value.l = cpuid_getfamily(cp);
	cpu_info_template.ci_model.value.l = cpuid_getmodel(cp);
	cpu_info_template.ci_step.value.l = cpuid_getstep(cp);
	cpu_info_template.ci_clogid.value.l = cpuid_get_clogid(cp);
	cpu_info_template.ci_ncpuperchip.value.l = cpuid_get_ncpu_per_chip(cp);
	cpu_info_template.ci_ncoreperchip.value.l =
	    cpuid_get_ncore_per_chip(cp);
	cpu_info_template.ci_pkg_core_id.value.l = cpuid_get_pkgcoreid(cp);
	cpu_info_template.ci_max_cstates.value.l = cp->cpu_m.max_cstates;
	cpu_info_template.ci_curr_cstate.value.l = cpu_idle_get_cpu_state(cp);
	cpu_info_template.ci_cacheid.value.i32 = cpuid_get_cacheid(cp);
	kstat_named_setstr(&cpu_info_template.ci_sktstr,
	    cpuid_getsocketstr(cp));
#endif

	return (0);
}
static void
cpu_info_kstat_create(cpu_t *cp)
{
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;
	if ((cp->cpu_info_kstat = kstat_create_zone("cpu_info", cp->cpu_id,
	    NULL, "misc", KSTAT_TYPE_NAMED,
	    sizeof (cpu_info_template) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE, zoneid)) != NULL) {
		cp->cpu_info_kstat->ks_data_size += 2 * CPU_IDSTRLEN;
#if defined(__sparcv9)
		cp->cpu_info_kstat->ks_data_size +=
		    strlen(cpu_fru_fmri(cp)) + 1;
#endif
#if defined(__x86)
		cp->cpu_info_kstat->ks_data_size += X86_VENDOR_STRLEN;
#endif
		if (cp->cpu_supp_freqs != NULL)
			cp->cpu_info_kstat->ks_data_size +=
			    strlen(cp->cpu_supp_freqs) + 1;
		cp->cpu_info_kstat->ks_lock = &cpu_info_template_lock;
		cp->cpu_info_kstat->ks_data = &cpu_info_template;
		cp->cpu_info_kstat->ks_private = cp;
		cp->cpu_info_kstat->ks_update = cpu_info_kstat_update;
		kstat_install(cp->cpu_info_kstat);
	}
}
static void
cpu_info_kstat_destroy(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	kstat_delete(cp->cpu_info_kstat);
	cp->cpu_info_kstat = NULL;
}
/*
 * Create and install kstats for the boot CPU.
 */
void
cpu_kstat_init(cpu_t *cp)
{
	mutex_enter(&cpu_lock);
	cpu_info_kstat_create(cp);
	cpu_stats_kstat_create(cp);
	cpu_create_intrstat(cp);
	cpu_set_state(cp);
	mutex_exit(&cpu_lock);
}
/*
 * Make visible to the zone that subset of the cpu information that would be
 * initialized when a cpu is configured (but still offline).
 */
void
cpu_visibility_configure(cpu_t *cp, zone_t *zone)
{
	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(pool_pset_enabled());
	ASSERT(cp != NULL);

	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
		zone->zone_ncpus++;
		ASSERT(zone->zone_ncpus <= ncpus);
	}
	if (cp->cpu_info_kstat != NULL)
		kstat_zone_add(cp->cpu_info_kstat, zoneid);
}
/*
 * Make visible to the zone that subset of the cpu information that would be
 * initialized when a previously configured cpu is onlined.
 */
void
cpu_visibility_online(cpu_t *cp, zone_t *zone)
{
	kstat_t *ksp;
	char name[sizeof ("cpu_stat") + 10];	/* enough for 32-bit cpuids */
	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
	processorid_t cpun;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(pool_pset_enabled());
	ASSERT(cp != NULL);
	ASSERT(cpu_is_active(cp));

	cpun = cp->cpu_id;
	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
		zone->zone_ncpus_online++;
		ASSERT(zone->zone_ncpus_online <= ncpus_online);
	}
	(void) snprintf(name, sizeof (name), "cpu_stat%d", cpun);
	if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES))
	    != NULL) {
		kstat_zone_add(ksp, zoneid);
		kstat_rele(ksp);
	}
	if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) {
		kstat_zone_add(ksp, zoneid);
		kstat_rele(ksp);
	}
	if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) {
		kstat_zone_add(ksp, zoneid);
		kstat_rele(ksp);
	}
	if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) !=
	    NULL) {
		kstat_zone_add(ksp, zoneid);
		kstat_rele(ksp);
	}
}
/*
 * Update relevant kstats such that cpu is now visible to processes
 * executing in specified zone.
 */
void
cpu_visibility_add(cpu_t *cp, zone_t *zone)
{
	cpu_visibility_configure(cp, zone);
	if (cpu_is_active(cp))
		cpu_visibility_online(cp, zone);
}
/*
 * Make invisible to the zone that subset of the cpu information that would be
 * torn down when a previously offlined cpu is unconfigured.
 */
void
cpu_visibility_unconfigure(cpu_t *cp, zone_t *zone)
{
	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(pool_pset_enabled());
	ASSERT(cp != NULL);

	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
		ASSERT(zone->zone_ncpus != 0);
		zone->zone_ncpus--;
	}
	if (cp->cpu_info_kstat)
		kstat_zone_remove(cp->cpu_info_kstat, zoneid);
}
/*
 * Make invisible to the zone that subset of the cpu information that would be
 * torn down when a cpu is offlined (but still configured).
 */
void
cpu_visibility_offline(cpu_t *cp, zone_t *zone)
{
	kstat_t *ksp;
	char name[sizeof ("cpu_stat") + 10];	/* enough for 32-bit cpuids */
	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
	processorid_t cpun;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(pool_pset_enabled());
	ASSERT(cp != NULL);
	ASSERT(cpu_is_active(cp));

	cpun = cp->cpu_id;
	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
		ASSERT(zone->zone_ncpus_online != 0);
		zone->zone_ncpus_online--;
	}

	if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) !=
	    NULL) {
		kstat_zone_remove(ksp, zoneid);
		kstat_rele(ksp);
	}
	if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) {
		kstat_zone_remove(ksp, zoneid);
		kstat_rele(ksp);
	}
	if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) {
		kstat_zone_remove(ksp, zoneid);
		kstat_rele(ksp);
	}
	(void) snprintf(name, sizeof (name), "cpu_stat%d", cpun);
	if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES))
	    != NULL) {
		kstat_zone_remove(ksp, zoneid);
		kstat_rele(ksp);
	}
}
/*
 * Update relevant kstats such that cpu is no longer visible to processes
 * executing in specified zone.
 */
void
cpu_visibility_remove(cpu_t *cp, zone_t *zone)
{
	if (cpu_is_active(cp))
		cpu_visibility_offline(cp, zone);
	cpu_visibility_unconfigure(cp, zone);
}
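/*
 * Illustrative call sequence (a sketch, not invoked from here): code that
 * moves a CPU into or out of a zone's view is expected to do so with
 * cpu_lock held and pools enabled, e.g.:
 *
 *	mutex_enter(&cpu_lock);
 *	if (pool_pset_enabled())
 *		cpu_visibility_add(cp, zone);	// or cpu_visibility_remove()
 *	mutex_exit(&cpu_lock);
 */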
/*
 * Bind a thread to a CPU as requested.
 */
int
cpu_bind_thread(kthread_id_t tp, processorid_t bind, processorid_t *obind,
    int *error)
{
	processorid_t	binding;
	cpu_t		*cp = NULL;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));

	thread_lock(tp);

	/*
	 * Record old binding, but change the obind, which was initialized
	 * to PBIND_NONE, only if this thread has a binding.  This avoids
	 * reporting PBIND_NONE for a process when some LWPs are bound.
	 */
	binding = tp->t_bind_cpu;
	if (binding != PBIND_NONE)
		*obind = binding;	/* record old binding */

	switch (bind) {
	case PBIND_QUERY:
		/* Just return the old binding */
		thread_unlock(tp);
		return (0);

	case PBIND_QUERY_TYPE:
		/* Return the binding type */
		*obind = TB_CPU_IS_SOFT(tp) ? PBIND_SOFT : PBIND_HARD;
		thread_unlock(tp);
		return (0);

	case PBIND_SOFT:
		/*
		 *  Set soft binding for this thread and return the actual
		 *  binding.
		 */
		TB_CPU_SOFT_SET(tp);
		break;

	case PBIND_HARD:
		/*
		 *  Set hard binding for this thread and return the actual
		 *  binding.
		 */
		TB_CPU_HARD_SET(tp);
		break;

	default:
		break;
	}

	/*
	 * If this thread/LWP cannot be bound because of permission
	 * problems, just note that and return success so that the
	 * other threads/LWPs will be bound.  This is the way
	 * processor_bind() is defined to work.
	 *
	 * Binding will get EPERM if the thread is of system class
	 * or hasprocperm() fails.
	 */
	if (tp->t_cid == 0 || !hasprocperm(tp->t_cred, CRED())) {
		*error = EPERM;
		thread_unlock(tp);
		return (0);
	}

	binding = bind;
	if (binding != PBIND_NONE) {
		cp = cpu_get((processorid_t)binding);
		/*
		 * Make sure binding is valid and is in right partition.
		 */
		if (cp == NULL || tp->t_cpupart != cp->cpu_part) {
			*error = EINVAL;
			thread_unlock(tp);
			return (0);
		}
	}
	tp->t_bind_cpu = binding;	/* set new binding */

	/*
	 * If there is no system-set reason for affinity, set
	 * the t_bound_cpu field to reflect the binding.
	 */
	if (tp->t_affinitycnt == 0) {
		if (binding == PBIND_NONE) {
			/*
			 * We may need to adjust disp_max_unbound_pri
			 * since we're becoming unbound.
			 */
			disp_adjust_unbound_pri(tp);

			tp->t_bound_cpu = NULL;	/* set new binding */

			/*
			 * Move thread to lgroup with strongest affinity
			 * after unbinding
			 */
			if (tp->t_lgrp_affinity)
				lgrp_move_thread(tp,
				    lgrp_choose(tp, tp->t_cpupart), 1);

			if (tp->t_state == TS_ONPROC &&
			    tp->t_cpu->cpu_part != tp->t_cpupart)
				cpu_surrender(tp);
		} else {
			lpl_t	*lpl;

			tp->t_bound_cpu = cp;
			ASSERT(cp->cpu_lpl != NULL);

			/*
			 * Set home to lgroup with most affinity containing CPU
			 * that thread is being bound or minimum bounding
			 * lgroup if no affinities set
			 */
			if (tp->t_lgrp_affinity)
				lpl = lgrp_affinity_best(tp, tp->t_cpupart,
				    LGRP_NONE, B_FALSE);
			else
				lpl = cp->cpu_lpl;

			if (tp->t_lpl != lpl) {
				/* can't grab cpu_lock */
				lgrp_move_thread(tp, lpl, 1);
			}

			/*
			 * Make the thread switch to the bound CPU.
			 * If the thread is runnable, we need to
			 * requeue it even if t_cpu is already set
			 * to the right CPU, since it may be on a
			 * kpreempt queue and need to move to a local
			 * queue.  We could check t_disp_queue to
			 * avoid unnecessary overhead if it's already
			 * on the right queue, but since this isn't
			 * a performance-critical operation it doesn't
			 * seem worth the extra code and complexity.
			 *
			 * If the thread is weakbound to the cpu then it will
			 * resist the new binding request until the weak
			 * binding drops.  The cpu_surrender or requeueing
			 * below could be skipped in such cases (since it
			 * will have no effect), but that would require
			 * thread_allowmigrate to acquire thread_lock so
			 * we'll take the very occasional hit here instead.
			 */
			if (tp->t_state == TS_ONPROC) {
				cpu_surrender(tp);
			} else if (tp->t_state == TS_RUN) {
				cpu_t *ocp = tp->t_cpu;

				(void) dispdeq(tp);
				setbackdq(tp);
				/*
				 * Either on the bound CPU's disp queue now,
				 * or swapped out or on the swap queue.
				 */
				ASSERT(tp->t_disp_queue == cp->cpu_disp ||
				    tp->t_weakbound_cpu == ocp ||
				    (tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ))
				    != TS_LOAD);
			}
		}
	}

	/*
	 * Our binding has changed; set TP_CHANGEBIND.
	 */
	tp->t_proc_flag |= TP_CHANGEBIND;
	aston(tp);

	thread_unlock(tp);

	return (0);
}
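/*
 * For reference, the binding semantics above are what userland drives via
 * processor_bind(2).  An illustrative application-level call that binds the
 * calling LWP to CPU 2 and records the previous binding:
 *
 *	processorid_t obind;
 *
 *	if (processor_bind(P_LWPID, P_MYID, 2, &obind) != 0)
 *		perror("processor_bind");
 */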
#if CPUSET_WORDS > 1

/*
 * Functions for implementing cpuset operations when a cpuset is more
 * than one word.  On platforms where a cpuset is a single word these
 * are implemented as macros in cpuvar.h.
 */

void
cpuset_all(cpuset_t *s)
{
	int i;

	for (i = 0; i < CPUSET_WORDS; i++)
		s->cpub[i] = ~0UL;
}

void
cpuset_all_but(cpuset_t *s, uint_t cpu)
{
	cpuset_all(s);
	CPUSET_DEL(*s, cpu);
}

void
cpuset_only(cpuset_t *s, uint_t cpu)
{
	CPUSET_ZERO(*s);
	CPUSET_ADD(*s, cpu);
}

int
cpuset_isnull(cpuset_t *s)
{
	int i;

	for (i = 0; i < CPUSET_WORDS; i++)
		if (s->cpub[i] != 0)
			return (0);
	return (1);
}

int
cpuset_cmp(cpuset_t *s1, cpuset_t *s2)
{
	int i;

	for (i = 0; i < CPUSET_WORDS; i++)
		if (s1->cpub[i] != s2->cpub[i])
			return (0);
	return (1);
}

uint_t
cpuset_find(cpuset_t *s)
{

	uint_t	i;
	uint_t	cpu = (uint_t)-1;

	/*
	 * Find a cpu in the cpuset
	 */
	for (i = 0; i < CPUSET_WORDS; i++) {
		cpu = (uint_t)(lowbit(s->cpub[i]) - 1);
		if (cpu != (uint_t)-1) {
			cpu += i * BT_NBIPUL;
			break;
		}
	}
	return (cpu);
}

void
cpuset_bounds(cpuset_t *s, uint_t *smallestid, uint_t *largestid)
{
	int	i, j;
	uint_t	bit;

	/*
	 * First, find the smallest cpu id in the set.
	 */
	for (i = 0; i < CPUSET_WORDS; i++) {
		if (s->cpub[i] != 0) {
			bit = (uint_t)(lowbit(s->cpub[i]) - 1);
			ASSERT(bit != (uint_t)-1);
			*smallestid = bit + (i * BT_NBIPUL);

			/*
			 * Now find the largest cpu id in
			 * the set and return immediately.
			 * Done in an inner loop to avoid
			 * having to break out of the first
			 * loop.
			 */
			for (j = CPUSET_WORDS - 1; j >= i; j--) {
				if (s->cpub[j] != 0) {
					bit = (uint_t)(highbit(s->cpub[j]) - 1);
					ASSERT(bit != (uint_t)-1);
					*largestid = bit + (j * BT_NBIPUL);
					ASSERT(*largestid >= *smallestid);
					return;
				}
			}

			/*
			 * If this code is reached, a
			 * smallestid was found, but not a
			 * largestid.  The cpuset must have
			 * been changed during the course
			 * of this function call.
			 */
			ASSERT(0);
		}
	}
	*smallestid = *largestid = CPUSET_NOTINSET;
}

#endif	/* CPUSET_WORDS */
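/*
 * Whether provided by the functions above or by the single-word macros in
 * cpuvar.h, callers manipulate cpusets through the CPUSET_*() interfaces.
 * A minimal sketch (the values shown are illustrative):
 *
 *	cpuset_t set;
 *	uint_t lo, hi;
 *
 *	CPUSET_ZERO(set);
 *	CPUSET_ADD(set, 3);
 *	CPUSET_ADD(set, 17);
 *	if (!CPUSET_ISNULL(set))
 *		CPUSET_BOUNDS(set, lo, hi);	// lo == 3, hi == 17
 */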
/*
 * Unbind threads bound to specified CPU.
 *
 * If `unbind_all_threads' is true, unbind all user threads bound to a given
 * CPU. Otherwise unbind all soft-bound user threads.
 */
int
cpu_unbind(processorid_t cpu, boolean_t unbind_all_threads)
{
	processorid_t obind;
	kthread_t *tp;
	int ret = 0;
	proc_t *pp;
	int err, berr = 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	mutex_enter(&pidlock);
	for (pp = practive; pp != NULL; pp = pp->p_next) {
		mutex_enter(&pp->p_lock);
		tp = pp->p_tlist;
		/*
		 * Skip zombies, kernel processes, and processes in
		 * other zones, if called from a non-global zone.
		 */
		if (tp == NULL || (pp->p_flag & SSYS) ||
		    !HASZONEACCESS(curproc, pp->p_zone->zone_id)) {
			mutex_exit(&pp->p_lock);
			continue;
		}
		do {
			if (tp->t_bind_cpu != cpu)
				continue;
			/*
			 * Skip threads with hard binding when
			 * `unbind_all_threads' is not specified.
			 */
			if (!unbind_all_threads && TB_CPU_IS_HARD(tp))
				continue;
			err = cpu_bind_thread(tp, PBIND_NONE, &obind, &berr);
			if (ret == 0)
				ret = err;
		} while ((tp = tp->t_forw) != pp->p_tlist);
		mutex_exit(&pp->p_lock);
	}
	mutex_exit(&pidlock);
	if (ret == 0)
		ret = berr;
	return (ret);
}
/*
 * Destroy all remaining bound threads on a cpu.
 */
void
cpu_destroy_bound_threads(cpu_t *cp)
{
	extern id_t syscid;
	register kthread_id_t	t, tlist, tnext;

	/*
	 * Destroy all remaining bound threads on the cpu.  This
	 * should include both the interrupt threads and the idle thread.
	 * This requires some care, since we need to traverse the
	 * thread list with the pidlock mutex locked, but thread_free
	 * also locks the pidlock mutex.  So, we collect the threads
	 * we're going to reap in a list headed by "tlist", then we
	 * unlock the pidlock mutex and traverse the tlist list,
	 * doing thread_free's on the thread's.	 Simple, n'est pas?
	 * Also, this depends on thread_free not mucking with the
	 * t_next and t_prev links of the thread.
	 */

	if ((t = curthread) != NULL) {

		tlist = NULL;
		mutex_enter(&pidlock);
		do {
			tnext = t->t_next;
			if (t->t_bound_cpu == cp) {

				/*
				 * We've found a bound thread, carefully unlink
				 * it out of the thread list, and add it to
				 * our "tlist".	 We "know" we don't have to
				 * worry about unlinking curthread (the thread
				 * that is executing this code).
				 */
				t->t_next->t_prev = t->t_prev;
				t->t_prev->t_next = t->t_next;
				t->t_next = tlist;
				tlist = t;
				ASSERT(t->t_cid == syscid);
				/* wake up anyone blocked in thread_join */
				cv_broadcast(&t->t_joincv);
				/*
				 * t_lwp set by interrupt threads and not
				 * cleared.
				 */
				t->t_lwp = NULL;
				/*
				 * Pause and idle threads always have
				 * t_state set to TS_ONPROC.
				 */
				t->t_state = TS_FREE;
				t->t_prev = NULL;	/* Just in case */
			}

		} while ((t = tnext) != curthread);

		mutex_exit(&pidlock);

		mutex_sync();
		for (t = tlist; t != NULL; t = tnext) {
			tnext = t->t_next;
			thread_free(t);
		}
	}
}
/*
 * Update the cpu_supp_freqs of this cpu. This information is returned
 * as part of cpu_info kstats. If the cpu_info_kstat exists already, then
 * maintain the kstat data size.
 */
void
cpu_set_supp_freqs(cpu_t *cp, const char *freqs)
{
	char clkstr[sizeof ("18446744073709551615") + 1]; /* ui64 MAX */
	const char *lfreqs = clkstr;
	boolean_t kstat_exists = B_FALSE;
	kstat_t *ksp;
	size_t len;

	/*
	 * A NULL pointer means we only support one speed.
	 */
	if (freqs == NULL)
		(void) snprintf(clkstr, sizeof (clkstr), "%"PRIu64,
		    cp->cpu_curr_clock);
	else
		lfreqs = freqs;

	/*
	 * Make sure the frequency doesn't change while a snapshot is
	 * going on. Of course, we only need to worry about this if
	 * the kstat exists.
	 */
	if ((ksp = cp->cpu_info_kstat) != NULL) {
		mutex_enter(ksp->ks_lock);
		kstat_exists = B_TRUE;
	}

	/*
	 * Free any previously allocated string and if the kstat
	 * already exists, then update its data size.
	 */
	if (cp->cpu_supp_freqs != NULL) {
		len = strlen(cp->cpu_supp_freqs) + 1;
		kmem_free(cp->cpu_supp_freqs, len);
		if (kstat_exists)
			ksp->ks_data_size -= len;
	}

	/*
	 * Allocate the new string and set the pointer.
	 */
	len = strlen(lfreqs) + 1;
	cp->cpu_supp_freqs = kmem_alloc(len, KM_SLEEP);
	(void) strcpy(cp->cpu_supp_freqs, lfreqs);

	/*
	 * If the kstat already exists then update the data size and
	 * free the lock.
	 */
	if (kstat_exists) {
		ksp->ks_data_size += len;
		mutex_exit(ksp->ks_lock);
	}
}
/*
 * Indicate the current CPU's clock frequency (in Hz).
 * The calling context must be such that CPU references are safe.
 */
void
cpu_set_curr_clock(uint64_t new_clk)
{
	uint64_t old_clk;

	old_clk = CPU->cpu_curr_clock;
	CPU->cpu_curr_clock = new_clk;

	/*
	 * The cpu-change-speed DTrace probe exports the frequency in Hz
	 */
	DTRACE_PROBE3(cpu__change__speed, processorid_t, CPU->cpu_id,
	    uint64_t, old_clk, uint64_t, new_clk);
}
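/*
 * The probe fired above is a statically defined tracing (SDT) probe, so the
 * frequency transition can be observed from userland with dtrace(1M); an
 * illustrative one-liner:
 *
 *	# dtrace -n 'sdt:::cpu-change-speed
 *	    { printf("cpu %d: %d -> %d Hz", arg0, arg1, arg2); }'
 */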
/*
 * processor_info(2) and p_online(2) status support functions
 *   The constants returned by the cpu_get_state() and cpu_get_state_str() are
 *   for use in communicating processor state information to userland. Kernel
 *   subsystems should only be using the cpu_flags value directly. Subsystems
 *   modifying cpu_flags should record the state change via a call to the
 *   cpu_set_state().
 */

/*
 * Update the pi_state of this CPU. This function provides the CPU status for
 * the information returned by processor_info(2).
 */
void
cpu_set_state(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	cpu->cpu_type_info.pi_state = cpu_get_state(cpu);
	cpu->cpu_state_begin = gethrestime_sec();
	pool_cpu_mod = gethrtime();
}
/*
 * Return offline/online/other status for the indicated CPU.  Use only for
 * communication with user applications; cpu_flags provides the in-kernel
 * interface.
 */
int
cpu_get_state(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	if (cpu->cpu_flags & CPU_POWEROFF)
		return (P_POWEROFF);
	else if (cpu->cpu_flags & CPU_FAULTED)
		return (P_FAULTED);
	else if (cpu->cpu_flags & CPU_SPARE)
		return (P_SPARE);
	else if ((cpu->cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY)
		return (P_OFFLINE);
	else if (cpu->cpu_flags & CPU_ENABLE)
		return (P_ONLINE);
	else
		return (P_NOINTR);
}
/*
 * Return processor_info(2) state as a string.
 */
const char *
cpu_get_state_str(cpu_t *cpu)
{
	const char *string;

	switch (cpu_get_state(cpu)) {
	case P_ONLINE:
		string = PS_ONLINE;
		break;
	case P_POWEROFF:
		string = PS_POWEROFF;
		break;
	case P_NOINTR:
		string = PS_NOINTR;
		break;
	case P_SPARE:
		string = PS_SPARE;
		break;
	case P_FAULTED:
		string = PS_FAULTED;
		break;
	case P_OFFLINE:
		string = PS_OFFLINE;
		break;
	default:
		string = "unknown";
		break;
	}
	return (string);
}
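/*
 * The P_* constants and PS_* strings used above are the same ones userland
 * sees through processor_info(2) and p_online(2); an illustrative query of
 * CPU 0's status from an application:
 *
 *	int status = p_online(0, P_STATUS);
 *
 *	if (status == P_ONLINE)
 *		(void) printf("cpu 0 is %s\n", PS_ONLINE);
 */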
/*
 * Export this CPU's statistics (cpu_stat_t and cpu_stats_t) as raw and named
 * kstats, respectively.  This is done when a CPU is initialized or placed
 * online via p_online(2).
 */
static void
cpu_stats_kstat_create(cpu_t *cp)
{
	int instance = cp->cpu_id;
	char *module = "cpu";
	char *class = "misc";
	kstat_t *ksp;
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;
	/*
	 * Create named kstats
	 */
#define	CPU_STATS_KS_CREATE(name, tsize, update_func)			\
	ksp = kstat_create_zone(module, instance, (name), class,	\
	    KSTAT_TYPE_NAMED, (tsize) / sizeof (kstat_named_t), 0,	\
	    zoneid);							\
	if (ksp != NULL) {						\
		ksp->ks_private = cp;					\
		ksp->ks_update = (update_func);				\
		kstat_install(ksp);					\
	} else {							\
		cmn_err(CE_WARN, "cpu: unable to create %s:%d:%s kstat", \
		    module, instance, (name));				\
	}

	CPU_STATS_KS_CREATE("sys", sizeof (cpu_sys_stats_ks_data_template),
	    cpu_sys_stats_ks_update);
	CPU_STATS_KS_CREATE("vm", sizeof (cpu_vm_stats_ks_data_template),
	    cpu_vm_stats_ks_update);

	/*
	 * Export the familiar cpu_stat_t KSTAT_TYPE_RAW kstat.
	 */
	ksp = kstat_create_zone("cpu_stat", cp->cpu_id, NULL,
	    "misc", KSTAT_TYPE_RAW, sizeof (cpu_stat_t), 0, zoneid);
	if (ksp != NULL) {
		ksp->ks_update = cpu_stat_ks_update;
		ksp->ks_private = cp;
		kstat_install(ksp);
	}
}
static void
cpu_stats_kstat_destroy(cpu_t *cp)
{
	char ks_name[KSTAT_STRLEN];

	(void) sprintf(ks_name, "cpu_stat%d", cp->cpu_id);
	kstat_delete_byname("cpu_stat", cp->cpu_id, ks_name);

	kstat_delete_byname("cpu", cp->cpu_id, "sys");
	kstat_delete_byname("cpu", cp->cpu_id, "vm");
}
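/*
 * For reference: the named kstats created above appear to userland as
 * cpu:<id>:sys and cpu:<id>:vm, alongside the legacy cpu_stat:<id> raw
 * kstat.  An illustrative read (the value shown is made up):
 *
 *	$ kstat -p cpu:0:sys:syscall
 *	cpu:0:sys:syscall	123456
 */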
static int
cpu_sys_stats_ks_update(kstat_t *ksp, int rw)
{
	cpu_t *cp = (cpu_t *)ksp->ks_private;
	struct cpu_sys_stats_ks_data *csskd;
	cpu_sys_stats_t *css;
	hrtime_t msnsecs[NCMSTATES];
	int	i;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	csskd = ksp->ks_data;
	css = &cp->cpu_stats.sys;

	/*
	 * Read CPU mstate, but compare with the last values we
	 * received to make sure that the returned kstats never
	 * decrease.
	 */

	get_cpu_mstate(cp, msnsecs);
	if (csskd->cpu_nsec_idle.value.ui64 > msnsecs[CMS_IDLE])
		msnsecs[CMS_IDLE] = csskd->cpu_nsec_idle.value.ui64;
	if (csskd->cpu_nsec_user.value.ui64 > msnsecs[CMS_USER])
		msnsecs[CMS_USER] = csskd->cpu_nsec_user.value.ui64;
	if (csskd->cpu_nsec_kernel.value.ui64 > msnsecs[CMS_SYSTEM])
		msnsecs[CMS_SYSTEM] = csskd->cpu_nsec_kernel.value.ui64;

	bcopy(&cpu_sys_stats_ks_data_template, ksp->ks_data,
	    sizeof (cpu_sys_stats_ks_data_template));

	csskd->cpu_ticks_wait.value.ui64 = 0;
	csskd->wait_ticks_io.value.ui64 = 0;

	csskd->cpu_nsec_idle.value.ui64 = msnsecs[CMS_IDLE];
	csskd->cpu_nsec_user.value.ui64 = msnsecs[CMS_USER];
	csskd->cpu_nsec_kernel.value.ui64 = msnsecs[CMS_SYSTEM];
	csskd->cpu_ticks_idle.value.ui64 =
	    NSEC_TO_TICK(csskd->cpu_nsec_idle.value.ui64);
	csskd->cpu_ticks_user.value.ui64 =
	    NSEC_TO_TICK(csskd->cpu_nsec_user.value.ui64);
	csskd->cpu_ticks_kernel.value.ui64 =
	    NSEC_TO_TICK(csskd->cpu_nsec_kernel.value.ui64);
	csskd->cpu_nsec_dtrace.value.ui64 = cp->cpu_dtrace_nsec;
	csskd->dtrace_probes.value.ui64 = cp->cpu_dtrace_probes;
	csskd->cpu_nsec_intr.value.ui64 = cp->cpu_intrlast;
	csskd->cpu_load_intr.value.ui64 = cp->cpu_intrload;
	csskd->bread.value.ui64 = css->bread;
	csskd->bwrite.value.ui64 = css->bwrite;
	csskd->lread.value.ui64 = css->lread;
	csskd->lwrite.value.ui64 = css->lwrite;
	csskd->phread.value.ui64 = css->phread;
	csskd->phwrite.value.ui64 = css->phwrite;
	csskd->pswitch.value.ui64 = css->pswitch;
	csskd->trap.value.ui64 = css->trap;
	csskd->intr.value.ui64 = 0;
	for (i = 0; i < PIL_MAX; i++)
		csskd->intr.value.ui64 += css->intr[i];
	csskd->syscall.value.ui64 = css->syscall;
	csskd->sysread.value.ui64 = css->sysread;
	csskd->syswrite.value.ui64 = css->syswrite;
	csskd->sysfork.value.ui64 = css->sysfork;
	csskd->sysvfork.value.ui64 = css->sysvfork;
	csskd->sysexec.value.ui64 = css->sysexec;
	csskd->readch.value.ui64 = css->readch;
	csskd->writech.value.ui64 = css->writech;
	csskd->rcvint.value.ui64 = css->rcvint;
	csskd->xmtint.value.ui64 = css->xmtint;
	csskd->mdmint.value.ui64 = css->mdmint;
	csskd->rawch.value.ui64 = css->rawch;
	csskd->canch.value.ui64 = css->canch;
	csskd->outch.value.ui64 = css->outch;
	csskd->msg.value.ui64 = css->msg;
	csskd->sema.value.ui64 = css->sema;
	csskd->namei.value.ui64 = css->namei;
	csskd->ufsiget.value.ui64 = css->ufsiget;
	csskd->ufsdirblk.value.ui64 = css->ufsdirblk;
	csskd->ufsipage.value.ui64 = css->ufsipage;
	csskd->ufsinopage.value.ui64 = css->ufsinopage;
	csskd->procovf.value.ui64 = css->procovf;
	csskd->intrthread.value.ui64 = 0;
	for (i = 0; i < LOCK_LEVEL - 1; i++)
		csskd->intrthread.value.ui64 += css->intr[i];
	csskd->intrblk.value.ui64 = css->intrblk;
	csskd->intrunpin.value.ui64 = css->intrunpin;
	csskd->idlethread.value.ui64 = css->idlethread;
	csskd->inv_swtch.value.ui64 = css->inv_swtch;
	csskd->nthreads.value.ui64 = css->nthreads;
	csskd->cpumigrate.value.ui64 = css->cpumigrate;
	csskd->xcalls.value.ui64 = css->xcalls;
	csskd->mutex_adenters.value.ui64 = css->mutex_adenters;
	csskd->rw_rdfails.value.ui64 = css->rw_rdfails;
	csskd->rw_wrfails.value.ui64 = css->rw_wrfails;
	csskd->modload.value.ui64 = css->modload;
	csskd->modunload.value.ui64 = css->modunload;
	csskd->bawrite.value.ui64 = css->bawrite;
	csskd->iowait.value.ui64 = css->iowait;

	return (0);
}
static int
cpu_vm_stats_ks_update(kstat_t *ksp, int rw)
{
	cpu_t *cp = (cpu_t *)ksp->ks_private;
	struct cpu_vm_stats_ks_data *cvskd;
	cpu_vm_stats_t *cvs;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	cvs = &cp->cpu_stats.vm;
	cvskd = ksp->ks_data;

	bcopy(&cpu_vm_stats_ks_data_template, ksp->ks_data,
	    sizeof (cpu_vm_stats_ks_data_template));
	cvskd->pgrec.value.ui64 = cvs->pgrec;
	cvskd->pgfrec.value.ui64 = cvs->pgfrec;
	cvskd->pgin.value.ui64 = cvs->pgin;
	cvskd->pgpgin.value.ui64 = cvs->pgpgin;
	cvskd->pgout.value.ui64 = cvs->pgout;
	cvskd->pgpgout.value.ui64 = cvs->pgpgout;
	cvskd->swapin.value.ui64 = cvs->swapin;
	cvskd->pgswapin.value.ui64 = cvs->pgswapin;
	cvskd->swapout.value.ui64 = cvs->swapout;
	cvskd->pgswapout.value.ui64 = cvs->pgswapout;
	cvskd->zfod.value.ui64 = cvs->zfod;
	cvskd->dfree.value.ui64 = cvs->dfree;
	cvskd->scan.value.ui64 = cvs->scan;
	cvskd->rev.value.ui64 = cvs->rev;
	cvskd->hat_fault.value.ui64 = cvs->hat_fault;
	cvskd->as_fault.value.ui64 = cvs->as_fault;
	cvskd->maj_fault.value.ui64 = cvs->maj_fault;
	cvskd->cow_fault.value.ui64 = cvs->cow_fault;
	cvskd->prot_fault.value.ui64 = cvs->prot_fault;
	cvskd->softlock.value.ui64 = cvs->softlock;
	cvskd->kernel_asflt.value.ui64 = cvs->kernel_asflt;
	cvskd->pgrrun.value.ui64 = cvs->pgrrun;
	cvskd->execpgin.value.ui64 = cvs->execpgin;
	cvskd->execpgout.value.ui64 = cvs->execpgout;
	cvskd->execfree.value.ui64 = cvs->execfree;
	cvskd->anonpgin.value.ui64 = cvs->anonpgin;
	cvskd->anonpgout.value.ui64 = cvs->anonpgout;
	cvskd->anonfree.value.ui64 = cvs->anonfree;
	cvskd->fspgin.value.ui64 = cvs->fspgin;
	cvskd->fspgout.value.ui64 = cvs->fspgout;
	cvskd->fsfree.value.ui64 = cvs->fsfree;

	return (0);
}
static int
cpu_stat_ks_update(kstat_t *ksp, int rw)
{
	cpu_stat_t *cso;
	cpu_t *cp;
	int i;
	hrtime_t msnsecs[NCMSTATES];

	cso = (cpu_stat_t *)ksp->ks_data;
	cp = (cpu_t *)ksp->ks_private;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	/*
	 * Read CPU mstate, but compare with the last values we
	 * received to make sure that the returned kstats never
	 * decrease.
	 */

	get_cpu_mstate(cp, msnsecs);
	msnsecs[CMS_IDLE] = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
	msnsecs[CMS_USER] = NSEC_TO_TICK(msnsecs[CMS_USER]);
	msnsecs[CMS_SYSTEM] = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
	if (cso->cpu_sysinfo.cpu[CPU_IDLE] < msnsecs[CMS_IDLE])
		cso->cpu_sysinfo.cpu[CPU_IDLE] = msnsecs[CMS_IDLE];
	if (cso->cpu_sysinfo.cpu[CPU_USER] < msnsecs[CMS_USER])
		cso->cpu_sysinfo.cpu[CPU_USER] = msnsecs[CMS_USER];
	if (cso->cpu_sysinfo.cpu[CPU_KERNEL] < msnsecs[CMS_SYSTEM])
		cso->cpu_sysinfo.cpu[CPU_KERNEL] = msnsecs[CMS_SYSTEM];
	cso->cpu_sysinfo.cpu[CPU_WAIT] = 0;
	cso->cpu_sysinfo.wait[W_IO] = 0;
	cso->cpu_sysinfo.wait[W_SWAP] = 0;
	cso->cpu_sysinfo.wait[W_PIO] = 0;
	cso->cpu_sysinfo.bread = CPU_STATS(cp, sys.bread);
	cso->cpu_sysinfo.bwrite = CPU_STATS(cp, sys.bwrite);
	cso->cpu_sysinfo.lread = CPU_STATS(cp, sys.lread);
	cso->cpu_sysinfo.lwrite = CPU_STATS(cp, sys.lwrite);
	cso->cpu_sysinfo.phread = CPU_STATS(cp, sys.phread);
	cso->cpu_sysinfo.phwrite = CPU_STATS(cp, sys.phwrite);
	cso->cpu_sysinfo.pswitch = CPU_STATS(cp, sys.pswitch);
	cso->cpu_sysinfo.trap = CPU_STATS(cp, sys.trap);
	cso->cpu_sysinfo.intr = 0;
	for (i = 0; i < PIL_MAX; i++)
		cso->cpu_sysinfo.intr += CPU_STATS(cp, sys.intr[i]);
	cso->cpu_sysinfo.syscall = CPU_STATS(cp, sys.syscall);
	cso->cpu_sysinfo.sysread = CPU_STATS(cp, sys.sysread);
	cso->cpu_sysinfo.syswrite = CPU_STATS(cp, sys.syswrite);
	cso->cpu_sysinfo.sysfork = CPU_STATS(cp, sys.sysfork);
	cso->cpu_sysinfo.sysvfork = CPU_STATS(cp, sys.sysvfork);
	cso->cpu_sysinfo.sysexec = CPU_STATS(cp, sys.sysexec);
	cso->cpu_sysinfo.readch = CPU_STATS(cp, sys.readch);
	cso->cpu_sysinfo.writech = CPU_STATS(cp, sys.writech);
	cso->cpu_sysinfo.rcvint = CPU_STATS(cp, sys.rcvint);
	cso->cpu_sysinfo.xmtint = CPU_STATS(cp, sys.xmtint);
	cso->cpu_sysinfo.mdmint = CPU_STATS(cp, sys.mdmint);
	cso->cpu_sysinfo.rawch = CPU_STATS(cp, sys.rawch);
	cso->cpu_sysinfo.canch = CPU_STATS(cp, sys.canch);
	cso->cpu_sysinfo.outch = CPU_STATS(cp, sys.outch);
	cso->cpu_sysinfo.msg = CPU_STATS(cp, sys.msg);
	cso->cpu_sysinfo.sema = CPU_STATS(cp, sys.sema);
	cso->cpu_sysinfo.namei = CPU_STATS(cp, sys.namei);
	cso->cpu_sysinfo.ufsiget = CPU_STATS(cp, sys.ufsiget);
	cso->cpu_sysinfo.ufsdirblk = CPU_STATS(cp, sys.ufsdirblk);
	cso->cpu_sysinfo.ufsipage = CPU_STATS(cp, sys.ufsipage);
	cso->cpu_sysinfo.ufsinopage = CPU_STATS(cp, sys.ufsinopage);
	cso->cpu_sysinfo.inodeovf = 0;
	cso->cpu_sysinfo.fileovf = 0;
	cso->cpu_sysinfo.procovf = CPU_STATS(cp, sys.procovf);
	cso->cpu_sysinfo.intrthread = 0;
	for (i = 0; i < LOCK_LEVEL - 1; i++)
		cso->cpu_sysinfo.intrthread += CPU_STATS(cp, sys.intr[i]);
	cso->cpu_sysinfo.intrblk = CPU_STATS(cp, sys.intrblk);
	cso->cpu_sysinfo.idlethread = CPU_STATS(cp, sys.idlethread);
	cso->cpu_sysinfo.inv_swtch = CPU_STATS(cp, sys.inv_swtch);
	cso->cpu_sysinfo.nthreads = CPU_STATS(cp, sys.nthreads);
	cso->cpu_sysinfo.cpumigrate = CPU_STATS(cp, sys.cpumigrate);
	cso->cpu_sysinfo.xcalls = CPU_STATS(cp, sys.xcalls);
	cso->cpu_sysinfo.mutex_adenters = CPU_STATS(cp, sys.mutex_adenters);
	cso->cpu_sysinfo.rw_rdfails = CPU_STATS(cp, sys.rw_rdfails);
	cso->cpu_sysinfo.rw_wrfails = CPU_STATS(cp, sys.rw_wrfails);
	cso->cpu_sysinfo.modload = CPU_STATS(cp, sys.modload);
	cso->cpu_sysinfo.modunload = CPU_STATS(cp, sys.modunload);
	cso->cpu_sysinfo.bawrite = CPU_STATS(cp, sys.bawrite);
	cso->cpu_sysinfo.rw_enters = 0;
	cso->cpu_sysinfo.win_uo_cnt = 0;
	cso->cpu_sysinfo.win_uu_cnt = 0;
	cso->cpu_sysinfo.win_so_cnt = 0;
	cso->cpu_sysinfo.win_su_cnt = 0;
	cso->cpu_sysinfo.win_suo_cnt = 0;

	cso->cpu_syswait.iowait = CPU_STATS(cp, sys.iowait);
	cso->cpu_syswait.swap = 0;
	cso->cpu_syswait.physio = 0;

	cso->cpu_vminfo.pgrec = CPU_STATS(cp, vm.pgrec);
	cso->cpu_vminfo.pgfrec = CPU_STATS(cp, vm.pgfrec);
	cso->cpu_vminfo.pgin = CPU_STATS(cp, vm.pgin);
	cso->cpu_vminfo.pgpgin = CPU_STATS(cp, vm.pgpgin);
	cso->cpu_vminfo.pgout = CPU_STATS(cp, vm.pgout);
	cso->cpu_vminfo.pgpgout = CPU_STATS(cp, vm.pgpgout);
	cso->cpu_vminfo.swapin = CPU_STATS(cp, vm.swapin);
	cso->cpu_vminfo.pgswapin = CPU_STATS(cp, vm.pgswapin);
	cso->cpu_vminfo.swapout = CPU_STATS(cp, vm.swapout);
	cso->cpu_vminfo.pgswapout = CPU_STATS(cp, vm.pgswapout);
	cso->cpu_vminfo.zfod = CPU_STATS(cp, vm.zfod);
	cso->cpu_vminfo.dfree = CPU_STATS(cp, vm.dfree);
	cso->cpu_vminfo.scan = CPU_STATS(cp, vm.scan);
	cso->cpu_vminfo.rev = CPU_STATS(cp, vm.rev);
	cso->cpu_vminfo.hat_fault = CPU_STATS(cp, vm.hat_fault);
	cso->cpu_vminfo.as_fault = CPU_STATS(cp, vm.as_fault);
	cso->cpu_vminfo.maj_fault = CPU_STATS(cp, vm.maj_fault);
	cso->cpu_vminfo.cow_fault = CPU_STATS(cp, vm.cow_fault);
	cso->cpu_vminfo.prot_fault = CPU_STATS(cp, vm.prot_fault);
	cso->cpu_vminfo.softlock = CPU_STATS(cp, vm.softlock);
	cso->cpu_vminfo.kernel_asflt = CPU_STATS(cp, vm.kernel_asflt);
	cso->cpu_vminfo.pgrrun = CPU_STATS(cp, vm.pgrrun);
	cso->cpu_vminfo.execpgin = CPU_STATS(cp, vm.execpgin);
	cso->cpu_vminfo.execpgout = CPU_STATS(cp, vm.execpgout);
	cso->cpu_vminfo.execfree = CPU_STATS(cp, vm.execfree);
	cso->cpu_vminfo.anonpgin = CPU_STATS(cp, vm.anonpgin);
	cso->cpu_vminfo.anonpgout = CPU_STATS(cp, vm.anonpgout);
	cso->cpu_vminfo.anonfree = CPU_STATS(cp, vm.anonfree);
	cso->cpu_vminfo.fspgin = CPU_STATS(cp, vm.fspgin);
	cso->cpu_vminfo.fspgout = CPU_STATS(cp, vm.fspgout);
	cso->cpu_vminfo.fsfree = CPU_STATS(cp, vm.fsfree);