4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 * Copyright (c) 2009, Intel Corporation.
27 * All rights reserved.
30 #include <sys/cpu_pm.h>
31 #include <sys/x86_archext.h>
34 #include <sys/machsystm.h>
35 #include <sys/archsystm.h>
37 #include <sys/acpi/acpi.h>
38 #include <sys/acpica.h>
39 #include <sys/cpupm.h>
40 #include <sys/cpu_idle.h>
41 #include <sys/cpu_acpi.h>
42 #include <sys/cpupm_throttle.h>
43 #include <sys/dtrace.h>
47 * This callback is used to build the PPM CPU domains once
48 * a CPU device has been started. The callback is initialized
49 * by the PPM driver to point to a routine that will build the
52 void (*cpupm_ppm_alloc_pstate_domains
)(cpu_t
*);
55 * This callback is used to remove CPU from the PPM CPU domains
56 * when the cpu driver is detached. The callback is initialized
57 * by the PPM driver to point to a routine that will remove CPU
60 void (*cpupm_ppm_free_pstate_domains
)(cpu_t
*);
63 * This callback is used to redefine the topspeed for a CPU device.
64 * Since all CPUs in a domain should have identical properties, this
65 * callback is initialized by the PPM driver to point to a routine
66 * that will redefine the topspeed for all devices in a CPU domain.
67 * This callback is exercised whenever an ACPI _PPC change notification
68 * is received by the CPU driver.
70 void (*cpupm_redefine_topspeed
)(void *);
73 * This callback is used by the PPM driver to call into the CPU driver
74 * to set a new topspeed for a CPU.
76 void (*cpupm_set_topspeed_callb
)(void *, int);
79 * This callback is used by the PPM driver to call into the CPU driver
80 * to find a CPU's current topspeed (i.e., its current ACPI _PPC value).
82 int (*cpupm_get_topspeed_callb
)(void *);
/* Forward declarations for the static handlers defined later in this file. */
84 static void cpupm_event_notify_handler(ACPI_HANDLE
, UINT32
, void *);
85 static void cpupm_free_notify_handlers(cpu_t
*);
86 static void cpupm_power_manage_notifications(void *);
89 * Until proven otherwise, all power states are manageable.
91 static uint32_t cpupm_enabled
= CPUPM_ALL_STATES
;
/*
 * Global lists of the P-, T- and C-state power domains built from the
 * ACPI _PSD/_TSD/_CSD dependency information (see cpupm_alloc_domains()).
 */
93 cpupm_state_domains_t
*cpupm_pstate_domains
= NULL
;
94 cpupm_state_domains_t
*cpupm_tstate_domains
= NULL
;
95 cpupm_state_domains_t
*cpupm_cstate_domains
= NULL
;
100 * cpupm_cs_sample_interval is the length of time we wait before
101 * recalculating c-state statistics. When a CPU goes idle it checks
102 * to see if it has been longer than cpupm_cs_sample_interval since it last
103 * calculated which C-state to go to.
105 * cpupm_cs_idle_cost_tunable is the ratio of time CPU spends executing + idle
106 * divided by time spent in the idle state transitions.
107 * A value of 10 means the CPU will not spend more than 1/10 of its time
108 * in idle latency. The worst case performance will be 90% of non Deep C-state
111 * cpupm_cs_idle_save_tunable is how long we must stay in a deeper C-state
112 * before it is worth going there. Expressed as a multiple of latency.
114 uint32_t cpupm_cs_sample_interval
= 100*1000*1000; /* 100 milliseconds */
115 uint32_t cpupm_cs_idle_cost_tunable
= 10; /* work time / latency cost */
116 uint32_t cpupm_cs_idle_save_tunable
= 2; /* idle power savings */
/* Minimum sampled idle percentage before C2/C3 are considered worthwhile. */
117 uint16_t cpupm_C2_idle_pct_tunable
= 70;
118 uint16_t cpupm_C3_idle_pct_tunable
= 80;
/* Vendor-specific probe entry points (Intel / AMD modules). */
121 extern boolean_t
cpupm_intel_init(cpu_t
*);
122 extern boolean_t
cpupm_amd_init(cpu_t
*);
124 typedef struct cpupm_vendor
{
125 boolean_t (*cpuv_init
)(cpu_t
*);
129 * Table of supported vendors.
/*
 * NOTE(review): the table entries (and the closing of the typedef above)
 * are not visible in this extraction; upstream lists the vendor init
 * routines terminated by a NULL entry -- confirm against the full source.
 */
131 static cpupm_vendor_t cpupm_vendors
[] = {
/*
 * cpupm_init(): per-CPU power-management initialization.  Allocates and
 * links the cpupm_mach_state_t, obtains the CPU's ACPI handle, probes
 * the vendor module table, then initializes P-, T- and C-state support
 * in turn (disabling each capability on failure), records the resulting
 * capabilities with ACPI, kicks the initial _TPC/_CST/_PPC management,
 * and installs the ACPI event notify handler.
 * NOTE(review): several lines (error-path returns, braces, #ifdefs)
 * appear to be missing from this extraction; verify against upstream.
 */
139 * Initialize the machine.
140 * See if a module exists for managing power for this CPU.
144 cpupm_init(cpu_t
*cp
)
147 cpupm_vendor_t
*vendors
;
148 cpupm_mach_state_t
*mach_state
;
149 struct machcpu
*mcpu
= &(cp
->cpu_m
);
150 static boolean_t first
= B_TRUE
;
155 mach_state
= cp
->cpu_m
.mcpu_pm_mach_state
=
156 kmem_zalloc(sizeof (cpupm_mach_state_t
), KM_SLEEP
);
157 mach_state
->ms_caps
= CPUPM_NO_STATES
;
158 mutex_init(&mach_state
->ms_lock
, NULL
, MUTEX_DRIVER
, NULL
);
160 mach_state
->ms_acpi_handle
= cpu_acpi_init(cp
);
/* Without an ACPI handle no power state can be managed on this CPU. */
161 if (mach_state
->ms_acpi_handle
== NULL
) {
163 cmn_err(CE_WARN
, "!cpupm_init: processor %d: "
164 "unable to get ACPI handle", cp
->cpu_id
);
165 cmn_err(CE_NOTE
, "!CPU power management will not function.");
172 * Loop through the CPU management module table and see if
173 * any of the modules implement CPU power management
176 for (vendors
= cpupm_vendors
; vendors
->cpuv_init
!= NULL
; vendors
++) {
177 if (vendors
->cpuv_init(cp
))
182 * Nope, we can't power manage this CPU.
/*
 * NOTE(review): after the loop above, "vendors" points into the array
 * (at worst at its NULL-terminated sentinel entry) and can never itself
 * be NULL, so this test looks like it was intended to be
 * "vendors->cpuv_init == NULL" -- confirm against upstream.
 */
184 if (vendors
== NULL
) {
192 * If P-state support exists for this system, then initialize it.
194 if (mach_state
->ms_pstate
.cma_ops
!= NULL
) {
195 ret
= mach_state
->ms_pstate
.cma_ops
->cpus_init(cp
);
/* P-state driver failed to initialize: drop the ops and disable. */
197 mach_state
->ms_pstate
.cma_ops
= NULL
;
198 cpupm_disable(CPUPM_P_STATES
);
200 nspeeds
= cpupm_get_speeds(cp
, &speeds
);
202 cmn_err(CE_NOTE
, "!cpupm_init: processor %d:"
203 " no speeds to manage", cp
->cpu_id
);
205 cpupm_set_supp_freqs(cp
, speeds
, nspeeds
);
206 cpupm_free_speeds(speeds
, nspeeds
);
207 mach_state
->ms_caps
|= CPUPM_P_STATES
;
211 cpupm_disable(CPUPM_P_STATES
);
/* Same pattern for T-state (throttling) support. */
214 if (mach_state
->ms_tstate
.cma_ops
!= NULL
) {
215 ret
= mach_state
->ms_tstate
.cma_ops
->cpus_init(cp
);
217 mach_state
->ms_tstate
.cma_ops
= NULL
;
218 cpupm_disable(CPUPM_T_STATES
);
220 mach_state
->ms_caps
|= CPUPM_T_STATES
;
223 cpupm_disable(CPUPM_T_STATES
);
227 * If C-states support exists for this system, then initialize it.
229 if (mach_state
->ms_cstate
.cma_ops
!= NULL
) {
230 ret
= mach_state
->ms_cstate
.cma_ops
->cpus_init(cp
);
/* C-state init failed: cap at C1 and fall back to non-deep idle. */
232 mach_state
->ms_cstate
.cma_ops
= NULL
;
233 mcpu
->max_cstates
= CPU_ACPI_C1
;
234 cpupm_disable(CPUPM_C_STATES
);
235 idle_cpu
= non_deep_idle_cpu
;
236 disp_enq_thread
= non_deep_idle_disp_enq_thread
;
237 } else if (cpu_deep_cstates_supported()) {
238 mcpu
->max_cstates
= cpu_acpi_get_max_cstates(
239 mach_state
->ms_acpi_handle
);
240 if (mcpu
->max_cstates
> CPU_ACPI_C1
) {
241 (void) cstate_timer_callback(
242 CST_EVENT_MULTIPLE_CSTATES
);
/* Route this CPU's idle loop through the ACPI C-state idler. */
243 cp
->cpu_m
.mcpu_idle_cpu
= cpu_acpi_idle
;
244 mcpu
->mcpu_idle_type
= CPU_ACPI_C1
;
245 disp_enq_thread
= cstate_wakeup
;
247 (void) cstate_timer_callback(
248 CST_EVENT_ONE_CSTATE
);
250 mach_state
->ms_caps
|= CPUPM_C_STATES
;
252 mcpu
->max_cstates
= CPU_ACPI_C1
;
253 idle_cpu
= non_deep_idle_cpu
;
254 disp_enq_thread
= non_deep_idle_disp_enq_thread
;
257 cpupm_disable(CPUPM_C_STATES
);
261 if (mach_state
->ms_caps
== CPUPM_NO_STATES
) {
/*
 * At least one state is manageable: publish the P/C capabilities to
 * ACPI and perform the initial management pass for each capability.
 */
268 if ((mach_state
->ms_caps
& CPUPM_T_STATES
) ||
269 (mach_state
->ms_caps
& CPUPM_P_STATES
) ||
270 (mach_state
->ms_caps
& CPUPM_C_STATES
)) {
272 acpica_write_cpupm_capabilities(
273 mach_state
->ms_caps
& CPUPM_P_STATES
,
274 mach_state
->ms_caps
& CPUPM_C_STATES
);
276 if (mach_state
->ms_caps
& CPUPM_T_STATES
) {
277 cpupm_throttle_manage_notification(cp
);
279 if (mach_state
->ms_caps
& CPUPM_C_STATES
) {
280 cpuidle_manage_cstates(cp
);
282 if (mach_state
->ms_caps
& CPUPM_P_STATES
) {
283 cpupm_power_manage_notifications(cp
);
/* Register for _TPC/_CST/_PPC change notifications on this CPU. */
285 cpupm_add_notify_handler(cp
, cpupm_event_notify_handler
, cp
);
/*
 * cpupm_free(): tear down everything cpupm_init()/cpupm_start() built
 * for this CPU.  For each of the P/T/C-state drivers, either stop it
 * (cpupm_stop == B_TRUE, CPU being detached) or fully finalize it
 * (cpupm_stop == B_FALSE), then remove the notify handlers, release the
 * ACPI handle, and free the mach_state itself.
 * NOTE(review): the "if (cpupm_stop) ... else ..." selection lines
 * between each cpus_stop()/cpus_fini() pair are not visible in this
 * extraction -- confirm against upstream.
 */
292 * Free any resources allocated during cpupm initialization or cpupm start.
296 cpupm_free(cpu_t
*cp
, boolean_t cpupm_stop
)
299 cpupm_mach_state_t
*mach_state
=
300 (cpupm_mach_state_t
*)cp
->cpu_m
.mcpu_pm_mach_state
;
/* Nothing was ever initialized for this CPU. */
302 if (mach_state
== NULL
)
305 if (mach_state
->ms_pstate
.cma_ops
!= NULL
) {
307 mach_state
->ms_pstate
.cma_ops
->cpus_stop(cp
);
309 mach_state
->ms_pstate
.cma_ops
->cpus_fini(cp
);
310 mach_state
->ms_pstate
.cma_ops
= NULL
;
313 if (mach_state
->ms_tstate
.cma_ops
!= NULL
) {
315 mach_state
->ms_tstate
.cma_ops
->cpus_stop(cp
);
317 mach_state
->ms_tstate
.cma_ops
->cpus_fini(cp
);
318 mach_state
->ms_tstate
.cma_ops
= NULL
;
321 if (mach_state
->ms_cstate
.cma_ops
!= NULL
) {
323 mach_state
->ms_cstate
.cma_ops
->cpus_stop(cp
);
325 mach_state
->ms_cstate
.cma_ops
->cpus_fini(cp
);
327 mach_state
->ms_cstate
.cma_ops
= NULL
;
330 cpupm_free_notify_handlers(cp
);
332 if (mach_state
->ms_acpi_handle
!= NULL
) {
333 cpu_acpi_fini(mach_state
->ms_acpi_handle
);
334 mach_state
->ms_acpi_handle
= NULL
;
337 mutex_destroy(&mach_state
->ms_lock
);
338 kmem_free(mach_state
, sizeof (cpupm_mach_state_t
));
/* Clear the back-pointer so a later cpupm_free() is a no-op. */
339 cp
->cpu_m
.mcpu_pm_mach_state
= NULL
;
344 cpupm_fini(cpu_t
*cp
)
347 * call (*cpus_fini)() ops to release the cpupm resource
348 * in the P/C/T-state driver
350 cpupm_free(cp
, B_FALSE
);
/*
 * cpupm_start(): entry point invoked when a CPU device is started.
 * NOTE(review): the function body is not visible in this extraction
 * (presumably it performs the per-CPU cpupm initialization -- confirm
 * against upstream).
 */
354 cpupm_start(cpu_t
*cp
)
360 cpupm_stop(cpu_t
*cp
)
363 * call (*cpus_stop)() ops to reclaim the cpupm resource
364 * in the P/C/T-state driver
366 cpupm_free(cp
, B_TRUE
);
370 * If a CPU has started and at least one power state is manageable,
371 * then the CPU is ready for power management.
374 cpupm_is_ready(cpu_t
*cp
)
377 cpupm_mach_state_t
*mach_state
=
378 (cpupm_mach_state_t
*)cp
->cpu_m
.mcpu_pm_mach_state
;
/*
 * NOTE(review): mach_state is dereferenced here without a NULL check,
 * unlike e.g. cpupm_plat_domain_id(); callers must guarantee
 * cpupm_init() has run for this CPU.
 */
379 uint32_t cpupm_caps
= mach_state
->ms_caps
;
/* Globally disabled: nothing is manageable regardless of caps. */
381 if (cpupm_enabled
== CPUPM_NO_STATES
)
/* Ready if any of the T/P/C capabilities was established at init. */
384 if ((cpupm_caps
& CPUPM_T_STATES
) ||
385 (cpupm_caps
& CPUPM_P_STATES
) ||
386 (cpupm_caps
& CPUPM_C_STATES
))
/*
 * NOTE(review): this _NOTE(ARGUNUSED) line appears to belong to an
 * alternate (e.g. xVM/paravirtualized) compile branch whose #ifdef
 * lines are missing from this extraction -- confirm against upstream.
 */
391 _NOTE(ARGUNUSED(cp
));
397 cpupm_is_enabled(uint32_t state
)
399 return ((cpupm_enabled
& state
) == state
);
403 * By default, all states are enabled.
406 cpupm_disable(uint32_t state
)
409 if (state
& CPUPM_P_STATES
) {
410 cpupm_free_domains(&cpupm_pstate_domains
);
412 if (state
& CPUPM_T_STATES
) {
413 cpupm_free_domains(&cpupm_tstate_domains
);
415 if (state
& CPUPM_C_STATES
) {
416 cpupm_free_domains(&cpupm_cstate_domains
);
418 cpupm_enabled
&= ~state
;
/*
 * cpupm_alloc_domains(): place a CPU into the power domain for the
 * requested state type.  The domain id and coordination type come from
 * the cached ACPI _PSD/_TSD/_CSD object when present; otherwise they
 * default to the chip id (core id for C-states) with hardware-coordinated
 * (HW_ALL) semantics.  The CPU is added to an existing matching domain or
 * a new one is allocated and linked at the list head.
 * NOTE(review): the switch(state)/case labels selecting the P/T/C arms
 * are not visible in this extraction -- confirm against upstream.
 */
422 * Allocate power domains for C,P and T States
425 cpupm_alloc_domains(cpu_t
*cp
, int state
)
427 cpupm_mach_state_t
*mach_state
=
428 (cpupm_mach_state_t
*)(cp
->cpu_m
.mcpu_pm_mach_state
);
429 cpu_acpi_handle_t handle
= mach_state
->ms_acpi_handle
;
430 cpupm_state_domains_t
**dom_ptr
;
431 cpupm_state_domains_t
*dptr
;
432 cpupm_state_domains_t
**mach_dom_state_ptr
;
/* P-state arm: use _PSD if cached, else default to per-chip HW_ALL. */
438 if (CPU_ACPI_IS_OBJ_CACHED(handle
, CPU_ACPI_PSD_CACHED
)) {
439 domain
= CPU_ACPI_PSD(handle
).sd_domain
;
440 type
= CPU_ACPI_PSD(handle
).sd_type
;
/* cpuid_get_chipid() requires cpu_lock; take it only if not held. */
442 if (MUTEX_HELD(&cpu_lock
)) {
443 domain
= cpuid_get_chipid(cp
);
445 mutex_enter(&cpu_lock
);
446 domain
= cpuid_get_chipid(cp
);
447 mutex_exit(&cpu_lock
);
449 type
= CPU_ACPI_HW_ALL
;
451 dom_ptr
= &cpupm_pstate_domains
;
452 mach_dom_state_ptr
= &mach_state
->ms_pstate
.cma_domain
;
/* T-state arm: same pattern, keyed off _TSD. */
455 if (CPU_ACPI_IS_OBJ_CACHED(handle
, CPU_ACPI_TSD_CACHED
)) {
456 domain
= CPU_ACPI_TSD(handle
).sd_domain
;
457 type
= CPU_ACPI_TSD(handle
).sd_type
;
459 if (MUTEX_HELD(&cpu_lock
)) {
460 domain
= cpuid_get_chipid(cp
);
462 mutex_enter(&cpu_lock
);
463 domain
= cpuid_get_chipid(cp
);
464 mutex_exit(&cpu_lock
);
466 type
= CPU_ACPI_HW_ALL
;
468 dom_ptr
= &cpupm_tstate_domains
;
469 mach_dom_state_ptr
= &mach_state
->ms_tstate
.cma_domain
;
/* C-state arm: keyed off _CSD; default domain is the core id. */
472 if (CPU_ACPI_IS_OBJ_CACHED(handle
, CPU_ACPI_CSD_CACHED
)) {
473 domain
= CPU_ACPI_CSD(handle
).sd_domain
;
474 type
= CPU_ACPI_CSD(handle
).sd_type
;
476 if (MUTEX_HELD(&cpu_lock
)) {
477 domain
= cpuid_get_coreid(cp
);
479 mutex_enter(&cpu_lock
);
480 domain
= cpuid_get_coreid(cp
);
481 mutex_exit(&cpu_lock
);
483 type
= CPU_ACPI_HW_ALL
;
485 dom_ptr
= &cpupm_cstate_domains
;
486 mach_dom_state_ptr
= &mach_state
->ms_cstate
.cma_domain
;
/* Reuse an existing domain node with a matching id if one exists. */
492 for (dptr
= *dom_ptr
; dptr
!= NULL
; dptr
= dptr
->pm_next
) {
493 if (dptr
->pm_domain
== domain
)
497 /* new domain is created and linked at the head */
499 dptr
= kmem_zalloc(sizeof (cpupm_state_domains_t
), KM_SLEEP
);
500 dptr
->pm_domain
= domain
;
501 dptr
->pm_type
= type
;
502 dptr
->pm_next
= *dom_ptr
;
/* Spin lock at DISP_LEVEL: the domain is touched from the idle path. */
503 mutex_init(&dptr
->pm_lock
, NULL
, MUTEX_SPIN
,
504 (void *)ipltospl(DISP_LEVEL
));
505 CPUSET_ZERO(dptr
->pm_cpus
);
/* Record this CPU in the domain and hang the domain off mach_state. */
508 CPUSET_ADD(dptr
->pm_cpus
, cp
->cpu_id
);
509 *mach_dom_state_ptr
= dptr
;
/*
 * cpupm_free_domains(): walk one of the global C/P/T domain lists,
 * destroying each node's spin lock and freeing the node.
 * NOTE(review): the final reset of *dom_ptr after the loop is not
 * visible in this extraction -- confirm against upstream.
 */
513 * Free C, P or T state power domains
516 cpupm_free_domains(cpupm_state_domains_t
**dom_ptr
)
518 cpupm_state_domains_t
*this_domain
, *next_domain
;
520 this_domain
= *dom_ptr
;
521 while (this_domain
!= NULL
) {
/* Grab the successor before this node is freed. */
522 next_domain
= this_domain
->pm_next
;
523 mutex_destroy(&this_domain
->pm_lock
);
524 kmem_free((void *)this_domain
,
525 sizeof (cpupm_state_domains_t
));
526 this_domain
= next_domain
;
/*
 * cpupm_remove_domains(): delete a CPU from the C, P or T state power
 * domain it belongs to.  The CPU's own domain id is looked up from its
 * mach_state, the matching node is found in the global list, and the
 * CPU is removed from that node's cpuset under pm_lock.
 * NOTE(review): the switch(state)/case labels selecting which of the
 * three pm_domain assignments runs are not visible in this extraction.
 */
532 * Remove CPU from C, P or T state power domains
535 cpupm_remove_domains(cpu_t
*cp
, int state
, cpupm_state_domains_t
**dom_ptr
)
537 cpupm_mach_state_t
*mach_state
=
538 (cpupm_mach_state_t
*)(cp
->cpu_m
.mcpu_pm_mach_state
);
539 cpupm_state_domains_t
*dptr
;
546 pm_domain
= mach_state
->ms_pstate
.cma_domain
->pm_domain
;
549 pm_domain
= mach_state
->ms_tstate
.cma_domain
->pm_domain
;
552 pm_domain
= mach_state
->ms_cstate
.cma_domain
->pm_domain
;
559 * Find the CPU C, P or T state power domain
561 for (dptr
= *dom_ptr
; dptr
!= NULL
; dptr
= dptr
->pm_next
) {
562 if (dptr
->pm_domain
== pm_domain
)
567 * return if no matched domain found
573 * We found one matched power domain, remove CPU from its cpuset.
574 * pm_lock(spin lock) here to avoid the race conditions between
575 * event change notification and cpu remove.
577 mutex_enter(&dptr
->pm_lock
);
578 if (CPU_IN_SET(dptr
->pm_cpus
, cp
->cpu_id
))
579 CPUSET_DEL(dptr
->pm_cpus
, cp
->cpu_id
);
580 mutex_exit(&dptr
->pm_lock
);
/*
 * cpupm_alloc_ms_cstate(): allocate the per-CPU C-state bookkeeping
 * structure (cma_c_state_t) used by the idle-policy code and seed the
 * next C-state to C1.  Asserts the structure is not already allocated.
 * NOTE(review): the continuation line carrying the kmem_zalloc() sleep
 * flag is not visible in this extraction.
 */
584 cpupm_alloc_ms_cstate(cpu_t
*cp
)
586 cpupm_mach_state_t
*mach_state
;
587 cpupm_mach_acpi_state_t
*ms_cstate
;
589 mach_state
= (cpupm_mach_state_t
*)(cp
->cpu_m
.mcpu_pm_mach_state
);
590 ms_cstate
= &mach_state
->ms_cstate
;
591 ASSERT(ms_cstate
->cma_state
.cstate
== NULL
);
592 ms_cstate
->cma_state
.cstate
= kmem_zalloc(sizeof (cma_c_state_t
),
/* Start conservatively: the first idle entry targets C1. */
594 ms_cstate
->cma_state
.cstate
->cs_next_cstate
= CPU_ACPI_C1
;
598 cpupm_free_ms_cstate(cpu_t
*cp
)
600 cpupm_mach_state_t
*mach_state
=
601 (cpupm_mach_state_t
*)(cp
->cpu_m
.mcpu_pm_mach_state
);
602 cpupm_mach_acpi_state_t
*ms_cstate
= &mach_state
->ms_cstate
;
604 if (ms_cstate
->cma_state
.cstate
!= NULL
) {
605 kmem_free(ms_cstate
->cma_state
.cstate
, sizeof (cma_c_state_t
));
606 ms_cstate
->cma_state
.cstate
= NULL
;
/*
 * cpupm_state_change(): request a P- or T-state transition to "level"
 * for the given CPU, dispatching according to the domain's ACPI
 * coordination type: SW_ANY transitions the whole domain from any one
 * CPU, while SW_ALL/HW_ALL request the change on every CPU in the
 * domain under the domain spin lock.
 * NOTE(review): the switch(state) selecting the P vs. T ops/domain and
 * the break statements between cases are not visible in this extraction.
 */
611 cpupm_state_change(cpu_t
*cp
, int level
, int state
)
613 cpupm_mach_state_t
*mach_state
=
614 (cpupm_mach_state_t
*)(cp
->cpu_m
.mcpu_pm_mach_state
);
615 cpupm_state_ops_t
*state_ops
;
616 cpupm_state_domains_t
*state_domain
;
619 DTRACE_PROBE2(cpupm__state__change
, cpu_t
*, cp
, int, level
);
621 if (mach_state
== NULL
) {
627 state_ops
= mach_state
->ms_pstate
.cma_ops
;
628 state_domain
= mach_state
->ms_pstate
.cma_domain
;
631 state_ops
= mach_state
->ms_tstate
.cma_ops
;
632 state_domain
= mach_state
->ms_tstate
.cma_domain
;
638 switch (state_domain
->pm_type
) {
639 case CPU_ACPI_SW_ANY
:
641 * A request on any CPU in the domain transitions the domain
643 CPUSET_ONLY(set
, cp
->cpu_id
);
644 state_ops
->cpus_change(set
, level
);
646 case CPU_ACPI_SW_ALL
:
648 * All CPUs in the domain must request the transition
650 case CPU_ACPI_HW_ALL
:
652 * P/T-state transitions are coordinated by the hardware
653 * For now, request the transition on all CPUs in the domain,
654 * but looking ahead we can probably be smarter about this.
656 mutex_enter(&state_domain
->pm_lock
);
657 state_ops
->cpus_change(state_domain
->pm_cpus
, level
);
658 mutex_exit(&state_domain
->pm_lock
);
661 cmn_err(CE_NOTE
, "Unknown domain coordination type: %d",
662 state_domain
->pm_type
);
/*
 * cpupm_plat_domain_id(): report the power-domain id for a CPU to the
 * common CPU power manager.  Active (P-state) and idle (C-state)
 * domains are distinguished by "type"; CPUPM_NO_DOMAIN is returned when
 * power management is uninitialized/disabled or no domain exists.
 */
667 * CPU PM interfaces exposed to the CPU power manager
671 cpupm_plat_domain_id(cpu_t
*cp
, cpupm_dtype_t type
)
673 cpupm_mach_state_t
*mach_state
=
674 (cpupm_mach_state_t
*)(cp
->cpu_m
.mcpu_pm_mach_state
);
676 if ((mach_state
== NULL
) || (!cpupm_is_enabled(CPUPM_P_STATES
) &&
677 !cpupm_is_enabled(CPUPM_C_STATES
))) {
678 return (CPUPM_NO_DOMAIN
);
680 if (type
== CPUPM_DTYPE_ACTIVE
) {
682 * Return P-State domain for the specified CPU
684 if (mach_state
->ms_pstate
.cma_domain
) {
685 return (mach_state
->ms_pstate
.cma_domain
->pm_domain
);
687 } else if (type
== CPUPM_DTYPE_IDLE
) {
689 * Return C-State domain for the specified CPU
691 if (mach_state
->ms_cstate
.cma_domain
) {
692 return (mach_state
->ms_cstate
.cma_domain
->pm_domain
);
695 return (CPUPM_NO_DOMAIN
);
/*
 * cpupm_plat_state_enumerate(): enumerate the P-states (speeds) of a
 * CPU for the common power manager.  When "states" is non-NULL it is
 * filled in with one (speed, handle) pair per P-state; the handle is
 * simply the speed index.  Only active (P-state) domains are supported.
 * NOTE(review): the early return for the idle-domain case and the final
 * return of the speed count are not visible in this extraction.
 */
700 cpupm_plat_state_enumerate(cpu_t
*cp
, cpupm_dtype_t type
,
701 cpupm_state_t
*states
)
707 * Idle domain support unimplemented
709 if (type
!= CPUPM_DTYPE_ACTIVE
) {
712 nspeeds
= cpupm_get_speeds(cp
, &speeds
);
715 * If the caller passes NULL for states, just return the
718 if (states
!= NULL
) {
719 for (i
= 0; i
< nspeeds
; i
++) {
720 states
[i
].cps_speed
= speeds
[i
];
721 states
[i
].cps_handle
= (cpupm_handle_t
)i
;
/* speeds was allocated by cpupm_get_speeds(); release it here. */
724 cpupm_free_speeds(speeds
, nspeeds
);
/*
 * cpupm_plat_change_state(): common power manager entry point to move a
 * CPU to the P-state identified by state->cps_handle.  Fails when the
 * CPU is not yet ready for power management.
 */
730 cpupm_plat_change_state(cpu_t
*cp
, cpupm_state_t
*state
)
732 if (!cpupm_is_ready(cp
))
735 cpupm_state_change(cp
, (int)state
->cps_handle
, CPUPM_P_STATES
);
742 * Note: It is the responsibility of the users of
743 * cpupm_get_speeds() to free the memory allocated
744 * for speeds using cpupm_free_speeds()
/*
 * Returns the number of speeds and, through *speeds, a newly allocated
 * array of them, obtained from the CPU's cached ACPI data.
 */
747 cpupm_get_speeds(cpu_t
*cp
, int **speeds
)
750 cpupm_mach_state_t
*mach_state
=
751 (cpupm_mach_state_t
*)cp
->cpu_m
.mcpu_pm_mach_state
;
752 return (cpu_acpi_get_speeds(mach_state
->ms_acpi_handle
, speeds
));
760 cpupm_free_speeds(int *speeds
, uint_t nspeeds
)
763 cpu_acpi_free_speeds(speeds
, nspeeds
);
768 * All CPU instances have been initialized successfully.
771 cpupm_power_ready(cpu_t
*cp
)
773 return (cpupm_is_enabled(CPUPM_P_STATES
) && cpupm_is_ready(cp
));
777 * All CPU instances have been initialized successfully.
780 cpupm_throttle_ready(cpu_t
*cp
)
782 return (cpupm_is_enabled(CPUPM_T_STATES
) && cpupm_is_ready(cp
));
786 * All CPU instances have been initialized successfully.
789 cpupm_cstate_ready(cpu_t
*cp
)
791 return (cpupm_is_enabled(CPUPM_C_STATES
) && cpupm_is_ready(cp
));
/*
 * cpupm_notify_handler(): single handler registered with ACPI for a CPU
 * object.  Fans the notification out to every handler queued on the
 * CPU's ms_handlers list, under ms_lock.
 * NOTE(review): the declaration recovering the cpu_t pointer from "ctx"
 * is not visible in this extraction ("cp" is used below) -- confirm
 * against upstream.
 */
795 cpupm_notify_handler(ACPI_HANDLE obj
, UINT32 val
, void *ctx
)
798 cpupm_mach_state_t
*mach_state
=
799 (cpupm_mach_state_t
*)(cp
->cpu_m
.mcpu_pm_mach_state
);
800 cpupm_notification_t
*entry
;
802 mutex_enter(&mach_state
->ms_lock
);
803 for (entry
= mach_state
->ms_handlers
; entry
!= NULL
;
804 entry
= entry
->nq_next
) {
805 entry
->nq_handler(obj
, val
, entry
->nq_ctx
);
807 mutex_exit(&mach_state
->ms_lock
);
/*
 * cpupm_add_notify_handler(): queue a (handler, ctx) pair on the CPU's
 * notification list.  The ACPI-level handler (cpupm_notify_handler) is
 * installed only once, when the first entry is added; later entries are
 * simply pushed onto the head of the list under ms_lock.
 * NOTE(review): the assignment of "ctx" into entry->nq_ctx is not
 * visible in this extraction -- confirm against upstream.
 */
812 cpupm_add_notify_handler(cpu_t
*cp
, CPUPM_NOTIFY_HANDLER handler
, void *ctx
)
815 cpupm_mach_state_t
*mach_state
=
816 (cpupm_mach_state_t
*)cp
->cpu_m
.mcpu_pm_mach_state
;
817 cpupm_notification_t
*entry
;
819 entry
= kmem_zalloc(sizeof (cpupm_notification_t
), KM_SLEEP
);
820 entry
->nq_handler
= handler
;
822 mutex_enter(&mach_state
->ms_lock
);
823 if (mach_state
->ms_handlers
== NULL
) {
824 entry
->nq_next
= NULL
;
825 mach_state
->ms_handlers
= entry
;
/* First registration: hook our dispatcher into ACPI for this CPU. */
826 cpu_acpi_install_notify_handler(mach_state
->ms_acpi_handle
,
827 cpupm_notify_handler
, cp
);
830 entry
->nq_next
= mach_state
->ms_handlers
;
831 mach_state
->ms_handlers
= entry
;
833 mutex_exit(&mach_state
->ms_lock
);
/*
 * cpupm_free_notify_handlers(): unregister the ACPI-level dispatcher
 * for this CPU and free every queued notification entry, all under
 * ms_lock.  A no-op if no handlers were ever registered.
 * NOTE(review): the loop advance ("entry = next;") between freeing one
 * node and examining the successor is not visible in this extraction.
 */
839 cpupm_free_notify_handlers(cpu_t
*cp
)
842 cpupm_mach_state_t
*mach_state
=
843 (cpupm_mach_state_t
*)cp
->cpu_m
.mcpu_pm_mach_state
;
844 cpupm_notification_t
*entry
;
845 cpupm_notification_t
*next
;
847 mutex_enter(&mach_state
->ms_lock
);
848 if (mach_state
->ms_handlers
== NULL
) {
849 mutex_exit(&mach_state
->ms_lock
);
852 if (mach_state
->ms_acpi_handle
!= NULL
) {
853 cpu_acpi_remove_notify_handler(mach_state
->ms_acpi_handle
,
854 cpupm_notify_handler
);
856 entry
= mach_state
->ms_handlers
;
857 while (entry
!= NULL
) {
858 next
= entry
->nq_next
;
859 kmem_free(entry
, sizeof (cpupm_notification_t
));
862 mach_state
->ms_handlers
= NULL
;
863 mutex_exit(&mach_state
->ms_lock
);
/*
 * cpupm_get_top_speed(): refresh the cached ACPI _PPC value for this
 * CPU and translate it into the top manageable P-state level, warning
 * (and presumably clamping) when _PPC is outside [0, nspeeds-1].
 * NOTE(review): the assignment of mach_state, the declarations of
 * plat_level/nspeeds/max_level, and the out-of-range/return lines are
 * only partially visible in this extraction.
 */
868 * Get the current max speed from the ACPI _PPC object
872 cpupm_get_top_speed(cpu_t
*cp
)
875 cpupm_mach_state_t
*mach_state
;
876 cpu_acpi_handle_t handle
;
882 (cpupm_mach_state_t
*)cp
->cpu_m
.mcpu_pm_mach_state
;
883 handle
= mach_state
->ms_acpi_handle
;
/* Re-read _PPC, which may have changed (thermal / AC events). */
885 cpu_acpi_cache_ppc(handle
);
886 plat_level
= CPU_ACPI_PPC(handle
);
888 nspeeds
= CPU_ACPI_PSTATES_COUNT(handle
);
890 max_level
= nspeeds
- 1;
891 if ((plat_level
< 0) || (plat_level
> max_level
)) {
892 cmn_err(CE_NOTE
, "!cpupm_get_top_speed: CPU %d: "
893 "_PPC out of range %d", cp
->cpu_id
, plat_level
);
904 * This notification handler is called whenever the ACPI _PPC
905 * object changes. The _PPC is a sort of governor on power levels.
906 * It sets an upper threshold on which, _PSS defined, power levels
907 * are usable. The _PPC value is dynamic and may change as properties
908 * (i.e., thermal or AC source) of the system change.
/*
 * NOTE(review): the declarations recovering "cp" from "ctx" and of
 * "top_speed" are not visible in this extraction.
 */
912 cpupm_power_manage_notifications(void *ctx
)
917 top_speed
= cpupm_get_top_speed(cp
);
918 cpupm_redefine_max_activepwr_state(cp
, top_speed
);
/*
 * cpupm_event_notify_handler(): per-CPU ACPI change dispatcher queued
 * by cpupm_init().  Routes _TPC changes to throttle management, _CST
 * changes to C-state management, and _PPC changes to P-state top-speed
 * management, each gated on the corresponding capability bit.
 * NOTE(review): the declaration recovering the cpu_t pointer from "ctx"
 * is not visible in this extraction ("cp" is used below).
 */
923 cpupm_event_notify_handler(ACPI_HANDLE obj
, UINT32 val
, void *ctx
)
928 cpupm_mach_state_t
*mach_state
=
929 (cpupm_mach_state_t
*)(cp
->cpu_m
.mcpu_pm_mach_state
);
931 if (mach_state
== NULL
)
935 * Currently, we handle _TPC,_CST and _PPC change notifications.
937 if (val
== CPUPM_TPC_CHANGE_NOTIFICATION
&&
938 mach_state
->ms_caps
& CPUPM_T_STATES
) {
939 cpupm_throttle_manage_notification(ctx
);
940 } else if (val
== CPUPM_CST_CHANGE_NOTIFICATION
&&
941 mach_state
->ms_caps
& CPUPM_C_STATES
) {
942 cpuidle_manage_cstates(ctx
);
943 } else if (val
== CPUPM_PPC_CHANGE_NOTIFICATION
&&
944 mach_state
->ms_caps
& CPUPM_P_STATES
) {
945 cpupm_power_manage_notifications(ctx
);
951 * Update cpupm cstate data each time CPU exits idle.
954 cpupm_wakeup_cstate_data(cma_c_state_t
*cs_data
, hrtime_t end
)
956 cs_data
->cs_idle_exit
= end
;
/*
 * cpupm_next_cstate(): strand-level C-state selection, run on idle
 * entry.  Accumulates idle time since the last sample, refreshes the
 * sampled idle percentage once per cpupm_cs_sample_interval, then picks
 * the deepest C-state justified by three heuristics: (1) average idle
 * time must exceed the state's latency scaled by
 * cpupm_cs_idle_save_tunable; (2) the average wakeup interval must
 * exceed the latency scaled by cpupm_cs_idle_cost_tunable; (3) the
 * sampled idle percentage must meet the per-state C2/C3 thresholds.
 * NOTE(review): the lines assigning cs_next_cstate inside the early
 * "too shallow" exits, resetting cs_cnt, and the #ifdef/brace structure
 * are only partially visible in this extraction.
 */
960 * Determine next cstate based on cpupm data.
961 * Update cpupm cstate data each time CPU goes idle.
962 * Do as much as possible in the idle state bookkeeping function because the
963 * performance impact while idle is minimal compared to in the wakeup function
964 * when there is real work to do.
967 cpupm_next_cstate(cma_c_state_t
*cs_data
, cpu_acpi_cstate_t
*cstates
,
968 uint32_t cs_count
, hrtime_t start
)
971 hrtime_t ave_interval
;
972 hrtime_t ave_idle_time
;
973 uint32_t i
, smpl_cnt
;
/* Account the just-ended idle period (scaled to real time). */
975 duration
= cs_data
->cs_idle_exit
- cs_data
->cs_idle_enter
;
976 scalehrtime(&duration
);
977 cs_data
->cs_idle
+= duration
;
978 cs_data
->cs_idle_enter
= start
;
980 smpl_cnt
= ++cs_data
->cs_cnt
;
981 cs_data
->cs_smpl_len
= start
- cs_data
->cs_smpl_start
;
982 scalehrtime(&cs_data
->cs_smpl_len
);
/* Sample window elapsed: recompute the idle percentage statistics. */
983 if (cs_data
->cs_smpl_len
> cpupm_cs_sample_interval
) {
984 cs_data
->cs_smpl_idle
= cs_data
->cs_idle
;
985 cs_data
->cs_idle
= 0;
986 cs_data
->cs_smpl_idle_pct
= ((100 * cs_data
->cs_smpl_idle
) /
987 cs_data
->cs_smpl_len
);
989 cs_data
->cs_smpl_start
= start
;
993 * Strand level C-state policy
994 * The cpu_acpi_cstate_t *cstates array is not required to
995 * have an entry for both CPU_ACPI_C2 and CPU_ACPI_C3.
996 * There are cs_count entries in the cstates array.
997 * cs_data->cs_next_cstate contains the index of the next
998 * C-state this CPU should enter.
1000 ASSERT(cstates
[0].cs_type
== CPU_ACPI_C1
);
1003 * Will CPU be idle long enough to save power?
1005 ave_idle_time
= (cs_data
->cs_smpl_idle
/ smpl_cnt
) / 1000;
1006 for (i
= 1; i
< cs_count
; ++i
) {
1007 if (ave_idle_time
< (cstates
[i
].cs_latency
*
1008 cpupm_cs_idle_save_tunable
)) {
1010 DTRACE_PROBE2(cpupm__next__cstate
, cpu_t
*,
1016 * Wakeup often (even when non-idle time is very short)?
1017 * Some producer/consumer type loads fall into this category.
1019 ave_interval
= (cs_data
->cs_smpl_len
/ smpl_cnt
) / 1000;
1020 for (i
= 1; i
< cs_count
; ++i
) {
1021 if (ave_interval
<= (cstates
[i
].cs_latency
*
1022 cpupm_cs_idle_cost_tunable
)) {
1024 DTRACE_PROBE2(cpupm__next__cstate
, cpu_t
*,
1025 CPU
, int, (CPU_MAX_CSTATES
+ i
));
/* Finally gate C2/C3 on the sampled idle-percentage thresholds. */
1032 for (i
= 1; i
< cs_count
; ++i
) {
1033 switch (cstates
[i
].cs_type
) {
1035 if (cs_data
->cs_smpl_idle_pct
<
1036 cpupm_C2_idle_pct_tunable
) {
1038 DTRACE_PROBE2(cpupm__next__cstate
,
1040 ((2 * CPU_MAX_CSTATES
) + i
));
1045 if (cs_data
->cs_smpl_idle_pct
<
1046 cpupm_C3_idle_pct_tunable
) {
1048 DTRACE_PROBE2(cpupm__next__cstate
,
1050 ((2 * CPU_MAX_CSTATES
) + i
));
/* All states qualified: go as deep as the table allows. */
1056 cs_data
->cs_next_cstate
= cs_count
- 1;
1059 return (cs_data
->cs_next_cstate
);