2 * Copyright (c) 2010,2016 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * - Monitor the cpu load and adjust cpu and cpu power domain
38 * performance accordingly.
39 * - Monitor battery life. Raise alerts and shut down the machine
40 * if battery life goes low.
43 #define _KERNEL_STRUCTURES
44 #include <sys/types.h>
45 #include <sys/sysctl.h>
46 #include <sys/kinfo.h>
48 #include <sys/queue.h>
49 #include <sys/soundcard.h>
50 #include <sys/sensors.h>
52 #include <machine/cpufunc.h>
53 #include <machine/cpumask.h>
64 #define MAXDOM MAXCPU /* worst case, 1 cpu per domain */
69 #define NFREQ_MONPERF 0x0001
70 #define NFREQ_ADJPERF 0x0002
71 #define NFREQ_CPUTEMP 0x0004
73 #define NFREQ_ALL (NFREQ_MONPERF | NFREQ_ADJPERF | NFREQ_CPUTEMP)
76 TAILQ_ENTRY(cpu_pwrdom
) dom_link
;
79 cpumask_t dom_cpumask
;
84 double cpu_uavg
; /* used for speeding up */
85 double cpu_davg
; /* used for slowing down */
91 static void usage(void);
92 static void get_ncpus(void);
93 static void mon_cputemp(void);
96 static void get_uschedcpus(void);
97 static void set_uschedcpus(void);
100 static int has_perfbias(void);
101 static void set_perfbias(int, int);
103 /* acpi(4) P-state */
104 static void acpi_getcpufreq_str(int, int *, int *);
105 static int acpi_getcpufreq_bin(int, int *, int *);
106 static void acpi_get_cpufreq(int, int *, int *);
107 static void acpi_set_cpufreq(int, int);
108 static int acpi_get_cpupwrdom(void);
110 /* mwait C-state hint */
111 static int probe_cstate(void);
112 static void set_cstate(int, int);
114 /* Performance monitoring */
115 static void init_perf(void);
116 static void mon_perf(double);
117 static void adj_perf(cpumask_t
, cpumask_t
);
118 static void adj_cpu_pwrdom(int, int);
119 static void adj_cpu_perf(int, int);
120 static void get_cputime(double);
121 static int get_nstate(struct cpu_state
*, double);
122 static void add_spare_cpus(const cpumask_t
, int);
123 static void restore_perf(void);
124 static void set_global_freq(int freq
);
126 /* Battery monitoring */
127 static int has_battery(void);
128 static int mon_battery(void);
129 static void low_battery_alert(int);
132 static void restore_backlight(void);
134 /* Runtime states for performance monitoring */
135 static int global_pcpu_limit
;
136 static struct cpu_state pcpu_state
[MAXCPU
];
137 static struct cpu_state global_cpu_state
;
138 static cpumask_t cpu_used
; /* cpus w/ high perf */
139 static cpumask_t cpu_pwrdom_used
; /* cpu power domains w/ high perf */
140 static cpumask_t usched_cpu_used
; /* cpus for usched */
143 static cpumask_t cpu_pwrdom_mask
; /* usable cpu power domains */
144 static int cpu2pwrdom
[MAXCPU
]; /* cpu to cpu power domain map */
145 static struct cpu_pwrdom
*cpu_pwrdomain
[MAXDOM
];
146 static int NCpus
; /* # of cpus */
147 static char orig_global_cx
[CST_STRLEN
];
148 static char cpu_perf_cx
[CST_STRLEN
];
149 static int cpu_perf_cxlen
;
150 static char cpu_idle_cx
[CST_STRLEN
];
151 static int cpu_idle_cxlen
;
152 static int FreqAry
[MAXFREQ
];
154 static int NFreqChanged
= NFREQ_ALL
;
155 static int SavedPXGlobal
;
158 static int TurboOpt
= 1;
160 static int Hysteresis
= 10; /* percentage */
161 static double TriggerUp
= 0.25; /* single-cpu load to force max freq */
162 static double TriggerDown
; /* load per cpu to force the min freq */
163 static int HasPerfbias
= 0;
164 static int AdjustCpuFreq
= 1;
165 static int AdjustCstate
= 0;
166 static int HighestCpuFreq
;
167 static int LowestCpuFreq
;
168 static int AdjustUsched
= 1;
170 static int AdjustCpuFreqOverride
;
172 static volatile int stopped
;
174 /* Battery life monitoring */
175 static int BatLifeMin
= 2; /* shutdown the box, if low on battery life */
176 static struct timespec BatLifePrevT
;
177 static int BatLifePollIntvl
= 5; /* unit: sec */
178 static struct timespec BatShutdownStartT
;
179 static int BatShutdownLinger
= -1;
180 static int BatShutdownLingerSet
= 60; /* unit: sec */
181 static int BatShutdownLingerCnt
;
182 static int BatShutdownAudioAlert
= 1;
183 static int MinTemp
= 75;
184 static int MaxTemp
= 85;
185 static int BackLightPct
= 100;
186 static int OldBackLightLevel
;
187 static int BackLightDown
;
189 static void sigintr(int signo
);
192 main(int ac
, char **av
)
203 srt
= 8.0; /* time for samples - 8 seconds */
204 pollrate
= 1.0; /* polling rate in seconds */
206 while ((ch
= getopt(ac
, av
, "b:cdefh:l:p:r:tu:B:H:L:P:QT:U")) != -1) {
209 BackLightPct
= strtol(optarg
, NULL
, 10);
224 HighestCpuFreq
= strtol(optarg
, NULL
, 10);
227 LowestCpuFreq
= strtol(optarg
, NULL
, 10);
230 Hysteresis
= (int)strtol(optarg
, NULL
, 10);
233 pollrate
= strtod(optarg
, NULL
);
239 TriggerUp
= (double)strtol(optarg
, NULL
, 10) / 100;
242 BatLifeMin
= strtol(optarg
, NULL
, 10);
245 MaxTemp
= strtol(optarg
, &p2
, 0);
248 MaxTemp
= strtol(p2
+ 1, NULL
, 0);
250 MinTemp
= MaxTemp
* 9 / 10;
254 BatShutdownLingerSet
= strtol(optarg
, NULL
, 10);
255 if (BatShutdownLingerSet
< 0)
256 BatShutdownLingerSet
= 0;
259 BatLifePollIntvl
= strtol(optarg
, NULL
, 10);
262 BatShutdownAudioAlert
= 0;
265 srt
= strtod(optarg
, NULL
);
280 /* Get number of cpus */
284 acpi_get_cpufreq(0, &lowest
, &highest
);
286 if (Hysteresis
< 0 || Hysteresis
> 99) {
287 fprintf(stderr
, "Invalid hysteresis value\n");
291 if (TriggerUp
< 0 || TriggerUp
> 1) {
292 fprintf(stderr
, "Invalid load limit value\n");
296 if (BackLightPct
> 100 || BackLightPct
<= 0) {
297 fprintf(stderr
, "Invalid backlight setting, ignore\n");
301 TriggerDown
= TriggerUp
- (TriggerUp
* (double) Hysteresis
/ 100);
304 * Make sure powerd is not already running.
306 PowerFd
= open("/var/run/powerd.pid", O_CREAT
|O_RDWR
, 0644);
309 "Cannot create /var/run/powerd.pid, "
310 "continuing anyway\n");
315 r
= read(PowerFd
, buf
, sizeof(buf
) - 1);
318 pid
= strtol(buf
, NULL
, 0);
320 if (flock(PowerFd
, LOCK_EX
|LOCK_NB
) < 0) {
323 flock(PowerFd
, LOCK_EX
);
324 fprintf(stderr
, "restarting powerd\n");
327 "powerd is already running, "
328 "unable to kill pid for restart\n");
332 lseek(PowerFd
, 0L, 0);
336 * Daemonize and set pid
340 openlog("powerd", LOG_CONS
| LOG_PID
, LOG_DAEMON
);
344 ftruncate(PowerFd
, 0);
345 snprintf(buf
, sizeof(buf
), "%d\n", (int)getpid());
346 write(PowerFd
, buf
, strlen(buf
));
349 /* Do we need to monitor battery life? */
350 if (BatLifePollIntvl
<= 0)
353 monbat
= has_battery();
355 /* Do we have perfbias(4)? */
357 HasPerfbias
= has_perfbias();
359 /* Could we adjust C-state? */
361 AdjustCstate
= probe_cstate();
364 * Wait for the hw.acpi.cpu.px_dom* sysctls to be created by the kernel.
366 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
367 * taskqueue and ACPI taskqueue is shared across various
368 * ACPI modules, any delay in other modules may cause
369 * hw.acpi.cpu.px_dom* to be created at quite a later time
370 * (e.g. cmbat module's task could take quite a lot of time).
373 /* Prime delta cputime calculation. */
374 get_cputime(pollrate
);
376 /* Wait for all cpus to appear */
377 if (acpi_get_cpupwrdom())
379 usleep((int)(pollrate
* 1000000.0));
383 * Catch some signals so that max performance could be restored.
385 signal(SIGINT
, sigintr
);
386 signal(SIGTERM
, sigintr
);
388 /* Initialize performance states */
391 srt
= srt
/ pollrate
; /* convert to sample count */
393 printf("samples for downgrading: %5.2f\n", srt
);
400 * Monitor performance
402 get_cputime(pollrate
);
410 monbat
= mon_battery();
412 usleep((int)(pollrate
* 1000000.0));
416 * Set to maximum performance if killed.
418 syslog(LOG_INFO
, "killed, setting max and exiting");
420 set_global_freq(SavedPXGlobal
);
428 sigintr(int signo __unused
)
434 * Figure out the cpu power domains.
437 acpi_get_cpupwrdom(void)
439 struct cpu_pwrdom
*dom
;
440 cpumask_t pwrdom_mask
;
445 int n
, i
, ncpu
= 0, dom_id
;
447 memset(cpu2pwrdom
, 0, sizeof(cpu2pwrdom
));
448 memset(cpu_pwrdomain
, 0, sizeof(cpu_pwrdomain
));
449 CPUMASK_ASSZERO(cpu_pwrdom_mask
);
451 for (i
= 0; i
< MAXDOM
; ++i
) {
452 snprintf(buf
, sizeof(buf
),
453 "hw.acpi.cpu.px_dom%d.available", i
);
454 if (sysctlbyname(buf
, NULL
, NULL
, NULL
, 0) < 0)
457 dom
= calloc(1, sizeof(*dom
));
460 if (cpu_pwrdomain
[i
] != NULL
) {
461 fprintf(stderr
, "cpu power domain %d exists\n", i
);
464 cpu_pwrdomain
[i
] = dom
;
465 CPUMASK_ORBIT(cpu_pwrdom_mask
, i
);
467 pwrdom_mask
= cpu_pwrdom_mask
;
469 while (CPUMASK_TESTNZERO(pwrdom_mask
)) {
470 dom_id
= BSFCPUMASK(pwrdom_mask
);
471 CPUMASK_NANDBIT(pwrdom_mask
, dom_id
);
472 dom
= cpu_pwrdomain
[dom_id
];
474 CPUMASK_ASSZERO(dom
->dom_cpumask
);
476 snprintf(buf
, sizeof(buf
),
477 "hw.acpi.cpu.px_dom%d.members", dom
->dom_id
);
478 msize
= sizeof(members
);
479 if (sysctlbyname(buf
, members
, &msize
, NULL
, 0) < 0) {
480 cpu_pwrdomain
[dom_id
] = NULL
;
486 for (str
= strtok(members
, " "); str
; str
= strtok(NULL
, " ")) {
488 sscanf(str
, "cpu%d", &n
);
492 CPUMASK_ORBIT(dom
->dom_cpumask
, n
);
493 cpu2pwrdom
[n
] = dom
->dom_id
;
496 if (dom
->dom_ncpus
== 0) {
497 cpu_pwrdomain
[dom_id
] = NULL
;
502 printf("dom%d cpumask: ", dom
->dom_id
);
503 for (i
= 0; i
< (int)NELEM(dom
->dom_cpumask
.ary
); ++i
) {
505 (uintmax_t)dom
->dom_cpumask
.ary
[i
]);
513 printf("Found %d cpus, expecting %d\n", ncpu
, NCpus
);
515 pwrdom_mask
= cpu_pwrdom_mask
;
516 while (CPUMASK_TESTNZERO(pwrdom_mask
)) {
517 dom_id
= BSFCPUMASK(pwrdom_mask
);
518 CPUMASK_NANDBIT(pwrdom_mask
, dom_id
);
519 dom
= cpu_pwrdomain
[dom_id
];
529 * Save per-cpu load and sum of per-cpu load.
532 get_cputime(double pollrate
)
534 static struct kinfo_cputime ocpu_time
[MAXCPU
];
535 static struct kinfo_cputime ncpu_time
[MAXCPU
];
541 bcopy(ncpu_time
, ocpu_time
, sizeof(struct kinfo_cputime
) * NCpus
);
543 slen
= sizeof(ncpu_time
);
544 if (sysctlbyname("kern.cputime", &ncpu_time
, &slen
, NULL
, 0) < 0) {
545 fprintf(stderr
, "kern.cputime sysctl not available\n");
548 ncpu
= slen
/ sizeof(ncpu_time
[0]);
551 for (cpu
= 0; cpu
< ncpu
; ++cpu
) {
554 d
= (ncpu_time
[cpu
].cp_user
+ ncpu_time
[cpu
].cp_sys
+
555 ncpu_time
[cpu
].cp_nice
+ ncpu_time
[cpu
].cp_intr
) -
556 (ocpu_time
[cpu
].cp_user
+ ocpu_time
[cpu
].cp_sys
+
557 ocpu_time
[cpu
].cp_nice
+ ocpu_time
[cpu
].cp_intr
);
558 pcpu_state
[cpu
].cpu_qavg
= (double)d
/ (pollrate
* 1000000.0);
562 global_cpu_state
.cpu_qavg
= (double)delta
/ (pollrate
* 1000000.0);
566 acpi_getcpufreq_str(int dom_id
, int *highest0
, int *lowest0
)
568 char buf
[256], sysid
[64];
571 int v
, highest
, lowest
;
575 * Retrieve availability list
577 snprintf(sysid
, sizeof(sysid
),
578 "hw.acpi.cpu.px_dom%d.available", dom_id
);
579 buflen
= sizeof(buf
) - 1;
580 if (sysctlbyname(sysid
, buf
, &buflen
, NULL
, 0) < 0)
585 * Parse out the highest and lowest cpu frequencies
588 highest
= lowest
= 0;
590 while (ptr
&& (v
= strtol(ptr
, &ptr
, 10)) > 0) {
591 if ((lowest
== 0 || lowest
> v
) &&
592 (LowestCpuFreq
<= 0 || v
>= LowestCpuFreq
))
594 if ((highest
== 0 || highest
< v
) &&
595 (HighestCpuFreq
<= 0 || v
<= HighestCpuFreq
))
600 if (!TurboOpt
&& highest
- v
== 1)
608 if (freqidx
> MAXFREQ
)
610 if (NFreq
!= freqidx
) {
612 NFreqChanged
= NFREQ_ALL
;
615 while (ptr
&& (v
= strtol(ptr
, &ptr
, 10)) > 0) {
618 if (FreqAry
[freqidx
- 1] != v
)
619 NFreqChanged
= NFREQ_ALL
;
620 FreqAry
[--freqidx
] = v
;
628 acpi_getcpufreq_bin(int dom_id
, int *highest0
, int *lowest0
)
633 int freqary
[MAXFREQ
];
636 * Retrieve availability list
638 snprintf(sysid
, sizeof(sysid
), "hw.acpi.cpu.px_dom%d.avail", dom_id
);
639 freqlen
= sizeof(FreqAry
);
640 bzero(freqary
, sizeof(freqary
));
641 if (sysctlbyname(sysid
, freqary
, &freqlen
, NULL
, 0) < 0)
644 freqcnt
= freqlen
/ sizeof(freqary
[0]);
645 if (NFreq
!= freqcnt
) {
647 NFreqChanged
= NFREQ_ALL
;
649 if (bcmp(freqary
, FreqAry
, sizeof(FreqAry
)) != 0)
650 NFreqChanged
= NFREQ_ALL
;
651 bcopy(freqary
, FreqAry
, sizeof(FreqAry
));
655 for (i
= freqcnt
- 1; i
>= 0; --i
) {
656 *lowest0
= FreqAry
[i
];
657 if (LowestCpuFreq
<= 0 || *lowest0
>= LowestCpuFreq
)
662 *highest0
= FreqAry
[0];
663 if (!TurboOpt
&& freqcnt
> 1 && FreqAry
[0] - FreqAry
[1] == 1) {
665 *highest0
= FreqAry
[1];
667 for (; i
< freqcnt
; ++i
) {
668 if (HighestCpuFreq
<= 0 || *highest0
<= HighestCpuFreq
)
670 *highest0
= FreqAry
[i
];
676 acpi_get_cpufreq(int dom_id
, int *highest
, int *lowest
)
681 if (acpi_getcpufreq_bin(dom_id
, highest
, lowest
))
683 acpi_getcpufreq_str(dom_id
, highest
, lowest
);
690 fprintf(stderr
, "usage: powerd [-cdeftQU] [-p hysteresis] "
691 "[-h highest_freq] [-l lowest_freq] "
692 "[-r poll_interval] [-u trigger_up] "
693 "[-B min_battery_life] [-L low_battery_linger] "
694 "[-P battery_poll_interval] [-T sample_interval] "
700 #define timespecsub(vvp, uvp) \
702 (vvp)->tv_sec -= (uvp)->tv_sec; \
703 (vvp)->tv_nsec -= (uvp)->tv_nsec; \
704 if ((vvp)->tv_nsec < 0) { \
706 (vvp)->tv_nsec += 1000000000; \
711 #define BAT_SYSCTL_TIME_MAX 50000000 /* unit: nanosecond */
716 struct timespec s
, e
;
720 clock_gettime(CLOCK_MONOTONIC_FAST
, &s
);
724 if (sysctlbyname("hw.acpi.acline", &val
, &len
, NULL
, 0) < 0) {
725 /* No AC line information */
728 clock_gettime(CLOCK_MONOTONIC_FAST
, &e
);
731 if (e
.tv_sec
> 0 || e
.tv_nsec
> BAT_SYSCTL_TIME_MAX
) {
732 /* hw.acpi.acline takes too long to be useful */
733 syslog(LOG_NOTICE
, "hw.acpi.acline takes too long");
737 clock_gettime(CLOCK_MONOTONIC_FAST
, &s
);
739 if (sysctlbyname("hw.acpi.battery.life", &val
, &len
, NULL
, 0) < 0) {
740 /* No battery life */
743 clock_gettime(CLOCK_MONOTONIC_FAST
, &e
);
746 if (e
.tv_sec
> 0 || e
.tv_nsec
> BAT_SYSCTL_TIME_MAX
) {
747 /* hw.acpi.battery.life takes too long to be useful */
748 syslog(LOG_NOTICE
, "hw.acpi.battery.life takes too long");
755 low_battery_alert(int life
)
757 int fmt
, stereo
, freq
;
760 syslog(LOG_ALERT
, "low battery life %d%%, please plugin AC line, #%d",
761 life
, BatShutdownLingerCnt
);
762 ++BatShutdownLingerCnt
;
764 if (!BatShutdownAudioAlert
)
767 fd
= open("/dev/dsp", O_WRONLY
);
772 if (ioctl(fd
, SNDCTL_DSP_SETFMT
, &fmt
, sizeof(fmt
)) < 0)
776 if (ioctl(fd
, SNDCTL_DSP_STEREO
, &stereo
, sizeof(stereo
)) < 0)
780 if (ioctl(fd
, SNDCTL_DSP_SPEED
, &freq
, sizeof(freq
)) < 0)
783 write(fd
, alert1
, sizeof(alert1
));
784 write(fd
, alert1
, sizeof(alert1
));
793 struct timespec cur
, ts
;
797 clock_gettime(CLOCK_MONOTONIC_FAST
, &cur
);
799 timespecsub(&ts
, &BatLifePrevT
);
800 if (ts
.tv_sec
< BatLifePollIntvl
)
804 len
= sizeof(acline
);
805 if (sysctlbyname("hw.acpi.acline", &acline
, &len
, NULL
, 0) < 0)
808 BatShutdownLinger
= -1;
809 BatShutdownLingerCnt
= 0;
814 if (!BackLightDown
&& BackLightPct
!= 100) {
815 int backlight_max
, backlight
;
817 len
= sizeof(backlight_max
);
818 if (sysctlbyname("hw.backlight_max", &backlight_max
, &len
,
820 /* No more backlight adjustment */
822 goto after_backlight
;
825 len
= sizeof(OldBackLightLevel
);
826 if (sysctlbyname("hw.backlight_level", &OldBackLightLevel
, &len
,
828 /* No more backlight adjustment */
830 goto after_backlight
;
833 backlight
= (backlight_max
* BackLightPct
) / 100;
834 if (backlight
>= OldBackLightLevel
) {
835 /* No more backlight adjustment */
837 goto after_backlight
;
840 if (sysctlbyname("hw.backlight_level", NULL
, NULL
,
841 &backlight
, sizeof(backlight
)) < 0) {
842 /* No more backlight adjustment */
844 goto after_backlight
;
851 if (sysctlbyname("hw.acpi.battery.life", &life
, &len
, NULL
, 0) < 0)
854 if (BatShutdownLinger
> 0) {
856 timespecsub(&ts
, &BatShutdownStartT
);
857 if (ts
.tv_sec
> BatShutdownLinger
)
858 BatShutdownLinger
= 0;
861 if (life
<= BatLifeMin
) {
862 if (BatShutdownLinger
== 0 || BatShutdownLingerSet
== 0) {
863 syslog(LOG_ALERT
, "low battery life %d%%, "
864 "shutting down", life
);
866 execlp("poweroff", "poweroff", NULL
);
868 } else if (BatShutdownLinger
< 0) {
869 BatShutdownLinger
= BatShutdownLingerSet
;
870 BatShutdownStartT
= cur
;
872 low_battery_alert(life
);
882 slen
= sizeof(NCpus
);
883 if (sysctlbyname("hw.ncpu", &NCpus
, &slen
, NULL
, 0) < 0)
884 err(1, "sysctlbyname hw.ncpu failed");
886 printf("hw.ncpu %d\n", NCpus
);
894 slen
= sizeof(usched_cpu_used
);
895 if (sysctlbyname("kern.usched_global_cpumask", &usched_cpu_used
, &slen
,
897 err(1, "sysctlbyname kern.usched_global_cpumask failed");
901 printf("usched cpumask was: ");
902 for (i
= 0; i
< (int)NELEM(usched_cpu_used
.ary
); ++i
)
903 printf("%jx ", (uintmax_t)usched_cpu_used
.ary
[i
]);
914 printf("usched cpumask: ");
915 for (i
= 0; i
< (int)NELEM(usched_cpu_used
.ary
); ++i
) {
917 (uintmax_t)usched_cpu_used
.ary
[i
]);
921 sysctlbyname("kern.usched_global_cpumask", NULL
, 0,
922 &usched_cpu_used
, sizeof(usched_cpu_used
));
932 if (sysctlbyname("machdep.perfbias0.hint", &hint
, &len
, NULL
, 0) < 0)
938 set_perfbias(int cpu
, int inc
)
940 int hint
= inc
? 0 : 15;
944 printf("cpu%d set perfbias hint %d\n", cpu
, hint
);
945 snprintf(sysid
, sizeof(sysid
), "machdep.perfbias%d.hint", cpu
);
946 sysctlbyname(sysid
, NULL
, NULL
, &hint
, sizeof(hint
));
952 struct cpu_state
*state
;
955 /* Get usched cpumask */
959 * Assume everything is used and maxed out, before we
962 CPUMASK_ASSBMASK(cpu_used
, NCpus
);
963 cpu_pwrdom_used
= cpu_pwrdom_mask
;
964 global_pcpu_limit
= NCpus
;
966 for (cpu
= 0; cpu
< NCpus
; ++cpu
) {
967 state
= &pcpu_state
[cpu
];
969 state
->cpu_uavg
= 0.0;
970 state
->cpu_davg
= 0.0;
971 state
->cpu_limit
= 1;
972 state
->cpu_count
= 1;
973 snprintf(state
->cpu_name
, sizeof(state
->cpu_name
), "cpu%d",
977 state
= &global_cpu_state
;
978 state
->cpu_uavg
= 0.0;
979 state
->cpu_davg
= 0.0;
980 state
->cpu_limit
= NCpus
;
981 state
->cpu_count
= NCpus
;
982 strlcpy(state
->cpu_name
, "global", sizeof(state
->cpu_name
));
986 get_nstate(struct cpu_state
*state
, double srt
)
988 int ustate
, dstate
, nstate
;
991 state
->cpu_uavg
= (state
->cpu_uavg
* 2.0 + state
->cpu_qavg
) / 3.0;
993 state
->cpu_davg
= (state
->cpu_davg
* srt
+ state
->cpu_qavg
) / (srt
+ 1);
994 if (state
->cpu_davg
< state
->cpu_uavg
)
995 state
->cpu_davg
= state
->cpu_uavg
;
997 ustate
= state
->cpu_uavg
/ TriggerUp
;
998 if (ustate
< state
->cpu_limit
)
999 ustate
= state
->cpu_uavg
/ TriggerDown
;
1000 dstate
= state
->cpu_davg
/ TriggerUp
;
1001 if (dstate
< state
->cpu_limit
)
1002 dstate
= state
->cpu_davg
/ TriggerDown
;
1004 nstate
= (ustate
> dstate
) ? ustate
: dstate
;
1005 if (nstate
> state
->cpu_count
)
1006 nstate
= state
->cpu_count
;
1009 printf("%s qavg=%5.2f uavg=%5.2f davg=%5.2f "
1010 "%2d ncpus=%d\n", state
->cpu_name
,
1011 state
->cpu_qavg
, state
->cpu_uavg
, state
->cpu_davg
,
1012 state
->cpu_limit
, nstate
);
1018 mon_perf(double srt
)
1020 cpumask_t ocpu_used
, ocpu_pwrdom_used
;
1021 int pnstate
= 0, nstate
;
1025 * Find cpus requiring performance and their corresponding power
1026 * domains. Save the number of cpus requiring performance in
1029 ocpu_used
= cpu_used
;
1030 ocpu_pwrdom_used
= cpu_pwrdom_used
;
1032 CPUMASK_ASSZERO(cpu_used
);
1033 CPUMASK_ASSZERO(cpu_pwrdom_used
);
1035 for (cpu
= 0; cpu
< NCpus
; ++cpu
) {
1036 struct cpu_state
*state
= &pcpu_state
[cpu
];
1039 s
= get_nstate(state
, srt
);
1041 CPUMASK_ORBIT(cpu_used
, cpu
);
1042 CPUMASK_ORBIT(cpu_pwrdom_used
, cpu2pwrdom
[cpu
]);
1046 state
->cpu_limit
= s
;
1050 * Calculate nstate, the number of cpus we wish to run at max
1053 nstate
= get_nstate(&global_cpu_state
, srt
);
1055 if (nstate
== global_cpu_state
.cpu_limit
&&
1056 (NFreqChanged
& NFREQ_MONPERF
) == 0 &&
1057 (pnstate
== global_pcpu_limit
|| nstate
> pnstate
)) {
1058 /* Nothing changed; keep the sets */
1059 cpu_used
= ocpu_used
;
1060 cpu_pwrdom_used
= ocpu_pwrdom_used
;
1062 global_pcpu_limit
= pnstate
;
1065 NFreqChanged
&= ~NFREQ_MONPERF
;
1066 global_pcpu_limit
= pnstate
;
1068 if (nstate
> pnstate
) {
1070 * Add spare cpus to meet global performance requirement.
1072 add_spare_cpus(ocpu_used
, nstate
- pnstate
);
1075 global_cpu_state
.cpu_limit
= nstate
;
1078 * Adjust cpu and cpu power domain performance
1080 adj_perf(ocpu_used
, ocpu_pwrdom_used
);
1084 add_spare_cpus(const cpumask_t ocpu_used
, int ncpu
)
1086 cpumask_t saved_pwrdom
, xcpu_used
;
1090 * Find more cpus in the previous cpu set.
1092 xcpu_used
= cpu_used
;
1093 CPUMASK_XORMASK(xcpu_used
, ocpu_used
);
1094 while (CPUMASK_TESTNZERO(xcpu_used
)) {
1095 cpu
= BSFCPUMASK(xcpu_used
);
1096 CPUMASK_NANDBIT(xcpu_used
, cpu
);
1098 if (CPUMASK_TESTBIT(ocpu_used
, cpu
)) {
1099 CPUMASK_ORBIT(cpu_pwrdom_used
, cpu2pwrdom
[cpu
]);
1100 CPUMASK_ORBIT(cpu_used
, cpu
);
1108 * Find more cpus in the used cpu power domains.
1110 saved_pwrdom
= cpu_pwrdom_used
;
1112 while (CPUMASK_TESTNZERO(saved_pwrdom
)) {
1113 cpumask_t unused_cpumask
;
1116 dom
= BSFCPUMASK(saved_pwrdom
);
1117 CPUMASK_NANDBIT(saved_pwrdom
, dom
);
1119 unused_cpumask
= cpu_pwrdomain
[dom
]->dom_cpumask
;
1120 CPUMASK_NANDMASK(unused_cpumask
, cpu_used
);
1122 while (CPUMASK_TESTNZERO(unused_cpumask
)) {
1123 cpu
= BSFCPUMASK(unused_cpumask
);
1124 CPUMASK_NANDBIT(unused_cpumask
, cpu
);
1126 CPUMASK_ORBIT(cpu_pwrdom_used
, dom
);
1127 CPUMASK_ORBIT(cpu_used
, cpu
);
1136 * Find more cpus in unused cpu power domains
1138 saved_pwrdom
= cpu_pwrdom_mask
;
1139 CPUMASK_NANDMASK(saved_pwrdom
, cpu_pwrdom_used
);
1143 printf("%d cpus not found\n", ncpu
);
1147 acpi_set_cpufreq(int dom
, int inc
)
1149 int lowest
, highest
, desired
;
1152 acpi_get_cpufreq(dom
, &highest
, &lowest
);
1153 if (highest
== 0 || lowest
== 0)
1155 desired
= inc
? highest
: lowest
;
1158 printf("dom%d set frequency %d\n", dom
, desired
);
1159 snprintf(sysid
, sizeof(sysid
), "hw.acpi.cpu.px_dom%d.select", dom
);
1160 sysctlbyname(sysid
, NULL
, NULL
, &desired
, sizeof(desired
));
1164 adj_cpu_pwrdom(int dom
, int inc
)
1166 if (AdjustCpuFreq
&& (inc
== 0 || AdjustCpuFreqOverride
== 0))
1167 acpi_set_cpufreq(dom
, inc
);
1171 adj_cpu_perf(int cpu
, int inc
)
1175 printf("cpu%d increase perf\n", cpu
);
1177 printf("cpu%d decrease perf\n", cpu
);
1181 set_perfbias(cpu
, inc
);
1183 set_cstate(cpu
, inc
);
1187 adj_perf(cpumask_t xcpu_used
, cpumask_t xcpu_pwrdom_used
)
1192 cpumask_t old_usched_used
;
1195 * Set cpus requiring performance to the userland process
1196 * scheduler. Leave the rest of cpus unmapped.
1198 old_usched_used
= usched_cpu_used
;
1199 usched_cpu_used
= cpu_used
;
1200 if (CPUMASK_TESTZERO(usched_cpu_used
))
1201 CPUMASK_ORBIT(usched_cpu_used
, 0);
1202 if (CPUMASK_CMPMASKNEQ(usched_cpu_used
, old_usched_used
))
1207 * Adjust per-cpu performance for any cpus which changed.
1209 CPUMASK_XORMASK(xcpu_used
, cpu_used
);
1210 if (NFreqChanged
& NFREQ_ADJPERF
)
1211 CPUMASK_ASSBMASK(xcpu_used
, NCpus
);
1212 while (CPUMASK_TESTNZERO(xcpu_used
)) {
1213 cpu
= BSFCPUMASK(xcpu_used
);
1214 CPUMASK_NANDBIT(xcpu_used
, cpu
);
1216 if (CPUMASK_TESTBIT(cpu_used
, cpu
)) {
1217 /* Increase cpu performance */
1220 /* Decrease cpu performance */
1223 adj_cpu_perf(cpu
, inc
);
1227 * Adjust cpu power domain performance. This could affect
1230 CPUMASK_XORMASK(xcpu_pwrdom_used
, cpu_pwrdom_used
);
1231 if (NFreqChanged
& NFREQ_ADJPERF
)
1232 CPUMASK_ASSBMASK(xcpu_pwrdom_used
, NCpus
);
1233 while (CPUMASK_TESTNZERO(xcpu_pwrdom_used
)) {
1236 dom
= BSFCPUMASK(xcpu_pwrdom_used
);
1237 CPUMASK_NANDBIT(xcpu_pwrdom_used
, dom
);
1239 if (CPUMASK_TESTBIT(cpu_pwrdom_used
, dom
)) {
1240 /* Increase cpu power domain performance */
1243 /* Decrease cpu power domain performance */
1246 adj_cpu_pwrdom(dom
, inc
);
1248 NFreqChanged
&= ~NFREQ_ADJPERF
;
1254 cpumask_t ocpu_used
, ocpu_pwrdom_used
;
1256 /* Remove highest cpu frequency limitation */
1259 ocpu_used
= cpu_used
;
1260 ocpu_pwrdom_used
= cpu_pwrdom_used
;
1262 /* Max out all cpus and cpu power domains performance */
1263 CPUMASK_ASSBMASK(cpu_used
, NCpus
);
1264 cpu_pwrdom_used
= cpu_pwrdom_mask
;
1266 adj_perf(ocpu_used
, ocpu_pwrdom_used
);
1270 * Restore the original mwait C-state
1273 printf("global set cstate %s\n", orig_global_cx
);
1274 sysctlbyname("machdep.mwait.CX.idle", NULL
, NULL
,
1275 orig_global_cx
, strlen(orig_global_cx
) + 1);
1282 char cx_supported
[1024];
1285 int idle_hlt
, deep
= 1;
1288 len
= sizeof(idle_hlt
);
1289 if (sysctlbyname("machdep.cpu_idle_hlt", &idle_hlt
, &len
, NULL
, 0) < 0)
1294 len
= sizeof(cx_supported
);
1295 if (sysctlbyname("machdep.mwait.CX.supported", cx_supported
, &len
,
1299 len
= sizeof(orig_global_cx
);
1300 if (sysctlbyname("machdep.mwait.CX.idle", orig_global_cx
, &len
,
1304 strlcpy(cpu_perf_cx
, "AUTODEEP", sizeof(cpu_perf_cx
));
1305 cpu_perf_cxlen
= strlen(cpu_perf_cx
) + 1;
1306 if (sysctlbyname("machdep.mwait.CX.idle", NULL
, NULL
,
1307 cpu_perf_cx
, cpu_perf_cxlen
) < 0) {
1308 /* AUTODEEP is not supported; try AUTO */
1310 strlcpy(cpu_perf_cx
, "AUTO", sizeof(cpu_perf_cx
));
1311 cpu_perf_cxlen
= strlen(cpu_perf_cx
) + 1;
1312 if (sysctlbyname("machdep.mwait.CX.idle", NULL
, NULL
,
1313 cpu_perf_cx
, cpu_perf_cxlen
) < 0)
1321 for (ptr
= strtok(cx_supported
, " "); ptr
!= NULL
;
1322 ptr
= strtok(NULL
, " ")) {
1323 if (target
== NULL
||
1324 (target
!= NULL
&& strcmp(ptr
, target
) == 0)) {
1325 strlcpy(cpu_idle_cx
, ptr
, sizeof(cpu_idle_cx
));
1326 cpu_idle_cxlen
= strlen(cpu_idle_cx
) + 1;
1331 if (cpu_idle_cxlen
== 0)
1335 printf("cstate orig %s, perf %s, idle %s\n",
1336 orig_global_cx
, cpu_perf_cx
, cpu_idle_cx
);
1342 set_cstate(int cpu
, int inc
)
1350 len
= cpu_perf_cxlen
;
1353 len
= cpu_idle_cxlen
;
1357 printf("cpu%d set cstate %s\n", cpu
, cst
);
1358 snprintf(sysid
, sizeof(sysid
), "machdep.mwait.CX.idle%d", cpu
);
1359 sysctlbyname(sysid
, NULL
, NULL
, cst
, len
);
1363 restore_backlight(void)
1365 if (BackLightDown
) {
1367 sysctlbyname("hw.backlight_level", NULL
, NULL
,
1368 &OldBackLightLevel
, sizeof(OldBackLightLevel
));
1373 * get_cputemp() / mon_cputemp()
1375 * This enforces the maximum cpu frequency based on temperature
1376 * versus MinTemp and MaxTemp.
1382 struct sensor sensor
;
1388 for (n
= 0; ; ++n
) {
1390 snprintf(sysid
, sizeof(sysid
),
1391 "hw.sensors.cpu_node%d.temp0", n
);
1392 sensor_size
= sizeof(sensor
);
1393 if (sysctlbyname(sysid
, &sensor
, &sensor_size
, NULL
, 0) < 0)
1396 if ((sensor
.flags
& (SENSOR_FINVALID
| SENSOR_FUNKNOWN
)) == 0) {
1397 t
= (int)((sensor
.value
- 273150000) / 1000000);
1406 * Missing nodeN for some reason, try cpuN.
1408 for (n
= 0; ; ++n
) {
1410 snprintf(sysid
, sizeof(sysid
),
1411 "hw.sensors.cpu%d.temp0", n
);
1412 sensor_size
= sizeof(sensor
);
1413 if (sysctlbyname(sysid
, &sensor
, &sensor_size
, NULL
, 0) < 0)
1416 if ((sensor
.flags
& (SENSOR_FINVALID
| SENSOR_FUNKNOWN
)) == 0) {
1417 t
= (int)((sensor
.value
- 273150000) / 1000000);
1426 set_global_freq(int freq
)
1429 sysctlbyname("hw.acpi.cpu.px_global",
1430 NULL
, NULL
, &freq
, sizeof(freq
));
1434 get_global_freq(void)
1440 freq_size
= sizeof(freq
);
1441 sysctlbyname("hw.acpi.cpu.px_global", &freq
, &freq_size
, NULL
, 0);
1449 static int last_temp
= -1;
1450 static int last_idx
= -1;
1451 int temp
= get_cputemp();
1455 static int CurPXGlobal __unused
;
1458 * Reseed FreqAry, it can change w/AC power state
1460 acpi_get_cpufreq(0, &lowest
, &highest
);
1463 * Some cpu frequency steps can cause large shifts in cpu temperature,
1464 * creating an oscillation that min-maxes the temperature in a way
1465 * that is not desirable. To deal with this, we impose an exponential
1466 * average for any temperature change.
1468 * We have to do this in both directions, otherwise (in particular)
1469 * laptop fan responsiveness and temperature sensor response times
1470 * can create major frequency oscillations.
1472 if (last_temp
< 0 || (NFreqChanged
& NFREQ_CPUTEMP
)) {
1473 NFreqChanged
&= ~NFREQ_CPUTEMP
;
1474 last_temp
= temp
<< 8;
1475 } else if (temp
< last_temp
) {
1476 last_temp
= (last_temp
* 15 + (temp
<< 8)) / 16;
1478 printf("Falling temp %d (use %d)\n",
1479 temp
, (last_temp
>> 8));
1482 last_temp
= (last_temp
* 15 + (temp
<< 8)) / 16;
1484 printf("Rising temp %d (use %d)\n",
1485 temp
, (last_temp
>> 8));
1488 temp
= last_temp
>> 8;
1491 * CPU Temp not available or available frequencies not yet
1495 printf("Temp %d {%d-%d} NFreq=%d)\n",
1496 temp
, MinTemp
, MaxTemp
, NFreq
);
1503 * Return to normal operation if under the minimum
1505 if (temp
<= MinTemp
) {
1506 if (AdjustCpuFreqOverride
) {
1507 AdjustCpuFreqOverride
= 0;
1509 NFreqChanged
= NFREQ_ALL
;
1512 "Temp below %d, returning to normal operation",
1515 set_global_freq(SavedPXGlobal
);
1521 * Hysteresis before entering temperature control mode
1523 if (AdjustCpuFreqOverride
== 0 &&
1524 temp
<= MinTemp
+ (MaxTemp
- MinTemp
) / 10 + 1) {
1529 * Override frequency controls (except for idle -> lowest)
1531 if (AdjustCpuFreqOverride
== 0) {
1532 AdjustCpuFreqOverride
= 1;
1533 SavedPXGlobal
= get_global_freq();
1535 NFreqChanged
= NFREQ_ALL
;
1538 "Temp %d {%d-%d}, entering temperature control mode",
1539 temp
, MinTemp
, MaxTemp
);
1541 if (temp
> MaxTemp
+ (MaxTemp
- MinTemp
) / 10 + 1) {
1543 "Temp %d {%d-%d}, TOO HOT!!!",
1544 temp
, MinTemp
, MaxTemp
);
1546 idx
= (temp
- MinTemp
) * NFreq
/ (MaxTemp
- MinTemp
);
1547 if (idx
< 0 || idx
>= NFreq
) /* overtemp */
1551 * Limit frequency shifts to single steps in both directions.
1552 * Some fans react very quickly, this will reduce oscillations.
1555 printf("Temp index %d (use %d)\n", idx
, last_idx
);
1556 if (last_idx
>= 0 && idx
< last_idx
)
1558 else if (last_idx
>= 0 && idx
> last_idx
)
1563 * One last thing, make sure our frequency adheres to
1564 * HighestCpuFreq. However, override LowestCpuFreq for
1565 * temperature control purposes.
1567 while (HighestCpuFreq
> 0 && idx
< NFreq
&&
1568 FreqAry
[idx
] > HighestCpuFreq
) {
1573 * Currently ignore LowestCpuFreq if temp control thinks it
1576 while (LowestCpuFreq
> 0 && idx
> 0 &&
1577 FreqAry
[idx
] < LowestCpuFreq
) {
1582 if (FreqAry
[idx
] != CurPXGlobal
) {
1583 CurPXGlobal
= FreqAry
[idx
];
1586 /* this can get noisy so don't log for now */
1588 "Temp %d {%d-%d}, set frequency %d",
1589 temp
, MinTemp
, MaxTemp
, CurPXGlobal
);
1592 set_global_freq(CurPXGlobal
);