2 * Copyright (c) 2015 Imre Vadász <imre@vdsz.com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
23 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
28 * Device driver for Intel's On Die power usage estimation via MSR.
29 * Supported by Sandy Bridge and later CPUs, and also by Atom CPUs
30 * of the Silvermont and later architectures.
33 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/module.h>
38 #include <sys/cpu_topology.h>
39 #include <sys/kernel.h>
40 #include <sys/sensors.h>
41 #include <sys/bitops.h>
43 #include <machine/specialreg.h>
44 #include <machine/cpufunc.h>
45 #include <machine/cputypes.h>
46 #include <machine/md_var.h>
/*
 * Bit fields of the value read from MSR_RAPL_POWER_UNIT; the attach code
 * extracts them with __SHIFTOUT().
 */
#define MSR_RAPL_POWER_UNIT_POWER	__BITS64(0, 3)
#define MSR_RAPL_POWER_UNIT_ENERGY	__BITS64(8, 12)
#define MSR_RAPL_POWER_UNIT_TIME	__BITS64(16, 19)
54 struct corepower_sensor
{
57 struct ksensor sensor
;
60 struct corepower_softc
{
63 uint32_t sc_watt_unit
;
64 uint32_t sc_joule_unit
;
65 uint32_t sc_second_unit
;
70 struct corepower_sensor sc_pkg_sens
;
71 struct corepower_sensor sc_dram_sens
;
72 struct corepower_sensor sc_pp0_sens
;
73 struct corepower_sensor sc_pp1_sens
;
75 struct ksensordev sc_sensordev
;
76 struct sensor_task
*sc_senstask
;
82 static void corepower_identify(driver_t
*driver
, device_t parent
);
83 static int corepower_probe(device_t dev
);
84 static int corepower_attach(device_t dev
);
85 static int corepower_detach(device_t dev
);
86 static uint32_t corepower_energy_to_uwatts(struct corepower_softc
*sc
,
87 uint32_t units
, uint32_t secs
);
88 static void corepower_refresh(void *arg
);
89 static void corepower_sens_init(struct corepower_sensor
*sens
,
90 char *desc
, u_int msr
, int cpu
);
91 static void corepower_sens_update(struct corepower_softc
*sc
,
92 struct corepower_sensor
*sens
);
93 static int corepower_try(u_int msr
, char *name
);
95 static device_method_t corepower_methods
[] = {
96 /* Device interface */
97 DEVMETHOD(device_identify
, corepower_identify
),
98 DEVMETHOD(device_probe
, corepower_probe
),
99 DEVMETHOD(device_attach
, corepower_attach
),
100 DEVMETHOD(device_detach
, corepower_detach
),
105 static driver_t corepower_driver
= {
108 sizeof(struct corepower_softc
),
111 static devclass_t corepower_devclass
;
112 DRIVER_MODULE(corepower
, cpu
, corepower_driver
, corepower_devclass
, NULL
, NULL
);
113 MODULE_VERSION(corepower
, 1);
116 corepower_identify(driver_t
*driver
, device_t parent
)
119 const struct cpu_node
*node
;
122 /* Make sure we're not being doubly invoked. */
123 if (device_find_child(parent
, "corepower", -1) != NULL
)
126 /* Check that the vendor is Intel. */
127 if (cpu_vendor_id
!= CPU_VENDOR_INTEL
)
130 /* We only want one child per CPU package */
131 cpu
= device_get_unit(parent
);
132 node
= get_cpu_node_by_cpuid(cpu
);
133 while (node
!= NULL
) {
134 if (node
->type
== CHIP_LEVEL
) {
135 if (node
->child_no
== 0)
139 node
= node
->parent_node
;
144 master_cpu
= BSRCPUMASK(node
->members
);
145 if (cpu
!= master_cpu
)
148 child
= device_add_child(parent
, "corepower", -1);
150 device_printf(parent
, "add corepower child failed\n");
154 corepower_probe(device_t dev
)
156 int cpu_family
, cpu_model
;
158 if (resource_disabled("corepower", 0))
161 cpu_model
= CPUID_TO_MODEL(cpu_id
);
162 cpu_family
= CPUID_TO_FAMILY(cpu_id
);
164 if (cpu_family
== 0x06) {
175 /* Haswell, Broadwell, Skylake */
196 if (corepower_try(MSR_RAPL_POWER_UNIT
, "MSR_RAPL_POWER_UNIT") == 0)
199 device_set_desc(dev
, "CPU On-Die Power Usage Estimation");
201 return (BUS_PROBE_GENERIC
);
205 corepower_attach(device_t dev
)
207 struct corepower_softc
*sc
= device_get_softc(dev
);
209 uint32_t power_units
;
210 uint32_t energy_units
;
212 int cpu_family
, cpu_model
;
216 sc
->sc_have_sens
= 0;
219 cpu_family
= CPUID_TO_FAMILY(cpu_id
);
220 cpu_model
= CPUID_TO_MODEL(cpu_id
);
222 /* Check CPU model */
223 if (cpu_family
== 0x06) {
228 sc
->sc_have_sens
= 0xd;
231 case 0x2d: /* Only Xeon branded, Core i version should probably be 0x5 */
236 sc
->sc_have_sens
= 0x7;
238 /* Haswell, Broadwell, Skylake */
246 /* Check if Core or Xeon (Xeon CPUs might be 0x7) */
247 sc
->sc_have_sens
= 0xf;
256 sc
->sc_have_sens
= 0x5;
257 /* use quirk for Valleyview Atom CPUs */
265 val
= rdmsr(MSR_RAPL_POWER_UNIT
);
267 power_units
= __SHIFTOUT(val
, MSR_RAPL_POWER_UNIT_POWER
);
268 energy_units
= __SHIFTOUT(val
, MSR_RAPL_POWER_UNIT_ENERGY
);
269 time_units
= __SHIFTOUT(val
, MSR_RAPL_POWER_UNIT_TIME
);
271 sc
->sc_watt_unit
= (1 << power_units
);
272 sc
->sc_joule_unit
= (1 << energy_units
);
273 sc
->sc_second_unit
= (1 << time_units
);
276 * Add hw.sensors.cpu_nodeN MIB.
278 cpu
= device_get_unit(device_get_parent(dev
));
279 ksnprintf(sc
->sc_sensordev
.xname
, sizeof(sc
->sc_sensordev
.xname
),
280 "cpu_node%d", get_chip_ID(cpu
));
281 if ((sc
->sc_have_sens
& 1) &&
282 corepower_try(MSR_PKG_ENERGY_STATUS
, "MSR_PKG_ENERGY_STATUS")) {
283 corepower_sens_init(&sc
->sc_pkg_sens
, "Package Power",
284 MSR_PKG_ENERGY_STATUS
, cpu
);
285 sensor_attach(&sc
->sc_sensordev
, &sc
->sc_pkg_sens
.sensor
);
287 sc
->sc_have_sens
&= ~1;
289 if ((sc
->sc_have_sens
& 2) &&
290 corepower_try(MSR_DRAM_ENERGY_STATUS
, "MSR_DRAM_ENERGY_STATUS")) {
291 corepower_sens_init(&sc
->sc_dram_sens
, "DRAM Power",
292 MSR_DRAM_ENERGY_STATUS
, cpu
);
293 sensor_attach(&sc
->sc_sensordev
, &sc
->sc_dram_sens
.sensor
);
295 sc
->sc_have_sens
&= ~2;
297 if ((sc
->sc_have_sens
& 4) &&
298 corepower_try(MSR_PP0_ENERGY_STATUS
, "MSR_PP0_ENERGY_STATUS")) {
299 corepower_sens_init(&sc
->sc_pp0_sens
, "Cores Power",
300 MSR_PP0_ENERGY_STATUS
, cpu
);
301 sensor_attach(&sc
->sc_sensordev
, &sc
->sc_pp0_sens
.sensor
);
303 sc
->sc_have_sens
&= ~4;
305 if ((sc
->sc_have_sens
& 8) &&
306 corepower_try(MSR_PP1_ENERGY_STATUS
, "MSR_PP1_ENERGY_STATUS")) {
307 corepower_sens_init(&sc
->sc_pp1_sens
, "Graphics Power",
308 MSR_PP1_ENERGY_STATUS
, cpu
);
309 sensor_attach(&sc
->sc_sensordev
, &sc
->sc_pp1_sens
.sensor
);
311 sc
->sc_have_sens
&= ~8;
314 if (sc
->sc_have_sens
== 0)
317 sc
->sc_senstask
= sensor_task_register2(sc
, corepower_refresh
, 1, cpu
);
319 sensordev_install(&sc
->sc_sensordev
);
325 corepower_detach(device_t dev
)
327 struct corepower_softc
*sc
= device_get_softc(dev
);
329 sensordev_deinstall(&sc
->sc_sensordev
);
330 sensor_task_unregister2(sc
->sc_senstask
);
336 corepower_energy_to_uwatts(struct corepower_softc
*sc
, uint32_t units
,
341 if (sc
->sc_is_atom
) {
342 val
= ((uint64_t)units
) * sc
->sc_joule_unit
;
344 val
= ((uint64_t)units
) * 1000ULL * 1000ULL;
345 val
/= sc
->sc_joule_unit
;
352 corepower_refresh(void *arg
)
354 struct corepower_softc
*sc
= (struct corepower_softc
*)arg
;
356 if (sc
->sc_have_sens
& 1)
357 corepower_sens_update(sc
, &sc
->sc_pkg_sens
);
358 if (sc
->sc_have_sens
& 2)
359 corepower_sens_update(sc
, &sc
->sc_dram_sens
);
360 if (sc
->sc_have_sens
& 4)
361 corepower_sens_update(sc
, &sc
->sc_pp0_sens
);
362 if (sc
->sc_have_sens
& 8)
363 corepower_sens_update(sc
, &sc
->sc_pp1_sens
);
367 corepower_sens_init(struct corepower_sensor
*sens
, char *desc
, u_int msr
,
370 ksnprintf(sens
->sensor
.desc
, sizeof(sens
->sensor
.desc
), "node%d %s",
371 get_chip_ID(cpu
), desc
);
372 sens
->sensor
.type
= SENSOR_WATTS
;
374 sens
->energy
= rdmsr(sens
->msr
) & 0xffffffffU
;
378 corepower_sens_update(struct corepower_softc
*sc
,
379 struct corepower_sensor
*sens
)
383 a
= rdmsr(sens
->msr
) & 0xffffffffU
;
384 if (sens
->energy
> a
) {
385 res
= (0x100000000ULL
- sens
->energy
) + a
;
387 res
= a
- sens
->energy
;
390 sens
->sensor
.value
= corepower_energy_to_uwatts(sc
, res
, 1);
394 corepower_try(u_int msr
, char *name
)
398 if (rdmsr_safe(msr
, &val
) != 0) {
399 kprintf("msr %s (0x%08x) not available\n", name
, msr
);