Merge mozilla-central to autoland on a CLOSED TREE
[gecko.git] / tools / power / rapl.cpp
blob1aa5fcf6eedd00b20f5fbd9434a00b0f5bdb8ca5
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // This program provides processor power estimates. It does this by reading
8 // model-specific registers (MSRs) that are part of Intel's Running Average Power
9 // Limit (RAPL) interface. These MSRs provide good quality estimates of the
10 // energy consumption of up to four system components:
11 // - PKG: the entire processor package;
12 // - PP0: the cores (a subset of the package);
13 // - PP1: the GPU (a subset of the package);
14 // - DRAM: main memory.
16 // For more details about RAPL, see section 14.9 of Volume 3 of the "Intel 64
17 // and IA-32 Architectures Software Developer's Manual", Order Number 325384.
19 // This program exists because there are no existing tools on Mac that can
20 // obtain all four RAPL estimates. (|powermetrics| can obtain the package
21 // estimate, but not the others. Intel Power Gadget can obtain the package and
22 // cores estimates.)
24 // On Linux |perf| can obtain all four estimates (as Joules, which are easily
25 // converted to Watts), but this program is implemented for Linux because it's
26 // not too hard to do, and that gives us multi-platform consistency.
28 // This program does not support Windows, unfortunately. It's not obvious how
29 // to access the RAPL MSRs on Windows.
31 // This program deliberately uses only standard libraries and avoids
32 // Mozilla-specific code, to make it easy to compile and test on different
33 // machines.
35 #include <assert.h>
36 #include <getopt.h>
37 #include <math.h>
38 #include <signal.h>
39 #include <stdarg.h>
40 #include <stdint.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <sys/time.h>
45 #include <unistd.h>
47 #include <algorithm>
48 #include <numeric>
49 #include <vector>
51 //---------------------------------------------------------------------------
52 // Utilities
53 //---------------------------------------------------------------------------
// Program name exactly as received in argv[0]. main() stores it here and the
// error-reporting helpers use it as a message prefix.
static const char* gArgv0;

// Reports a fatal error and terminates the process with exit status 1.
//
// The message written to stderr has the form "<argv0>: <text>\n", where <text>
// is produced from |aFormat| and the variadic arguments using printf()
// conventions. This function never returns.
static void Abort(const char* aFormat, ...) {
  fprintf(stderr, "%s: ", gArgv0);

  va_list args;
  va_start(args, aFormat);
  vfprintf(stderr, aFormat, args);
  va_end(args);

  fputc('\n', stderr);
  exit(1);
}
69 static void CmdLineAbort(const char* aMsg) {
70 if (aMsg) {
71 fprintf(stderr, "%s: %s\n", gArgv0, aMsg);
73 fprintf(stderr, "Use --help for more information.\n");
74 exit(1);
// Sentinel energy value (in Joules) used to report an estimate for a RAPL
// domain that is not supported.
static const double kUnsupported_j = -1.0;

// printf()-style output to stdout followed by an explicit flush, so that each
// line shows up straight away even when stdout is redirected (e.g. through
// |tee|).
static void PrintAndFlush(const char* aFormat, ...) {
  va_list args;
  va_start(args, aFormat);
  vprintf(aFormat, args);
  va_end(args);

  fflush(stdout);
}
91 //---------------------------------------------------------------------------
92 // Mac-specific code
93 //---------------------------------------------------------------------------
95 #if defined(__APPLE__)
97 // Because of the pkg_energy_statistics_t::pkes_version check below, the
98 // earliest OS X version this code will work with is 10.9.0 (xnu-2422.1.72).
100 # include <sys/types.h>
101 # include <sys/sysctl.h>
103 // OS X has four kinds of system calls:
105 // 1. Mach traps;
106 // 2. UNIX system calls;
107 // 3. machine-dependent calls;
108 // 4. diagnostic calls.
110 // (See "Mac OS X and iOS Internals" by Jonathan Levin for more details.)
112 // The last category has a single call named diagCall() or diagCall64(). Its
113 // mode is controlled by its first argument, and one of the modes allows access
114 // to the Intel RAPL MSRs.
116 // The interface to diagCall64() is not exported, so we have to import some
117 // definitions from the XNU kernel. All imported definitions are annotated with
118 // the XNU source file they come from, and information about what XNU versions
119 // they were introduced in and (if relevant) modified.
// Mode selector passed as the first argument of diagCall64().
// Source: osfmk/i386/Diagnostics.h
// - Introduced in 10.8.4 (xnu-2050.24.15). (10.8.3 used the value 17 for
//   dgGzallocTest instead.)
#  define dgPowerStat 17

// Histogram bin counts used by the structs below.
// Source: osfmk/i386/cpu_data.h
// - Introduced in 10.8.5, together with core_energy_stat_t.
#  define CPU_RTIME_BINS (12)
#  define CPU_ITIME_BINS (CPU_RTIME_BINS)

// core_energy_stat_t and pkg_energy_statistics_t both mirror definitions in
// osfmk/i386/Diagnostics.c. Their history:
// - 10.8.4 (xnu-2050.24.15): both structs introduced, with far fewer fields.
// - 10.8.5 (xnu-2050.48.11): both structs substantially expanded with many new
//   fields.
// - 10.9.0 (xnu-2422.1.72): pkg_energy_statistics_t::pkes_version added.
//   diagCall64(dgPowerStat) stores '1' in it in every version since (up to
//   10.10.2 at the time of writing).
// - 10.10.2 (xnu-2782.10.72): core_energy_stat_t::gpmcs added, but only when
//   DIAG_ALL_PMCS is true. (DIAG_ALL_PMCS is not defined anywhere in the XNU
//   source, though it could be supplied via compiler flags.)
//   pkg_energy_statistics_t::pkes_version was not bumped for this change.
//
// Per-CPU record. The field order and types must match the kernel's layout
// exactly, because the kernel fills this memory directly.
typedef struct {
  uint64_t caperf;
  uint64_t cmperf;
  uint64_t ccres[6];
  uint64_t crtimes[CPU_RTIME_BINS];
  uint64_t citimes[CPU_ITIME_BINS];
  uint64_t crtime_total;
  uint64_t citime_total;
  uint64_t cpu_idle_exits;
  uint64_t cpu_insns;
  uint64_t cpu_ucc;
  uint64_t cpu_urc;
  // The conditional member below exists since 10.10.2 (xnu-2782.10.72).
#  if DIAG_ALL_PMCS
  uint64_t gpmcs[4];
#  endif /* DIAG_ALL_PMCS */
} core_energy_stat_t;
163 typedef struct {
164 uint64_t pkes_version; // Added in 10.9.0 (xnu-2422.1.72).
165 uint64_t pkg_cres[2][7];
167 // This is read from MSR 0x606, which Intel calls MSR_RAPL_POWER_UNIT
168 // and XNU calls MSR_IA32_PKG_POWER_SKU_UNIT.
169 uint64_t pkg_power_unit;
171 // These are the four fields for the four RAPL domains. For each field
172 // we list:
174 // - the corresponding MSR number;
175 // - Intel's name for that MSR;
176 // - XNU's name for that MSR;
177 // - which Intel processors the MSR is supported on.
179 // The last of these is determined from chapter 35 of Volume 3 of the
180 // "Intel 64 and IA-32 Architecture's Software Developer's Manual",
181 // Order Number 325384. (Note that chapter 35 contradicts section 14.9
182 // to some degree.)
184 // 0x611 == MSR_PKG_ENERGY_STATUS == MSR_IA32_PKG_ENERGY_STATUS
185 // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57).
186 uint64_t pkg_energy;
188 // 0x639 == MSR_PP0_ENERGY_STATUS == MSR_IA32_PP0_ENERGY_STATUS
189 // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57).
190 uint64_t pp0_energy;
192 // 0x641 == MSR_PP1_ENERGY_STATUS == MSR_PP1_ENERGY_STATUS
193 // Sandy Bridge, Haswell.
194 uint64_t pp1_energy;
196 // 0x619 == MSR_DRAM_ENERGY_STATUS == MSR_IA32_DDR_ENERGY_STATUS
197 // Xeon E5, Xeon E5 v2, Haswell/Haswell-E, Next Gen Xeon Phi (model
198 // 0x57)
199 uint64_t ddr_energy;
201 uint64_t llc_flushed_cycles;
202 uint64_t ring_ratio_instantaneous;
203 uint64_t IA_frequency_clipping_cause;
204 uint64_t GT_frequency_clipping_cause;
205 uint64_t pkg_idle_exits;
206 uint64_t pkg_rtimes[CPU_RTIME_BINS];
207 uint64_t pkg_itimes[CPU_ITIME_BINS];
208 uint64_t mbus_delay_time;
209 uint64_t mint_delay_time;
210 uint32_t ncpus;
211 core_energy_stat_t cest[];
212 } pkg_energy_statistics_t;
// Issues the XNU diagnostic system call with mode |aMode| and buffer |aBuf|,
// and returns the kernel's result truncated to int.
//
// syscall() cannot be used for this: it does not work with diagnostic system
// calls and raises SIGSYS if you try. Hence the inline assembly.
static int diagCall64(uint64_t aMode, void* aBuf) {
#  ifndef __x86_64__
#    error Sorry, only x86-64 is supported
#  else
  // The high part of this number marks the call as a diagnostic system call;
  // the trailing 0x01 is the syscall number within that class, which also
  // happens to be the only diagnostic system call. See
  // osfmk/mach/i386/syscall_sw.h for more details.
  static const uint64_t kDiagSyscallNumber = 0x4000001;
  uint64_t result;

  __asm__ __volatile__(
      "syscall"

      // Output: the return value arrives in "a" (%rax).
      : "=a"(result)

      // Inputs: the syscall number shares %rax with the output (constraint
      // "0"); the two arguments go in "D" (%rdi) and "S" (%rsi).
      : "0"(kDiagSyscallNumber), "D"(aMode), "S"(aBuf)

      // Clobbers: |syscall| overwrites %rcx, %r11 and %rflags ("cc"), and this
      // particular call writes through |aBuf| ("memory").
      : "rcx", "r11", "cc", "memory");

  return result;
#  endif
}
245 static void diagCall64_dgPowerStat(pkg_energy_statistics_t* aPkes) {
246 static const uint64_t supported_version = 1;
248 // Write an unsupported version number into pkes_version so that the check
249 // below cannot succeed by dumb luck.
250 aPkes->pkes_version = supported_version - 1;
252 // diagCall64() returns 1 on success, and 0 on failure (which can only happen
253 // if the mode is unrecognized, e.g. in 10.7.x or earlier versions).
254 if (diagCall64(dgPowerStat, aPkes) != 1) {
255 Abort("diagCall64() failed");
258 if (aPkes->pkes_version != 1) {
259 Abort("unexpected pkes_version: %llu", aPkes->pkes_version);
263 class RAPL {
264 bool mIsGpuSupported; // Is the GPU domain supported by the processor?
265 bool mIsRamSupported; // Is the RAM domain supported by the processor?
267 // The DRAM domain on Haswell servers has a fixed energy unit (1/65536 J ==
268 // 15.3 microJoules) which is different to the power unit MSR. (See the
269 // "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, Volume 2 of
270 // 2, Registers" datasheet, September 2014, Reference Number: 330784-001.)
271 // This field records whether the quirk is present.
272 bool mHasRamUnitsQuirk;
274 // The abovementioned 15.3 microJoules value.
275 static const double kQuirkyRamJoulesPerTick;
277 // The previous sample's MSR values.
278 uint64_t mPrevPkgTicks;
279 uint64_t mPrevPp0Ticks;
280 uint64_t mPrevPp1Ticks;
281 uint64_t mPrevDdrTicks;
283 // The struct passed to diagCall64().
284 pkg_energy_statistics_t* mPkes;
286 public:
287 RAPL() : mHasRamUnitsQuirk(false) {
288 // Work out which RAPL MSRs this CPU model supports.
289 int cpuModel;
290 size_t size = sizeof(cpuModel);
291 if (sysctlbyname("machdep.cpu.model", &cpuModel, &size, NULL, 0) != 0) {
292 Abort("sysctlbyname(\"machdep.cpu.model\") failed");
295 // This is similar to arch/x86/kernel/cpu/perf_event_intel_rapl.c in
296 // linux-4.1.5/.
298 // By linux-5.6.14/, this stuff had moved into
299 // arch/x86/events/intel/rapl.c, which references processor families in
300 // arch/x86/include/asm/intel-family.h.
301 switch (cpuModel) {
302 case 0x2a: // Sandy Bridge
303 case 0x3a: // Ivy Bridge
304 // Supports package, cores, GPU.
305 mIsGpuSupported = true;
306 mIsRamSupported = false;
307 break;
309 case 0x3f: // Haswell X
310 case 0x4f: // Broadwell X
311 case 0x55: // Skylake X
312 case 0x56: // Broadwell D
313 // Supports package, cores, RAM. Has the units quirk.
314 mIsGpuSupported = false;
315 mIsRamSupported = true;
316 mHasRamUnitsQuirk = true;
317 break;
319 case 0x2d: // Sandy Bridge X
320 case 0x3e: // Ivy Bridge X
321 // Supports package, cores, RAM.
322 mIsGpuSupported = false;
323 mIsRamSupported = true;
324 break;
326 case 0x3c: // Haswell
327 case 0x3d: // Broadwell
328 case 0x45: // Haswell L
329 case 0x46: // Haswell G
330 case 0x47: // Broadwell G
331 // Supports package, cores, GPU, RAM.
332 mIsGpuSupported = true;
333 mIsRamSupported = true;
334 break;
336 case 0x4e: // Skylake L
337 case 0x5e: // Skylake
338 case 0x8e: // Kaby Lake L
339 case 0x9e: // Kaby Lake
340 case 0x66: // Cannon Lake L
341 case 0x7d: // Ice Lake
342 case 0x7e: // Ice Lake L
343 case 0xa5: // Comet Lake
344 case 0xa6: // Comet Lake L
345 // Supports package, cores, GPU, RAM, PSYS.
346 // XXX: this tool currently doesn't measure PSYS.
347 mIsGpuSupported = true;
348 mIsRamSupported = true;
349 break;
351 default:
352 Abort("unknown CPU model: %d", cpuModel);
353 break;
356 // Get the maximum number of logical CPUs so that we know how big to make
357 // |mPkes|.
358 int logicalcpu_max;
359 size = sizeof(logicalcpu_max);
360 if (sysctlbyname("hw.logicalcpu_max", &logicalcpu_max, &size, NULL, 0) !=
361 0) {
362 Abort("sysctlbyname(\"hw.logicalcpu_max\") failed");
365 // Over-allocate by 1024 bytes per CPU to allow for the uncertainty around
366 // core_energy_stat_t::gpmcs and for any other future extensions to that
367 // struct. (The fields we read all come before the core_energy_stat_t
368 // array, so it won't matter to us whether gpmcs is present or not.)
369 size_t pkesSize = sizeof(pkg_energy_statistics_t) +
370 logicalcpu_max * sizeof(core_energy_stat_t) +
371 logicalcpu_max * 1024;
372 mPkes = (pkg_energy_statistics_t*)malloc(pkesSize);
373 if (!mPkes) {
374 Abort("malloc() failed");
377 // Do an initial measurement so that the first sample's diffs are sensible.
378 double dummy1, dummy2, dummy3, dummy4;
379 EnergyEstimates(dummy1, dummy2, dummy3, dummy4);
382 ~RAPL() { free(mPkes); }
384 static double Joules(uint64_t aTicks, double aJoulesPerTick) {
385 return double(aTicks) * aJoulesPerTick;
388 void EnergyEstimates(double& aPkg_J, double& aCores_J, double& aGpu_J,
389 double& aRam_J) {
390 diagCall64_dgPowerStat(mPkes);
392 // Bits 12:8 are the ESU.
393 // Energy measurements come in multiples of 1/(2^ESU).
394 uint32_t energyStatusUnits = (mPkes->pkg_power_unit >> 8) & 0x1f;
395 double joulesPerTick = ((double)1 / (1 << energyStatusUnits));
397 aPkg_J = Joules(mPkes->pkg_energy - mPrevPkgTicks, joulesPerTick);
398 aCores_J = Joules(mPkes->pp0_energy - mPrevPp0Ticks, joulesPerTick);
399 aGpu_J = mIsGpuSupported
400 ? Joules(mPkes->pp1_energy - mPrevPp1Ticks, joulesPerTick)
401 : kUnsupported_j;
402 aRam_J = mIsRamSupported
403 ? Joules(mPkes->ddr_energy - mPrevDdrTicks,
404 mHasRamUnitsQuirk ? kQuirkyRamJoulesPerTick
405 : joulesPerTick)
406 : kUnsupported_j;
408 mPrevPkgTicks = mPkes->pkg_energy;
409 mPrevPp0Ticks = mPkes->pp0_energy;
410 if (mIsGpuSupported) {
411 mPrevPp1Ticks = mPkes->pp1_energy;
413 if (mIsRamSupported) {
414 mPrevDdrTicks = mPkes->ddr_energy;
419 /* static */ const double RAPL::kQuirkyRamJoulesPerTick = (double)1 / 65536;
421 //---------------------------------------------------------------------------
422 // Linux-specific code
423 //---------------------------------------------------------------------------
425 #elif defined(__linux__)
427 # include <linux/perf_event.h>
428 # include <sys/syscall.h>
// Thin wrapper around the perf_event_open system call, which glibc does not
// wrap itself. The arguments are forwarded unchanged; the raw syscall() result
// is returned as an int.
static int perf_event_open(struct perf_event_attr* aAttr, pid_t aPid, int aCpu,
                           int aGroupFd, unsigned long aFlags) {
  const long result =
      syscall(__NR_perf_event_open, aAttr, aPid, aCpu, aGroupFd, aFlags);
  return result;
}
436 // Returns false if the file cannot be opened.
437 template <typename T>
438 static bool ReadValueFromPowerFile(const char* aStr1, const char* aStr2,
439 const char* aStr3, const char* aScanfString,
440 T* aOut) {
441 // The filenames going into this buffer are under our control and the longest
442 // one is "/sys/bus/event_source/devices/power/events/energy-cores.scale".
443 // So 256 chars is plenty.
444 char filename[256];
446 sprintf(filename, "/sys/bus/event_source/devices/power/%s%s%s", aStr1, aStr2,
447 aStr3);
448 FILE* fp = fopen(filename, "r");
449 if (!fp) {
450 return false;
452 if (fscanf(fp, aScanfString, aOut) != 1) {
453 Abort("fscanf() failed");
455 fclose(fp);
457 return true;
460 // This class encapsulates the reading of a single RAPL domain.
461 class Domain {
462 bool mIsSupported; // Is the domain supported by the processor?
464 // These three are only set if |mIsSupported| is true.
465 double mJoulesPerTick; // How many Joules each tick of the MSR represents.
466 int mFd; // The fd through which the MSR is read.
467 double mPrevTicks; // The previous sample's MSR value.
469 public:
470 enum IsOptional { Optional, NonOptional };
472 Domain(const char* aName, uint32_t aType,
473 IsOptional aOptional = NonOptional) {
474 uint64_t config;
475 if (!ReadValueFromPowerFile("events/energy-", aName, "", "event=%llx",
476 &config)) {
477 // Failure is allowed for optional domains.
478 if (aOptional == NonOptional) {
479 Abort(
480 "failed to open file for non-optional domain '%s'\n"
481 "- Is your kernel version 3.14 or later, as required? "
482 "Run |uname -r| to see.",
483 aName);
485 mIsSupported = false;
486 return;
489 mIsSupported = true;
491 if (!ReadValueFromPowerFile("events/energy-", aName, ".scale", "%lf",
492 &mJoulesPerTick)) {
493 Abort("failed to read from .scale file");
496 // The unit should be "Joules", so 128 chars should be plenty.
497 char unit[128];
498 if (!ReadValueFromPowerFile("events/energy-", aName, ".unit", "%127s",
499 unit)) {
500 Abort("failed to read from .unit file");
502 if (strcmp(unit, "Joules") != 0) {
503 Abort("unexpected unit '%s' in .unit file", unit);
506 struct perf_event_attr attr;
507 memset(&attr, 0, sizeof(attr));
508 attr.type = aType;
509 attr.size = uint32_t(sizeof(attr));
510 attr.config = config;
512 // Measure all processes/threads. The specified CPU doesn't matter.
513 mFd = perf_event_open(&attr, /* aPid = */ -1, /* aCpu = */ 0,
514 /* aGroupFd = */ -1, /* aFlags = */ 0);
515 if (mFd < 0) {
516 Abort(
517 "perf_event_open() failed\n"
518 "- Did you run as root (e.g. with |sudo|) or set\n"
519 " /proc/sys/kernel/perf_event_paranoid to 0, as required?");
522 mPrevTicks = 0;
525 ~Domain() {
526 if (mIsSupported) {
527 close(mFd);
531 double EnergyEstimate() {
532 if (!mIsSupported) {
533 return kUnsupported_j;
536 uint64_t thisTicks;
537 if (read(mFd, &thisTicks, sizeof(uint64_t)) != sizeof(uint64_t)) {
538 Abort("read() failed");
541 uint64_t ticks = thisTicks - mPrevTicks;
542 mPrevTicks = thisTicks;
543 double joules = ticks * mJoulesPerTick;
544 return joules;
548 class RAPL {
549 Domain* mPkg;
550 Domain* mCores;
551 Domain* mGpu;
552 Domain* mRam;
554 public:
555 RAPL() {
556 uint32_t type;
557 if (!ReadValueFromPowerFile("type", "", "", "%u", &type)) {
558 Abort("failed to read from type file");
561 mPkg = new Domain("pkg", type);
562 mCores = new Domain("cores", type);
563 mGpu = new Domain("gpu", type, Domain::Optional);
564 mRam = new Domain("ram", type, Domain::Optional);
565 if (!mPkg || !mCores || !mGpu || !mRam) {
566 Abort("new Domain() failed");
570 ~RAPL() {
571 delete mPkg;
572 delete mCores;
573 delete mGpu;
574 delete mRam;
577 void EnergyEstimates(double& aPkg_J, double& aCores_J, double& aGpu_J,
578 double& aRam_J) {
579 aPkg_J = mPkg->EnergyEstimate();
580 aCores_J = mCores->EnergyEstimate();
581 aGpu_J = mGpu->EnergyEstimate();
582 aRam_J = mRam->EnergyEstimate();
586 #else
588 //---------------------------------------------------------------------------
589 // Unsupported platforms
590 //---------------------------------------------------------------------------
592 # error Sorry, this platform is not supported
594 #endif // platform
596 //---------------------------------------------------------------------------
597 // The main loop
598 //---------------------------------------------------------------------------
600 // The sample interval, measured in seconds.
601 static double gSampleInterval_sec;
603 // The platform-specific RAPL-reading machinery.
604 static RAPL* gRapl;
606 // All the sampled "total" values, in Watts.
607 static std::vector<double> gTotals_W;
609 // Power = Energy / Time, where power is measured in Watts, Energy is measured
610 // in Joules, and Time is measured in seconds.
611 static double JoulesToWatts(double aJoules) {
612 return aJoules / gSampleInterval_sec;
615 // "Normalize" here means convert kUnsupported_j to zero so it can be used in
616 // additive expressions. All printed values are 5 or maybe 6 chars (though 6
617 // chars would require a value > 100 W, which is unlikely). Values above 1000 W
618 // are normalized to " n/a ", so 6 chars is the longest that may be printed.
619 static void NormalizeAndPrintAsWatts(char* aBuf, double& aValue_J) {
620 if (aValue_J == kUnsupported_j || aValue_J >= 1000) {
621 aValue_J = 0;
622 sprintf(aBuf, "%s", " n/a ");
623 } else {
624 sprintf(aBuf, "%5.2f", JoulesToWatts(aValue_J));
628 static void SigAlrmHandler(int aSigNum, siginfo_t* aInfo, void* aContext) {
629 static int sampleNumber = 1;
631 double pkg_J, cores_J, gpu_J, ram_J;
632 gRapl->EnergyEstimates(pkg_J, cores_J, gpu_J, ram_J);
634 // We should have pkg and cores estimates, but might not have gpu and ram
635 // estimates.
636 assert(pkg_J != kUnsupported_j);
637 assert(cores_J != kUnsupported_j);
639 // This needs to be big enough to print watt values to two decimal places. 16
640 // should be plenty.
641 static const size_t kNumStrLen = 16;
643 static char pkgStr[kNumStrLen], coresStr[kNumStrLen], gpuStr[kNumStrLen],
644 ramStr[kNumStrLen];
645 NormalizeAndPrintAsWatts(pkgStr, pkg_J);
646 NormalizeAndPrintAsWatts(coresStr, cores_J);
647 NormalizeAndPrintAsWatts(gpuStr, gpu_J);
648 NormalizeAndPrintAsWatts(ramStr, ram_J);
650 // Core and GPU power are a subset of the package power.
651 assert(pkg_J >= cores_J + gpu_J);
653 // Compute "other" (i.e. rest of the package) and "total" only after the
654 // other values have been normalized.
656 char otherStr[kNumStrLen];
657 double other_J = pkg_J - cores_J - gpu_J;
658 NormalizeAndPrintAsWatts(otherStr, other_J);
660 char totalStr[kNumStrLen];
661 double total_J = pkg_J + ram_J;
662 NormalizeAndPrintAsWatts(totalStr, total_J);
664 gTotals_W.push_back(JoulesToWatts(total_J));
666 // Print and flush so that the output appears immediately even if being
667 // redirected through |tee| or anything like that.
668 PrintAndFlush("#%02d %s W = %s (%s + %s + %s) + %s W\n", sampleNumber++,
669 totalStr, pkgStr, coresStr, gpuStr, otherStr, ramStr);
672 static void Finish() {
673 size_t n = gTotals_W.size();
675 // This time calculation assumes that the timers are perfectly accurate which
676 // is not true but the inaccuracy should be small in practice.
677 double time = n * gSampleInterval_sec;
679 printf("\n");
680 printf("%d sample%s taken over a period of %.3f second%s\n", int(n),
681 n == 1 ? "" : "s", n * gSampleInterval_sec, time == 1.0 ? "" : "s");
683 if (n == 0 || n == 1) {
684 exit(0);
687 // Compute the mean.
688 double sum = std::accumulate(gTotals_W.begin(), gTotals_W.end(), 0.0);
689 double mean = sum / n;
691 // Compute the *population* standard deviation:
693 // popStdDev = sqrt(Sigma(x - m)^2 / n)
695 // where |x| is the sum variable, |m| is the mean, and |n| is the
696 // population size.
698 // This is different from the *sample* standard deviation, which divides by
699 // |n - 1|, and would be appropriate if we were using a random sample of a
700 // larger population.
701 double sumOfSquaredDeviations = 0;
702 for (double& iter : gTotals_W) {
703 double deviation = (iter - mean);
704 sumOfSquaredDeviations += deviation * deviation;
706 double popStdDev = sqrt(sumOfSquaredDeviations / n);
708 // Sort so that percentiles can be determined. We use the "Nearest Rank"
709 // method of determining percentiles, which is simplest to compute and which
710 // chooses values from those that appear in the input set.
711 std::sort(gTotals_W.begin(), gTotals_W.end());
713 printf("\n");
714 printf("Distribution of 'total' values:\n");
715 printf(" mean = %5.2f W\n", mean);
716 printf(" std dev = %5.2f W\n", popStdDev);
717 printf(" 0th percentile = %5.2f W (min)\n", gTotals_W[0]);
718 printf(" 5th percentile = %5.2f W\n", gTotals_W[ceil(0.05 * n) - 1]);
719 printf(" 25th percentile = %5.2f W\n", gTotals_W[ceil(0.25 * n) - 1]);
720 printf(" 50th percentile = %5.2f W\n", gTotals_W[ceil(0.50 * n) - 1]);
721 printf(" 75th percentile = %5.2f W\n", gTotals_W[ceil(0.75 * n) - 1]);
722 printf(" 95th percentile = %5.2f W\n", gTotals_W[ceil(0.95 * n) - 1]);
723 printf("100th percentile = %5.2f W (max)\n", gTotals_W[n - 1]);
725 exit(0);
728 static void SigIntHandler(int aSigNum, siginfo_t* aInfo, void* aContext) {
729 Finish();
// Writes the command-line help text, including a platform-specific note about
// the privileges needed to run the program, to stdout.
static void PrintUsage() {
  static const char kUsageText[] =
      "usage: rapl [options]\n"
      "\n"
      "Options:\n"
      "\n"
      " -h --help show this message\n"
      " -i --sample-interval <N> sample every N ms [default=1000]\n"
      " -n --sample-count <N> get N samples (0 means unlimited) "
      "[default=0]\n"
      "\n"
#if defined(__APPLE__)
      "On Mac this program can be run by any user.\n"
#elif defined(__linux__)
      "On Linux this program can only be run by the super-user unless the "
      "contents\n"
      "of /proc/sys/kernel/perf_event_paranoid is set to 0 or lower.\n"
#else
#  error Sorry, this platform is not supported
#endif
      "\n";

  // The text contains no conversion specifications, so it is written verbatim.
  fputs(kUsageText, stdout);
}
755 int main(int argc, char** argv) {
756 // Process command line options.
758 gArgv0 = argv[0];
760 // Default values.
761 int sampleInterval_msec = 1000;
762 int sampleCount = 0;
764 struct option longOptions[] = {
765 {"help", no_argument, NULL, 'h'},
766 {"sample-interval", required_argument, NULL, 'i'},
767 {"sample-count", required_argument, NULL, 'n'},
768 {NULL, 0, NULL, 0}};
769 const char* shortOptions = "hi:n:";
771 int c;
772 char* endPtr;
773 while ((c = getopt_long(argc, argv, shortOptions, longOptions, NULL)) != -1) {
774 switch (c) {
775 case 'h':
776 PrintUsage();
777 exit(0);
779 case 'i':
780 sampleInterval_msec = strtol(optarg, &endPtr, /* base = */ 10);
781 if (*endPtr) {
782 CmdLineAbort("sample interval is not an integer");
784 if (sampleInterval_msec < 1 || sampleInterval_msec > 3600000) {
785 CmdLineAbort("sample interval must be in the range 1..3600000 ms");
787 break;
789 case 'n':
790 sampleCount = strtol(optarg, &endPtr, /* base = */ 10);
791 if (*endPtr) {
792 CmdLineAbort("sample count is not an integer");
794 if (sampleCount < 0 || sampleCount > 1000000) {
795 CmdLineAbort("sample count must be in the range 0..1000000");
797 break;
799 default:
800 CmdLineAbort(NULL);
804 // The RAPL MSRs update every ~1 ms, but the measurement period isn't exactly
805 // 1 ms, which means the sample periods are not exact. "Power Measurement
806 // Techniques on Standard Compute Nodes: A Quantitative Comparison" by
807 // Hackenberg et al. suggests the following.
809 // "RAPL provides energy (and not power) consumption data without
810 // timestamps associated to each counter update. This makes sampling rates
811 // above 20 Samples/s unfeasible if the systematic error should be below
812 // 5%... Constantly polling the RAPL registers will both occupy a processor
813 // core and distort the measurement itself."
815 // So warn about this case.
816 if (sampleInterval_msec < 50) {
817 fprintf(stderr,
818 "\nWARNING: sample intervals < 50 ms are likely to produce "
819 "inaccurate estimates\n\n");
821 gSampleInterval_sec = double(sampleInterval_msec) / 1000;
823 // Initialize the platform-specific RAPL reading machinery.
824 gRapl = new RAPL();
825 if (!gRapl) {
826 Abort("new RAPL() failed");
829 // Install the signal handlers.
831 struct sigaction sa;
832 memset(&sa, 0, sizeof(sa));
833 sa.sa_flags = SA_RESTART | SA_SIGINFO;
834 // The extra parens around (0) suppress a -Wunreachable-code warning on OS X
835 // where sigemptyset() is a macro that can never fail and always returns 0.
836 if (sigemptyset(&sa.sa_mask) < (0)) {
837 Abort("sigemptyset() failed");
839 sa.sa_sigaction = SigAlrmHandler;
840 if (sigaction(SIGALRM, &sa, NULL) < 0) {
841 Abort("sigaction(SIGALRM) failed");
843 sa.sa_sigaction = SigIntHandler;
844 if (sigaction(SIGINT, &sa, NULL) < 0) {
845 Abort("sigaction(SIGINT) failed");
848 // Set up the timer.
849 struct itimerval timer;
850 timer.it_interval.tv_sec = sampleInterval_msec / 1000;
851 timer.it_interval.tv_usec = (sampleInterval_msec % 1000) * 1000;
852 timer.it_value = timer.it_interval;
853 if (setitimer(ITIMER_REAL, &timer, NULL) < 0) {
854 Abort("setitimer() failed");
857 // Print header.
858 PrintAndFlush(" total W = _pkg_ (cores + _gpu_ + other) + _ram_ W\n");
860 // Take samples.
861 if (sampleCount == 0) {
862 while (true) {
863 pause();
865 } else {
866 for (int i = 0; i < sampleCount; i++) {
867 pause();
871 Finish();
873 return 0;