Add logging for comparison behaviors
[hiphop-php.git] / hphp / util / hardware-counter.cpp
blobe0e391f2002e3b720b366035bbf473f723c1d667
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/util/hardware-counter.h"
19 #ifndef NO_HARDWARE_COUNTERS
21 #include <folly/ScopeGuard.h>
23 #include "hphp/util/alloc.h"
24 #include "hphp/util/logger.h"
25 #include "hphp/util/service-data.h"
26 #include "hphp/util/struct-log.h"
27 #include "hphp/util/timer.h"
29 #define _GNU_SOURCE 1
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <fcntl.h>
34 #include <errno.h>
35 #include <assert.h>
36 #include <sys/ioctl.h>
37 #include <asm/unistd.h>
38 #include <sys/prctl.h>
39 #include <linux/perf_event.h>
41 #include <folly/String.h>
42 #include <folly/Memory.h>
43 #include <folly/portability/SysMman.h>
44 #include <folly/portability/Unistd.h>
46 namespace HPHP {
47 ///////////////////////////////////////////////////////////////////////////////
49 THREAD_LOCAL_NO_CHECK(HardwareCounter, HardwareCounter::s_counter);
// Process-wide configuration for the perf-based hardware counters. These are
// written once by HardwareCounter::Init() (or the legacy setters) before the
// per-thread counters are created, and read when each counter is opened.
static bool s_recordSubprocessTimes = false;  // sets perf_event_attr.inherit
static bool s_excludeKernel = false;          // sets perf_event_attr.exclude_kernel
static bool s_profileHWEnable;                // master switch, read by useCounters()
static bool s_fastReads = false;              // mmap the perf page for user-space reads
static int s_exportInterval = -1;             // time-series export period (s); -1 disables
static std::string s_profileHWEvents;         // comma-separated custom event list
58 static inline bool useCounters() {
59 #ifdef VALGRIND
60 return false;
61 #else
62 return s_profileHWEnable;
63 #endif
/*
 * Turning this on helps with the resolution of multiplexed counters
 * (provided cap_user_time is true in the
 * perf_event_mmap_page). However, experiments show that periodically,
 * time_offset and the result of rdtsc "jump" (this is probably when
 * the thread migrates from one cpu to another); when they do, they
 * jump by appropriate amounts so that enabled and runtime progress
 * monotonically (and by sensible values) - but they don't seem to
 * jump atomically, so there can be one sample where only one has
 * jumped. This can cause a temporary blip in enabled and or runtime.
 *
 * I'm adding this so we can choose to *not* use rdtsc, and avoid the
 * blips.
 *
 * It turns out that doing so does degrade the accuracy when there's a
 * lot of multiplexing going on, and a bit more experimentation shows
 * that the blip is only really a problem if we record it in the
 * baseline during a reset (since that then affects every read until
 * the next reset), so for now, turn it on but don't use it for
 * reset_values.
 */
87 static constexpr auto use_cap_time = true;
// Barriers used around reads of the perf mmap page's seqlock. On x86 a
// compiler-only barrier is used; aarch64 additionally needs dmb/isb
// instructions. On other architectures the fast-read path is never taken,
// so barrier() can be a no-op.
#if defined(__x86_64__)
#define barrier() __asm__ volatile("" ::: "memory")
#elif defined(__aarch64__)
#define barrier() asm volatile("dmb ish" : : : "memory")
#define isb() asm volatile("isb" : : : "memory")
#else
#define barrier()
#endif
98 static uint64_t rdtsc() {
99 #if defined(__x86_64__)
100 uint64_t msr;
101 asm volatile ( "rdtsc\n\t" // Returns the time in EDX:EAX.
102 "shl $32, %%rdx\n\t" // Shift the upper bits left.
103 "or %%rdx, %0" // 'Or' in the lower bits.
104 : "=a" (msr)
106 : "rdx");
107 return msr;
108 #endif
109 always_assert(false);
112 static uint64_t rdpmc(uint32_t counter) {
113 #if defined(__x86_64__)
114 uint32_t low, high;
116 __asm__ volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
117 return low | ((uint64_t)high << 32);
118 #elif defined(__aarch64__)
119 uint64_t ret;
120 if (counter == PERF_COUNT_HW_CPU_CYCLES)
121 asm volatile("mrs %0, pmccntr_el0" : "=r" (ret));
122 else {
123 asm volatile("msr pmselr_el0, %0" : : "r" ((uint64_t)(counter-1)));
124 asm volatile("mrs %0, pmxevcntr_el0" : "=r" (ret));
127 isb();
128 return ret;
129 #endif
130 always_assert(false);
133 static ServiceData::ExportedTimeSeries*
134 createTimeSeries(const std::string& name) {
135 assertx(!name.empty());
137 if (s_exportInterval == -1) {
138 // We're initializing counters for the main thread in a server process,
139 // which won't be running requests and shouldn't have any time series. Or
140 // someone manually disabled time series exporting in the config. Either
141 // way, bail out early.
142 return nullptr;
145 static const std::vector<ServiceData::StatsType> exportTypes{
146 ServiceData::StatsType::AVG,
147 ServiceData::StatsType::SUM,
150 return ServiceData::createTimeSeries(
151 "perf." + name,
152 exportTypes,
153 {std::chrono::seconds(s_exportInterval)}
157 struct HardwareCounterImpl {
158 HardwareCounterImpl(int type, unsigned long config, const char* desc)
159 : m_desc(desc ? desc : "")
160 , m_timeSeries(createTimeSeries(m_desc))
161 , m_timeSeriesNonPsp(createTimeSeries(m_desc + "-nonpsp")) {
162 pe.type = type;
163 pe.size = sizeof (struct perf_event_attr);
164 pe.config = config;
165 pe.inherit = s_recordSubprocessTimes;
166 pe.disabled = 1;
167 pe.pinned = 0;
168 pe.exclude_kernel = s_excludeKernel;
169 pe.exclude_hv = 1;
170 pe.read_format =
171 PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING;
174 ~HardwareCounterImpl() {
175 close();
178 void updateServiceData(StructuredLogEntry* entry, bool includingPsp) {
179 auto const value = read();
180 auto timeSeries = includingPsp ? m_timeSeries : m_timeSeriesNonPsp;
182 if (value != 0) {
183 if (entry) entry->setInt(m_desc, value);
184 if (timeSeries) timeSeries->addValue(value);
188 void init_if_not() {
190 * perf_event_open(struct perf_event_attr *hw_event_uptr, pid_t pid,
191 * int cpu, int group_fd, unsigned long flags)
193 if (inited) return;
194 inited = true;
195 m_fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0);
196 if (m_fd < 0) {
197 Logger::FWarning("HardwareCounter: perf_event_open failed with: {}",
198 folly::errnoStr(errno));
199 m_err = -1;
200 return;
203 fcntl(m_fd, F_SETFD, O_CLOEXEC);
205 if (ioctl(m_fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
206 Logger::FWarning("perf_event failed to enable: {}",
207 folly::errnoStr(errno));
208 close();
209 m_err = -1;
210 return;
213 if (!s_fastReads) return;
215 auto const base = mmap(nullptr, s_pageSize, PROT_READ | PROT_WRITE,
216 MAP_SHARED, m_fd, 0);
217 if (base == MAP_FAILED) {
218 Logger::FWarning("HardwareCounter: failed to mmap perf_event: {}",
219 folly::errnoStr(errno));
220 } else {
221 m_meta = static_cast<perf_event_mmap_page*>(base);
222 if (!m_meta->cap_user_rdpmc ||
223 (use_cap_time && !m_meta->cap_user_time)) {
224 munmap(m_meta, s_pageSize);
225 m_meta = nullptr;
227 ioctl(m_fd, PERF_EVENT_IOC_RESET, 0);
230 reset();
233 int64_t read() {
234 uint64_t values[3];
235 if (auto const width = readRaw(values)) {
236 values[0] -= reset_values[0];
237 values[1] -= reset_values[1];
238 values[2] -= reset_values[2];
239 if (width < 64) {
240 auto const mask = (1uLL << width) - 1;
241 values[0] &= mask;
242 if (values[0] > (mask >> 1)) return extra;
243 } else if (values[0] > std::numeric_limits<int64_t>::max()) {
244 return extra;
246 if (values[1] == values[2]) {
247 return values[0] + extra;
249 if (!values[2]) {
250 return extra;
252 int64_t value = (double)values[0] * values[1] / values[2];
253 return value + extra;
255 return 0;
258 void incCount(int64_t amount) {
259 extra += amount;
263 * read current value, enabled time, and running time for the
264 * counter.
266 * returns the width of the counter in bits, or zero on failure.
268 uint32_t readRaw(uint64_t* values, bool forReset = false) {
269 if (m_err || !useCounters()) return 0;
270 init_if_not();
272 // try to read the values in user space
273 if (m_meta) {
274 uint32_t seq, time_mult, time_shift, idx, width;
275 uint64_t cyc, time_offset;
276 uint64_t count, enabled, running;
278 do {
279 seq = m_meta->lock;
280 barrier();
281 enabled = m_meta->time_enabled;
282 running = m_meta->time_running;
284 if (use_cap_time && !forReset) {
285 assertx(m_meta->cap_user_time);
287 cyc = rdtsc();
288 time_offset = m_meta->time_offset;
289 time_mult = m_meta->time_mult;
290 time_shift = m_meta->time_shift;
293 idx = m_meta->index;
294 count = m_meta->offset;
295 width = m_meta->pmc_width;
297 assertx(m_meta->cap_user_rdpmc);
298 if (idx) {
299 count += rdpmc(idx - 1);
302 barrier();
303 } while (m_meta->lock != seq);
305 [&] {
306 if (!ever_active) {
307 if (!idx && !count) {
308 // enabled and running don't get meaningful values until
309 // the first time the counter is enabled. This only really
310 // matters if this call is being used to initialize the
311 // reset_values, because we'll get garbage values for the
312 // baseline.
313 enabled = running = 0;
314 return;
316 ever_active = true;
318 if (use_cap_time && !forReset) {
319 auto const quot = (cyc >> time_shift);
320 auto const rem = cyc & (((uint64_t)1 << time_shift) - 1);
321 auto const delta = time_offset + quot * time_mult +
322 ((rem * time_mult) >> time_shift);
324 enabled += delta;
325 if (idx) running += delta;
327 }();
329 values[0] = count;
330 values[1] = enabled;
331 values[2] = running;
332 return width;
335 if (m_fd <= 0) return 0;
337 * read the count + scaling values
339 * It is not necessary to stop an event to read its value
341 auto ret = ::read(m_fd, values, sizeof(*values) * 3);
342 return ret == sizeof(*values) * 3 ? 64 : 0;
345 void reset() {
346 if (m_err || !useCounters()) return;
347 init_if_not();
348 extra = 0;
349 if (m_fd > 0) {
350 if (!m_meta && ioctl(m_fd, PERF_EVENT_IOC_RESET, 0) < 0) {
351 Logger::FWarning("perf_event failed to reset with: {}",
352 folly::errnoStr(errno));
353 m_err = -1;
354 return;
356 if (!readRaw(reset_values, true)) {
357 Logger::FWarning("perf_event failed to reset with: {}",
358 folly::errnoStr(errno));
359 m_err = -1;
360 return;
365 public:
366 std::string m_desc;
367 int m_err{0};
368 private:
369 int m_fd{-1};
370 bool inited{false};
371 bool ever_active{false};
372 ServiceData::ExportedTimeSeries* m_timeSeries;
373 ServiceData::ExportedTimeSeries* m_timeSeriesNonPsp;
374 struct perf_event_attr pe{};
375 uint64_t reset_values[3];
376 uint64_t extra{0};
377 perf_event_mmap_page* m_meta{};
379 void close() {
380 if (m_fd > 0) {
381 ::close(m_fd);
382 m_fd = -1;
383 if (m_meta) {
384 munmap(m_meta, s_pageSize);
385 m_meta = nullptr;
391 HardwareCounter::HardwareCounter()
392 : m_countersSet(false) {
393 m_instructionCounter = std::make_unique<HardwareCounterImpl>(
394 PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, "instructions"
396 if (s_profileHWEvents.empty()) {
397 m_loadCounter = std::make_unique<HardwareCounterImpl>(
398 PERF_TYPE_HW_CACHE,
399 PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8),
400 "loads"
402 m_storeCounter = std::make_unique<HardwareCounterImpl>(
403 PERF_TYPE_HW_CACHE,
404 PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_WRITE) << 8),
405 "stores"
407 } else {
408 m_countersSet = true;
409 setPerfEvents(s_profileHWEvents);
413 HardwareCounter::~HardwareCounter() {
416 void HardwareCounter::RecordSubprocessTimes() {
417 s_recordSubprocessTimes = true;
420 void HardwareCounter::ExcludeKernel() {
421 s_excludeKernel = true;
424 void HardwareCounter::Init(bool enable, const std::string& events,
425 bool subProc,
426 bool excludeKernel,
427 bool fastReads,
428 int exportInterval) {
429 s_profileHWEnable = enable;
430 s_profileHWEvents = events;
431 s_recordSubprocessTimes = subProc;
432 s_excludeKernel = excludeKernel;
433 s_fastReads = fastReads,
434 s_exportInterval = exportInterval;
437 void HardwareCounter::Reset() {
438 s_counter->reset();
441 void HardwareCounter::reset() {
442 m_instructionCounter->reset();
443 if (!m_countersSet) {
444 m_storeCounter->reset();
445 m_loadCounter->reset();
447 for (unsigned i = 0; i < m_counters.size(); i++) {
448 m_counters[i]->reset();
452 int64_t HardwareCounter::GetInstructionCount() {
453 return s_counter->getInstructionCount();
456 int64_t HardwareCounter::getInstructionCount() {
457 return m_instructionCounter->read();
460 int64_t HardwareCounter::GetLoadCount() {
461 return s_counter->getLoadCount();
464 int64_t HardwareCounter::getLoadCount() {
465 return m_loadCounter ? m_loadCounter->read() : 0;
468 int64_t HardwareCounter::GetStoreCount() {
469 return s_counter->getStoreCount();
472 int64_t HardwareCounter::getStoreCount() {
473 return m_storeCounter ? m_storeCounter->read() : 0;
476 void HardwareCounter::IncInstructionCount(int64_t amount) {
477 s_counter->m_instructionCounter->incCount(amount);
480 void HardwareCounter::IncLoadCount(int64_t amount) {
481 if (!s_counter->m_countersSet) {
482 s_counter->m_loadCounter->incCount(amount);
486 void HardwareCounter::IncStoreCount(int64_t amount) {
487 if (!s_counter->m_countersSet) {
488 s_counter->m_storeCounter->incCount(amount);
492 struct PerfTable perfTable[] = {
493 /* PERF_TYPE_HARDWARE events */
494 #define PC(n) PERF_TYPE_HARDWARE, PERF_COUNT_HW_ ## n
495 { "cpu-cycles", PC(CPU_CYCLES) },
496 { "cycles", PC(CPU_CYCLES) },
497 { "instructions", PC(INSTRUCTIONS) },
498 { "cache-references", PC(CACHE_REFERENCES) },
499 { "cache-misses", PC(CACHE_MISSES) },
500 { "branch-instructions", PC(BRANCH_INSTRUCTIONS) },
501 { "branches", PC(BRANCH_INSTRUCTIONS) },
502 { "branch-misses", PC(BRANCH_MISSES) },
503 { "bus-cycles", PC(BUS_CYCLES) },
504 { "stalled-cycles-frontend", PC(STALLED_CYCLES_FRONTEND) },
505 { "stalled-cycles-backend", PC(STALLED_CYCLES_BACKEND) },
507 /* PERF_TYPE_HW_CACHE hw_cache_id */
508 #define PCC(n) PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_ ## n
509 { "L1-dcache-", PCC(L1D) },
510 { "L1-icache-", PCC(L1I) },
511 { "LLC-", PCC(LL) },
512 { "dTLB-", PCC(DTLB) },
513 { "iTLB-", PCC(ITLB) },
514 { "branch-", PCC(BPU) },
516 /* PERF_TYPE_HW_CACHE hw_cache_op, hw_cache_result */
517 #define PCCO(n, m) PERF_TYPE_HW_CACHE, \
518 ((PERF_COUNT_HW_CACHE_OP_ ## n) << 8 | \
519 (PERF_COUNT_HW_CACHE_RESULT_ ## m) << 16)
520 { "loads", PCCO(READ, ACCESS) },
521 { "load-misses", PCCO(READ, MISS) },
522 { "stores", PCCO(WRITE, ACCESS) },
523 { "store-misses", PCCO(WRITE, MISS) },
524 { "prefetches", PCCO(PREFETCH, ACCESS) },
525 { "prefetch-misses", PCCO(PREFETCH, MISS) }
528 static int findEvent(const char *event, struct PerfTable *t,
529 int len, int *match_len) {
530 int i;
532 for (i = 0; i < len; i++) {
533 if (!strncmp(event, t[i].name, strlen(t[i].name))) {
534 *match_len = strlen(t[i].name);
535 return i;
538 return -1;
// Field extractors for the CPUID leaf-1 signature returned in EAX.
#define CPUID_STEPPING(x)  ((x) & 0xf)
#define CPUID_MODEL(x)     (((x) & 0xf0) >> 4)
#define CPUID_FAMILY(x)    (((x) & 0xf00) >> 8)
#define CPUID_TYPE(x)      (((x) & 0x3000) >> 12)
546 // hack to get LLC counters on perflab frc machines
547 static bool isIntelE5_2670() {
548 #ifdef __x86_64__
549 unsigned long x;
550 asm volatile ("cpuid" : "=a"(x): "a"(1) : "ebx", "ecx", "edx");
551 return CPUID_STEPPING(x) == 6 && CPUID_MODEL(x) == 0xd
552 && CPUID_FAMILY(x) == 6 && CPUID_TYPE(x) == 0;
553 #else
554 return false;
555 #endif
558 static void checkLLCHack(const char* event, uint32_t& type, uint64_t& config) {
559 if (!strncmp(event, "LLC-load", 8) && isIntelE5_2670()) {
560 type = PERF_TYPE_RAW;
561 if (!strncmp(&event[4], "loads", 5)) {
562 config = 0x534f2e;
563 } else if (!strncmp(&event[4], "load-misses", 11)) {
564 config = 0x53412e;
569 bool HardwareCounter::addPerfEvent(const char* event) {
570 uint32_t type = 0;
571 uint64_t config = 0;
572 int i, match_len;
573 bool found = false;
574 const char* ev = event;
576 while ((i = findEvent(ev, perfTable,
577 sizeof(perfTable)/sizeof(struct PerfTable),
578 &match_len))
579 != -1) {
580 if (!found) {
581 found = true;
582 type = perfTable[i].type;
583 } else if (type != perfTable[i].type) {
584 Logger::FWarning("failed to find perf event: {}", event);
585 return false;
587 config |= perfTable[i].config;
588 ev = &ev[match_len];
591 checkLLCHack(event, type, config);
593 // Check if we have a raw spec.
594 if (!found && event[0] == 'r' && event[1] != 0) {
595 config = strtoull(event + 1, const_cast<char**>(&ev), 16);
596 if (*ev == 0) {
597 found = true;
598 type = PERF_TYPE_RAW;
602 if (!found || *ev) {
603 Logger::FWarning("failed to find perf event: {}", event);
604 return false;
606 auto hwc = std::make_unique<HardwareCounterImpl>(type, config, event);
607 if (hwc->m_err) {
608 Logger::FWarning("failed to set perf event: {}", event);
609 return false;
611 m_counters.emplace_back(std::move(hwc));
612 if (!m_countersSet) {
613 // reset load and store counters. This is because
614 // perf does not seem to handle more than three counters
615 // very well.
616 m_loadCounter.reset();
617 m_storeCounter.reset();
618 m_countersSet = true;
620 return true;
623 bool HardwareCounter::eventExists(const char *event) {
624 // hopefully m_counters set is small, so a linear scan does not hurt
625 for(unsigned i = 0; i < m_counters.size(); i++) {
626 if (!strcmp(event, m_counters[i]->m_desc.c_str())) {
627 return true;
630 return false;
633 bool HardwareCounter::setPerfEvents(folly::StringPiece sevents) {
634 // Make a copy of the string for use with strtok.
635 auto const sevents_buf = static_cast<char*>(malloc(sevents.size() + 1));
636 SCOPE_EXIT { free(sevents_buf); };
637 memcpy(sevents_buf, sevents.data(), sevents.size());
638 sevents_buf[sevents.size()] = '\0';
640 char* strtok_buf = nullptr;
641 char* s = strtok_r(sevents_buf, ",", &strtok_buf);
642 while (s) {
643 if (!eventExists(s) && !addPerfEvent(s)) {
644 return false;
646 s = strtok_r(nullptr, ",", &strtok_buf);
648 return true;
651 bool HardwareCounter::SetPerfEvents(folly::StringPiece events) {
652 return s_counter->setPerfEvents(events);
655 void HardwareCounter::clearPerfEvents() {
656 m_counters.clear();
659 void HardwareCounter::ClearPerfEvents() {
660 s_counter->clearPerfEvents();
663 void HardwareCounter::updateServiceData(StructuredLogEntry* entry,
664 bool includingPsp) {
665 forEachCounter([entry,includingPsp](HardwareCounterImpl& counter) {
666 counter.updateServiceData(entry, includingPsp);
670 void HardwareCounter::UpdateServiceData(const timespec& cpu_begin,
671 const timespec& wall_begin,
672 StructuredLogEntry* entry,
673 bool includingPsp) {
674 // The begin timespec should be what was recorded at the beginning of the
675 // request, so we subtract that out from the current measurement. The
676 // perf-based counters owned by this file are reset to 0 at the same time as
677 // the begin timespec is recorded, so there's no subtraction needed for
678 // those.
679 struct timespec cpu_now;
680 gettime(CLOCK_THREAD_CPUTIME_ID, &cpu_now);
682 s_counter->updateServiceData(entry, includingPsp);
684 static auto cpuTimeSeries = createTimeSeries("cpu-time-us");
685 static auto cpuTimeNonPspSeries = createTimeSeries("cpu-time-us-nonpsp");
686 auto cpu_series = includingPsp ? cpuTimeSeries : cpuTimeNonPspSeries;
687 auto const cpuTimeUs = gettime_diff_us(cpu_begin, cpu_now);
688 if (cpuTimeUs > 0) {
689 if (entry) entry->setInt("cpu-time-us", cpuTimeUs);
690 if (cpu_series) cpu_series->addValue(cpuTimeUs);
693 struct timespec wall_now;
694 Timer::GetMonotonicTime(wall_now);
695 static auto wallTimeSeries = createTimeSeries("wall-time-us");
696 static auto wallTimeNonPspSeries = createTimeSeries("wall-time-us-nonpsp");
697 auto wall_series = includingPsp ? wallTimeSeries : wallTimeNonPspSeries;
698 auto const wallTimeUs = gettime_diff_us(wall_begin, wall_now);
699 if (wallTimeUs > 0) {
700 if (entry) entry->setInt("wall-time-us", wallTimeUs);
701 if (wall_series) wall_series->addValue(wallTimeUs);
704 if (entry) entry->setInt("includingPsp", includingPsp);
707 void HardwareCounter::getPerfEvents(PerfEventCallback f, void* data) {
708 forEachCounter([f, data](HardwareCounterImpl& counter) {
709 f(counter.m_desc, counter.read(), data);
713 template<typename F>
714 void HardwareCounter::forEachCounter(F func) {
715 func(*m_instructionCounter);
716 if (!m_countersSet) {
717 func(*m_loadCounter);
718 func(*m_storeCounter);
720 for (auto& counter : m_counters) func(*counter);
723 void HardwareCounter::GetPerfEvents(PerfEventCallback f, void* data) {
724 s_counter->getPerfEvents(f, data);
727 ///////////////////////////////////////////////////////////////////////////////
731 #else // NO_HARDWARE_COUNTERS
733 namespace HPHP {
734 ///////////////////////////////////////////////////////////////////////////////
736 HardwareCounter HardwareCounter::s_counter;
738 ///////////////////////////////////////////////////////////////////////////////
741 #endif // NO_HARDWARE_COUNTERS