ASAN: Fix alloc/dealloc mismatch in hotprofiler
[hiphop-php.git] / hphp / runtime / ext / ext_hotprofiler.cpp
blob afcc3f7296ecfbd6d1dd6e501f8fdcad9fcb6106
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com)     |
   | Copyright (c) 1997-2010 The PHP Group                                |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/ext/ext_fb.h"
#include "hphp/runtime/base/memory-manager.h"
#include "hphp/runtime/base/request-local.h"
#include "hphp/runtime/base/zend-math.h"
#include "hphp/runtime/server/server-stats.h"
#include "hphp/runtime/base/ini-setting.h"
#include "hphp/runtime/vm/event-hook.h"
#include "hphp/util/alloc.h"
#include "hphp/util/vdso.h"
#include "hphp/util/cycles.h"
#ifdef __FreeBSD__
#include <sys/resource.h>
#include <sys/param.h>
#include <sys/cpuset.h>
#define cpu_set_t cpuset_t
#define SET_AFFINITY(pid, size, mask) \
        cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, size, mask)
#define GET_AFFINITY(pid, size, mask) \
        cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, size, mask)
#elif defined(__APPLE__)
#include <mach/mach_init.h>
#include <mach/thread_policy.h>
#include <mach/thread_act.h>

#define cpu_set_t thread_affinity_policy_data_t
#define CPU_SET(cpu_id, new_mask) \
        (*(new_mask)).affinity_tag = (cpu_id + 1)
#define CPU_ZERO(new_mask) \
        (*(new_mask)).affinity_tag = THREAD_AFFINITY_TAG_NULL
#define GET_AFFINITY(pid, size, mask) \
        (*(mask)).affinity_tag = THREAD_AFFINITY_TAG_NULL
#define SET_AFFINITY(pid, size, mask) \
        thread_policy_set(mach_thread_self(), THREAD_AFFINITY_POLICY, \
                          (int *)mask, THREAD_AFFINITY_POLICY_COUNT)
#else
#include <sched.h>
#define SET_AFFINITY(pid, size, mask) sched_setaffinity(0, size, mask)
#define GET_AFFINITY(pid, size, mask) sched_getaffinity(0, size, mask)
#endif

#include <iostream>
#include <fstream>
#include <zlib.h>

// Delimiter inserted between caller and callee when building stack names
#define HP_STACK_DELIM        "==>"
#define HP_STACK_DELIM_LEN    (sizeof(HP_STACK_DELIM) - 1)

namespace HPHP {

IMPLEMENT_DEFAULT_EXTENSION(hotprofiler);
IMPLEMENT_DEFAULT_EXTENSION(xhprof);
///////////////////////////////////////////////////////////////////////////////
// helpers

/**
 * A hash function to calculate an 8-bit hash code for a function name.
 * This is based on a small modification to 'zend_inline_hash_func' by summing
 * up all bytes of the ulong returned by 'zend_inline_hash_func'.
 *
 * @param str, char *, string to be calculated hash code for.
 * @author cjiang
 */
static inline uint8_t hprof_inline_hash(const char * str) {
  unsigned long h = 5381;
  uint i = 0;
  uint8_t res = 0;

  while (*str) {
    h += (h << 5);
    h ^= (unsigned long) *str++;
  }

  for (i = 0; i < sizeof(unsigned long); i++) {
    res += ((uint8_t *)&h)[i];
  }
  return res;
}
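
// Worked sketch (illustrative, not part of the original source): the loop
// above is the djb2-style update h = h * 33 ^ c, and the returned code is
// the byte-sum of h:
//   uint8_t bucket = hprof_inline_hash("strlen");  // always in 0..255
// Collisions are harmless: a shared bucket merely forces beginFrame() to do
// the call-stack walk it would otherwise skip.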

/**
 * Get time delta in microseconds.
 */
static long get_us_interval(struct timeval *start, struct timeval *end) {
  return (((end->tv_sec - start->tv_sec) * 1000000)
          + (end->tv_usec - start->tv_usec));
}

/**
 * Increment the given timeval by the given number of microseconds.
 */
static void incr_us_interval(struct timeval *start, uint64_t incr) {
  incr += (start->tv_sec * 1000000 + start->tv_usec);
  start->tv_sec  = incr/1000000;
  start->tv_usec = incr%1000000;
}

/**
 * Truncates the given timeval to the beginning of its slot, where the
 * slot width is determined by intr.
 *
 * @param  tv    Input timeval to be truncated in place
 * @param  intr  Time interval in microsecs - slot width
 * @return void
 * @author veeve
 */
static void hp_trunc_time(struct timeval *tv, uint64_t intr) {
  uint64_t time_in_micro;

  // Convert to microsecs and trunc that first
  time_in_micro = (tv->tv_sec * 1000000) + tv->tv_usec;
  time_in_micro /= intr;
  time_in_micro *= intr;

  // Update tv
  tv->tv_sec  = (time_in_micro / 1000000);
  tv->tv_usec = (time_in_micro % 1000000);
}
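
// Worked example (assumed values): with intr = 100000 (100ms slots), a tv of
// 12:00:00.123456 truncates in place to 12:00:00.100000. SampleProfiler uses
// this to align its first sample to a slot boundary.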

///////////////////////////////////////////////////////////////////////////////
// High precision timer related functions.

/**
 * This is a microbenchmark to get cpu frequency the process is running on. The
 * returned value is used to convert TSC counter values to microseconds.
 *
 * @return int64.
 * @author cjiang
 */
static int64_t get_cpu_frequency() {
  struct timeval start;
  struct timeval end;

  if (gettimeofday(&start, 0)) {
    perror("gettimeofday");
    return 0;
  }
  uint64_t tsc_start = cpuCycles();
  // Sleep for 5 milliseconds. Compared with gettimeofday's few microseconds
  // of execution time, this should be long enough.
  usleep(5000);
  if (gettimeofday(&end, 0)) {
    perror("gettimeofday");
    return 0;
  }
  uint64_t tsc_end = cpuCycles();
  return nearbyint((tsc_end - tsc_start) * 1.0
                   / (get_us_interval(&start, &end)));
}
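
// Usage sketch (hypothetical numbers): the return value is effectively in
// MHz, i.e. TSC cycles per microsecond, so a cycle delta converts to wall
// time as:
//   int64_t mhz = get_cpu_frequency();          // e.g. ~2400 on a 2.4GHz box
//   uint64_t us = (tsc_end - tsc_start) / mhz;  // see to_usec() below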

#define MAX_LINELENGTH 1024

static int64_t* get_cpu_frequency_from_file(const char *file, int ncpus)
{
  std::ifstream cpuinfo(file);
  if (cpuinfo.fail()) {
    return nullptr;
  }
  char line[MAX_LINELENGTH];
  int64_t* freqs = new int64_t[ncpus];
  for (int i = 0; i < ncpus; ++i) {
    freqs[i] = 0;
  }
  int processor = -1;

  while (cpuinfo.getline(line, sizeof(line))) {
    if (sscanf(line, "processor : %d", &processor) == 1) {
      continue;
    }
    float freq;
    if (sscanf(line, "cpu MHz : %f", &freq) == 1) {
      if (processor != -1 && processor < ncpus) {
        freqs[processor] = nearbyint(freq);
        processor = -1;
      }
    }
  }
  for (int i = 0; i < ncpus; ++i) {
    if (freqs[i] == 0) {
      delete[] freqs;
      return nullptr;
    }
  }
  return freqs;
}
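
// The parser above matches /proc/cpuinfo lines of this shape (the literal
// spaces in the sscanf formats skip the tab/space padding):
//   processor : 0
//   cpu MHz   : 2394.000
// If any CPU's frequency is never seen, its freqs[i] stays 0 and the whole
// table is discarded; MachineInfo below then falls back to the rdtsc
// microbenchmark.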

class esyscall {
public:
  int num;

  explicit esyscall(const char *syscall_name) {
    num = -1;
    char format[strlen(syscall_name) + sizeof(" %d")];
    sprintf(format, "%s %%d", syscall_name);

    std::ifstream syscalls("/proc/esyscall");
    if (syscalls.fail()) {
      return;
    }
    char line[MAX_LINELENGTH];
    if (!syscalls.getline(line, sizeof(line))) {
      return;
    }
    // perhaps we should check the format, but we're just going to assume
    // "Name Number"
    while (syscalls.getline(line, sizeof(line))) {
      int number;
      if (sscanf(line, format, &number) == 1) {
        num = number;
        return;
      }
    }
  }
};
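
// Assumed /proc/esyscall layout this constructor parses -- a header line
// followed by "name number" pairs (the number below is made up):
//   Name Number
//   vtsc 299
// If the file or the named syscall is missing, num stays -1 and callers fall
// back to other time sources.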

///////////////////////////////////////////////////////////////////////////////
// Machine information that we collect just once.

class MachineInfo {
public:
  /**
   * Bind the current process to a specified CPU. This function is to ensure
   * that the OS won't schedule the process to different processors, which
   * would make values read by rdtsc unreliable.
   *
   * @param uint32 cpu_id, the id of the logical cpu to be bound to.
   *
   * @author cjiang
   */
  static void BindToCPU(uint32_t cpu_id) {
    cpu_set_t new_mask;
    CPU_ZERO(&new_mask);
    CPU_SET(cpu_id, &new_mask);
    SET_AFFINITY(0, sizeof(cpu_set_t), &new_mask);
  }

public:
  // The number of logical CPUs this machine has.
  int m_cpu_num;
  // Store the cpu frequency. Get it from /proc/cpuinfo if we can.
  int64_t* m_cpu_frequencies;

  MachineInfo() {
    m_cpu_num = sysconf(_SC_NPROCESSORS_CONF);
    m_cpu_frequencies = get_cpu_frequency_from_file("/proc/cpuinfo", m_cpu_num);

    if (m_cpu_frequencies)
      return;

    m_cpu_frequencies = new int64_t[m_cpu_num];
    for (int i = 0; i < m_cpu_num; i++) {
      cpu_set_t prev_mask;
      GET_AFFINITY(0, sizeof(cpu_set_t), &prev_mask);
      BindToCPU(i);
      // Make sure the current process gets scheduled to the target cpu. This
      // might not be necessary though.
      usleep(0);
      m_cpu_frequencies[i] = get_cpu_frequency();
      SET_AFFINITY(0, sizeof(cpu_set_t), &prev_mask);
    }
  }

  ~MachineInfo() {
    delete[] m_cpu_frequencies;
  }
};
static MachineInfo s_machine;

static inline uint64_t
tv_to_cycles(const struct timeval& tv, int64_t MHz)
{
  return (((uint64_t)tv.tv_sec * 1000000) + tv.tv_usec) * MHz;
}

static inline uint64_t
to_usec(int64_t cycles, int64_t MHz, bool cpu_time = false)
{
#ifdef CLOCK_THREAD_CPUTIME_ID
  static int64_t vdso_usable =
    Util::Vdso::ClockGetTimeNS(CLOCK_THREAD_CPUTIME_ID);
#else
  static int64_t vdso_usable = -1;
#endif

  if (cpu_time && vdso_usable >= 0)
    return cycles / 1000;
  return (cycles + MHz/2) / MHz;
}

static esyscall vtsc_syscall("vtsc");
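
// vtsc() below returns per-thread cpu time, trying the cheapest source first:
// 1) the vDSO's CLOCK_THREAD_CPUTIME_ID clock (nanoseconds, no syscall),
// 2) a dedicated "vtsc" syscall if the kernel exposes one,
// 3) getrusage(), converted from timevals to cycles, as a last resort.
// Note the mixed units: path 1 yields nanoseconds while paths 2-3 yield
// cycles; to_usec() compensates by dividing by 1000 only when the vDSO
// clock is usable.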

static inline uint64_t vtsc(int64_t MHz) {
#ifdef CLOCK_THREAD_CPUTIME_ID
  int64_t rval = Util::Vdso::ClockGetTimeNS(CLOCK_THREAD_CPUTIME_ID);
  if (rval >= 0) {
    return rval;
  }
#endif
  if (vtsc_syscall.num > 0) {
    return syscall(vtsc_syscall.num);
  }
  struct rusage usage;
  getrusage(RUSAGE_SELF, &usage);
  return
    tv_to_cycles(usage.ru_utime, MHz) + tv_to_cycles(usage.ru_stime, MHz);
}

uint64_t
get_allocs()
{
#ifdef USE_JEMALLOC
  auto& mm = MM();
  return mm.getAllocated();
#endif
#ifdef USE_TCMALLOC
  if (MallocExtensionInstance) {
    size_t stat;
    MallocExtensionInstance()->GetNumericProperty(
           "generic.thread_bytes_allocated", &stat);
    return stat;
  }
#endif
  return 0;
}

uint64_t
get_frees()
{
#ifdef USE_JEMALLOC
  auto& mm = MM();
  return mm.getDeallocated();
#endif
#ifdef USE_TCMALLOC
  if (MallocExtensionInstance) {
    size_t stat;
    MallocExtensionInstance()->GetNumericProperty(
           "generic.thread_bytes_freed", &stat);
    return stat;
  }
#endif
  return 0;
}

///////////////////////////////////////////////////////////////////////////////
// classes

/**
 * All information we collect about a frame.
 */
class Frame {
public:
  Frame          *m_parent;      // ptr to parent frame
  const char     *m_name;        // function name
  uint8_t         m_hash_code;   // hash_code for the function name
  int             m_recursion;   // recursion level for function

  uint64_t        m_tsc_start;   // start value for TSC counter
  int64_t         m_mu_start;    // memory usage
  int64_t         m_pmu_start;   // peak memory usage
  int64_t         m_vtsc_start;  // user/sys time start

  /**
   * Returns formatted function name
   *
   * @param  result_buf   ptr to result buf
   * @param  result_len   max size of result buf
   * @return total size of the function name returned in result_buf
   * @author veeve
   */
  size_t getName(char *result_buf, size_t result_len) {
    if (result_len <= 1) {
      return 0; // Insufficient result_buf. Bail!
    }

    // Add '@recurse_level' if required
    // NOTE: Don't use snprintf's return val as it is compiler dependent
    if (m_recursion) {
      snprintf(result_buf, result_len, "%s@%d", m_name, m_recursion);
    } else {
      snprintf(result_buf, result_len, "%s", m_name);
    }

    // Force null-termination at MAX
    result_buf[result_len - 1] = 0;
    return strlen(result_buf);
  }

  /**
   * Build a caller qualified name for a callee.
   *
   * For example, if A() is caller for B(), then it returns "A==>B".
   * Recursive invocations are denoted with @<n> where n is the recursion
   * depth.
   *
   * For example, "foo==>foo@1", and "foo@2==>foo@3" are examples of direct
   * recursion. And "bar==>foo@1" is an example of an indirect recursive
   * call to foo (implying that foo() is on the call stack some levels
   * above).
   */
  size_t getStack(int level, char *result_buf, size_t result_len) {
    // End recursion if we don't need deeper levels or
    // we don't have any deeper levels
    if (!m_parent || level <= 1) {
      return getName(result_buf, result_len);
    }

    // Take care of all ancestors first
    size_t len = m_parent->getStack(level - 1, result_buf, result_len);
    if (result_len < (len + HP_STACK_DELIM_LEN)) {
      return len; // Insufficient result_buf. Bail out!
    }

    // Add delimiter only if entry had ancestors
    if (len) {
      strncat(result_buf + len, HP_STACK_DELIM, result_len - len);
      len += HP_STACK_DELIM_LEN;
    }

    // Append the current function name
    return len + getName(result_buf + len, result_len - len);
  }
};

enum Flag {
  TrackBuiltins        = 0x1,
  TrackCPU             = 0x2,
  TrackMemory          = 0x4,
  TrackVtsc            = 0x8,
  XhpTrace             = 0x10,
  MeasureXhprofDisable = 0x20,
  Unused               = 0x40,
  TrackMalloc          = 0x80,
};

const StaticString
  s_ct("ct"),
  s_wt("wt"),
  s_cpu("cpu"),
  s_mu("mu"),
  s_pmu("pmu"),
  s_alloc("alloc"),
  s_free("free"),
  s_compressed_trace("(compressed_trace)");

/**
 * Maintain profiles of a running stack.
 */
class Profiler {
public:
  Profiler() : m_successful(true), m_stack(nullptr),
               m_frame_free_list(nullptr) {
    if (!s_rand_initialized) {
      s_rand_initialized = true;
      srand(math_generate_seed());
    }

    // bind to a random cpu so that we can use the rdtsc instruction.
    int cur_cpu_id = rand() % s_machine.m_cpu_num;
    GET_AFFINITY(0, sizeof(cpu_set_t), &m_prev_mask);
    MachineInfo::BindToCPU(cur_cpu_id);
    m_MHz = s_machine.m_cpu_frequencies[cur_cpu_id];

    memset(m_func_hash_counters, 0, sizeof(m_func_hash_counters));
  }

  virtual ~Profiler() {
    SET_AFFINITY(0, sizeof(cpu_set_t), &m_prev_mask);

    endAllFrames();
    for (Frame *p = m_frame_free_list; p;) {
      Frame *cur = p;
      p = p->m_parent;
      free(cur);
    }
  }

  /**
   * Subclasses can do extra work by overriding these two virtual functions.
   */
  virtual void beginFrameEx() {} // called right before a function call
  virtual void endFrameEx() {}   // called right after a function is finished

  /**
   * Final results.
   */
  virtual void writeStats(Array &ret) {}

  /**
   * Start a new frame with the specified symbol.
   */
  virtual void beginFrame(const char *symbol) __attribute__ ((noinline));

  /**
   * End top of the stack.
   */
  virtual void endFrame(const char *symbol,
                        bool endMain = false) __attribute__ ((noinline));

  virtual void endAllFrames() {
    while (m_stack) {
      endFrame(nullptr, true);
    }
  }

  template<class phpret, class Name, class Counts>
  static void returnVals(phpret& ret, const Name& name, const Counts& counts,
                         int flags, int64_t MHz)
  {
    ArrayInit arr(5);
    arr.set(s_ct,  counts.count);
    arr.set(s_wt,  to_usec(counts.wall_time, MHz));
    if (flags & TrackCPU) {
      arr.set(s_cpu, to_usec(counts.cpu, MHz, true));
    }
    if (flags & TrackMemory) {
      arr.set(s_mu,  counts.memory);
      arr.set(s_pmu, counts.peak_memory);
    } else if (flags & TrackMalloc) {
      arr.set(s_alloc, counts.memory);
      arr.set(s_free,  counts.peak_memory);
    }
    ret.set(String(name), arr.create());
  }

  template<class phpret, class StatsMap>
  static bool extractStats(phpret& ret, StatsMap& stats, int flags, int64_t MHz)
  {
    for (typename StatsMap::const_iterator iter = stats.begin();
         iter != stats.end(); ++iter) {
      returnVals(ret, iter->first, iter->second, flags, MHz);
    }
    return true;
  }
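
  // Shape of the per-key PHP array built above (illustrative):
  //   "ct"  => call count         "wt"        => inclusive wall time (us)
  //   "cpu" => cpu time (us)      "mu"/"pmu"  => memory / peak memory delta
  // or, under TrackMalloc, "alloc"/"free" => bytes allocated / bytes freed.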

  bool m_successful;

  int64_t m_MHz;    // cpu freq for either the local cpu or the saved trace
  Frame  *m_stack;  // top of the profile stack

  static bool s_rand_initialized;

  cpu_set_t m_prev_mask;               // saved cpu affinity
  Frame    *m_frame_free_list;         // freelist of Frame
  uint8_t   m_func_hash_counters[256]; // counter table by hash code

  /**
   * Fast allocate a Frame structure. Picks one from the
   * free list if available, else does an actual allocate.
   */
  Frame *createFrame(const char *symbol) {
    Frame *p = m_frame_free_list;
    if (p) {
      m_frame_free_list = p->m_parent;
    } else {
      p = (Frame*)malloc(sizeof(Frame));
    }
    p->m_parent = m_stack;
    p->m_name = symbol;
    p->m_hash_code = hprof_inline_hash(symbol);
    m_stack = p;
    return p;
  }

  /**
   * Fast free a Frame structure. Simply returns the Frame to the free list
   * and doesn't actually perform the free.
   */
  void releaseFrame() {
    assert(m_stack);

    Frame *p = m_stack;
    m_stack = p->m_parent;
    p->m_parent = m_frame_free_list; // we overload the m_parent field here
    m_frame_free_list = p;
  }
};
bool Profiler::s_rand_initialized = false;

void Profiler::beginFrame(const char *symbol) {
  Frame *current = createFrame(symbol);

  // NOTE(cjiang): use hash code to fend off most of call-stack traversal
  int recursion_level = 0;
  if (m_func_hash_counters[current->m_hash_code] > 0) {
    // Find this symbol's recursion level
    for (Frame *p = current->m_parent; p; p = p->m_parent) {
      if (strcmp(current->m_name, p->m_name) == 0) {
        recursion_level = p->m_recursion + 1;
        break;
      }
    }
  }
  current->m_recursion = recursion_level;

  m_func_hash_counters[current->m_hash_code]++;
  beginFrameEx();
}

/**
 * End top of the stack.
 */
void Profiler::endFrame(const char *symbol, bool endMain) {
  if (m_stack) {
    // special case for main() frame that's only ended by endAllFrames()
    if (!endMain && m_stack->m_parent == nullptr) {
      return;
    }
    endFrameEx();
    m_func_hash_counters[m_stack->m_hash_code]--;
    releaseFrame();
  }
}

///////////////////////////////////////////////////////////////////////////////
// SimpleProfiler

/**
 * vtsc() based profiler, but simple enough to print basic information.
 *
 * When available, we now use the vtsc() call, which is relatively inexpensive
 * and accurate. It's still a system call, but a cheap one. If the call isn't
 * available, the comment below still applies. --renglish
 *
 * COMMENT(cjiang): getrusage is very expensive and inaccurate. It is based
 * on sampling at a rate of roughly once every 5 to 10 milliseconds. The
 * sampling error can be very significant, especially given that we are
 * instrumenting at a very fine granularity. (Every PHP function call will
 * lead to one invocation of getrusage.) Most PHP functions such as the
 * built-ins typically finish in microseconds. Thus the result we get from
 * getrusage is very likely going to be skewed. Also worth noting is that
 * getrusage actually is a system call, which involves expensive swapping
 * between user-mode and kernel mode. I would suggest we remove collecting
 * CPU usage altogether, as exclusive wall-time is very useful already.
 * Or at least we should make it an opt-in choice.
 *
 * See: http://ww2.cs.fsu.edu/~hines/present/timing_linux.pdf
 *
 * Above is a nice paper talking about the overhead and the inaccuracy problem
 * associated with getrusage.
 */
class SimpleProfiler : public Profiler {
private:
  class CountMap {
  public:
    CountMap() : count(0), tsc(0), vtsc(0) {}

    int64_t count;
    int64_t tsc;
    int64_t vtsc;
  };
  typedef hphp_hash_map<std::string, CountMap, string_hash> StatsMap;
  StatsMap m_stats; // outcome

public:
  SimpleProfiler() {
    echo("<div style='display:none'>");
  }

  ~SimpleProfiler() {
    echo("</div>");
    print_output();
  }

  virtual void beginFrameEx() {
    m_stack->m_tsc_start = cpuCycles();
    m_stack->m_vtsc_start = vtsc(m_MHz);
  }

  virtual void endFrameEx() {
    CountMap &counts = m_stats[m_stack->m_name];
    counts.count++;
    counts.tsc += cpuCycles() - m_stack->m_tsc_start;
    counts.vtsc += vtsc(m_MHz) - m_stack->m_vtsc_start;
  }

private:
  void print_output() {
    echo("<link rel='stylesheet' href='/css/hotprofiler.css' type='text/css'>"
         "<script language='javascript' src='/js/hotprofiler.js'></script>"
         "<p><center><h2>Hotprofiler Data</h2></center><br>"
         "<div id='hotprofiler_stats'></div>"
         "<script language='javascript'>hotprofiler_data = [");
    for (StatsMap::const_iterator iter = m_stats.begin();
         iter != m_stats.end(); ++iter) {
      echo("{\"fn\": \"");
      echo(iter->first.c_str());
      echo("\"");

      const CountMap &counts = iter->second;

      char buf[512];
      snprintf(
        buf, sizeof(buf),
        ",\"ct\": %" PRId64 ",\"wt\": %" PRId64 ",\"ut\": %" PRId64 ",\"st\": 0",
        counts.count, (int64_t)to_usec(counts.tsc, m_MHz),
        (int64_t)to_usec(counts.vtsc, m_MHz, true));
      echo(buf);

      echo("},\n");
    }
    echo("]; write_data('ut', false);</script><br><br>&nbsp;<br>");
  }
};

///////////////////////////////////////////////////////////////////////////////
// HierarchicalProfiler

class HierarchicalProfiler : public Profiler {
private:
  class CountMap {
  public:
    CountMap() : count(0), wall_time(0), cpu(0), memory(0), peak_memory(0) {}

    int64_t count;
    int64_t wall_time;
    int64_t cpu;
    int64_t memory;
    int64_t peak_memory;
  };
  typedef hphp_hash_map<std::string, CountMap, string_hash> StatsMap;
  StatsMap m_stats; // outcome

public:
  explicit HierarchicalProfiler(int flags) : m_flags(flags) {
  }

  virtual void beginFrameEx() {
    m_stack->m_tsc_start = cpuCycles();

    if (m_flags & TrackCPU) {
      m_stack->m_vtsc_start = vtsc(m_MHz);
    }

    if (m_flags & TrackMemory) {
      auto const& stats = MM().getStats();
      m_stack->m_mu_start  = stats.usage;
      m_stack->m_pmu_start = stats.peakUsage;
    } else if (m_flags & TrackMalloc) {
      m_stack->m_mu_start = get_allocs();
      m_stack->m_pmu_start = get_frees();
    }
  }

  virtual void endFrameEx() {
    char symbol[512];
    m_stack->getStack(2, symbol, sizeof(symbol));
    CountMap &counts = m_stats[symbol];
    counts.count++;
    counts.wall_time += cpuCycles() - m_stack->m_tsc_start;

    if (m_flags & TrackCPU) {
      counts.cpu += vtsc(m_MHz) - m_stack->m_vtsc_start;
    }

    if (m_flags & TrackMemory) {
      auto const& stats = MM().getStats();
      int64_t mu_end = stats.usage;
      int64_t pmu_end = stats.peakUsage;
      counts.memory += mu_end - m_stack->m_mu_start;
      counts.peak_memory += pmu_end - m_stack->m_pmu_start;
    } else if (m_flags & TrackMalloc) {
      counts.memory += get_allocs() - m_stack->m_mu_start;
      counts.peak_memory += get_frees() - m_stack->m_pmu_start;
    }
  }

  virtual void writeStats(Array &ret) {
    extractStats(ret, m_stats, m_flags, m_MHz);
  }

private:
  uint32_t m_flags;
};
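
// Stats here are keyed by one-level arcs from getStack(2, ...), e.g.
// "foo==>bar", with recursion qualifiers such as "foo==>foo@1". The same
// callee reached from two different callers therefore gets two rows, which
// is what lets xhprof-style output attribute costs per call site.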

///////////////////////////////////////////////////////////////////////////////
// TraceProfiler

// Walks a log of function enter and exit events captured by
// TraceProfiler and generates statistics for each function executed.
template <class TraceIterator, class Stats>
class TraceWalker {
public:
  struct Frame {
    TraceIterator trace; // Pointer to the log entry which pushed this frame
    int level;           // Recursion level for this function
    int len;             // Length of the function name
  };

  TraceWalker()
    : m_arcBuffLen(200)
    , m_arcBuff((char*)malloc(200))
    , m_badArcCount(0)
  {}

  ~TraceWalker() {
    free(m_arcBuff);
    for (auto& r : m_recursion) delete[] r.first;
  }

  void walk(TraceIterator begin, TraceIterator end, Stats& stats) {
    if (begin == end) return;
    m_recursion.push_back(std::make_pair(nullptr, 0));
    // Trim exit traces off the front of the log. These may be due to
    // the return from turning tracing on.
    std::map<const char*, unsigned> functionLevel;
    auto current = begin;
    while (current != end && !current->symbol) ++current;
    while (current != end) {
      if (!current->is_func_exit) {
        unsigned level = ++functionLevel[current->symbol];
        if (level >= m_recursion.size()) {
          char *level_string = new char[8];
          sprintf(level_string, "@%u", level);
          m_recursion.push_back(std::make_pair(level_string,
                                               strlen(level_string)));
        }
        Frame fr;
        fr.trace = current;
        fr.level = level - 1;
        fr.len = strlen(current->symbol);
        checkArcBuff(fr.len);
        m_stack.push_back(fr);
      } else if (m_stack.size() > 1) {
        validateStack(current, stats); // NB: may update m_stack.
        --functionLevel[m_stack.back().trace->symbol];
        popFrame(current, stats);
      }
      ++current;
    }
    --current;
    while (m_stack.size() > 1) {
      popFrame(current, stats);
    }
    if (!m_stack.empty()) {
      incStats(m_stack.back().trace->symbol, current, m_stack.back(), stats);
    }
    if (m_badArcCount > 0) {
      stats["(trace has mismatched calls and returns)"].count = m_badArcCount;
    }
  }

private:
  void checkArcBuff(int len) {
    len = 2*len + HP_STACK_DELIM_LEN + 2;
    if (len >= m_arcBuffLen) {
      // Grow until the buffer fits; doubling once is not always enough
      // for very long symbol names.
      while (len >= m_arcBuffLen) {
        m_arcBuffLen *= 2;
      }
      m_arcBuff = (char *)realloc(m_arcBuff, m_arcBuffLen);
      if (m_arcBuff == nullptr) {
        throw std::bad_alloc();
      }
    }
  }

  void incStats(const char* arc, TraceIterator tr, const Frame& fr,
                Stats& stats) {
    auto& st = stats[arc];
    ++st.count;
    st.wall_time += tr->wall_time - fr.trace->wall_time;
    st.cpu += tr->cpu - fr.trace->cpu;
    st.memory += tr->memory - fr.trace->memory;
    st.peak_memory += tr->peak_memory - fr.trace->peak_memory;
  }

  // Look for mismatched enter and exit events, and try to correct if
  // we can. Only try to correct very simple imbalances... we could go
  // nuts here, but it's likely not worth it.
  void validateStack(TraceIterator tIt, Stats& stats) {
    auto enteredName = m_stack.back().trace->symbol;
    auto exitedName = tIt->symbol;
    if ((exitedName != nullptr) &&
        ((enteredName == nullptr) || (strcmp(enteredName, exitedName) != 0))) {
      // We have a few special names that we form on entry. We don't
      // have the information to form them again on exit, so tolerate
      // them here. See EventHook::GetFunctionNameForProfiler().
      if ((enteredName != nullptr) &&
          ((strncmp(enteredName, "run_init::", 10) == 0) ||
           (strcmp(enteredName, "_") == 0))) return;
      bool fixed = false;
      if (m_stack.size() > 1) {
        auto callerName = (m_stack.end() - 2)->trace->symbol;
        if ((callerName != nullptr) && (strcmp(callerName, exitedName) == 0)) {
          // We have an exit for Foo(), but we were in Bar(). However,
          // it appears that Foo() was the caller of Bar(). This
          // suggests we've missed the exit event for Bar() and have
          // the exit event for Foo() in hand. So remove Bar() to
          // re-balance the stack.
          m_stack.pop_back();
          fixed = true;
        }
      }
      // The first few bad arcs typically point at the problem, so
      // report them. The rest we'll just count.
      if (++m_badArcCount < 20) {
        std::string badArc;
        if (fixed) {
          badArc = folly::format("(warning: corrected bad arc #{}: "
                                 "enter '{}', exit '{}')",
                                 m_badArcCount,
                                 enteredName, exitedName).str();
        } else {
          badArc = folly::format("(error: bad arc #{}: "
                                 "enter '{}', exit '{}')",
                                 m_badArcCount,
                                 enteredName, exitedName).str();
        }
        ++stats[badArc.data()].count;
      }
    }
  }

  void popFrame(TraceIterator tIt, Stats& stats) {
    Frame callee = m_stack.back();
    m_stack.pop_back();
    Frame& caller = m_stack.back();
    char *cp = m_arcBuff;
    memcpy(cp, caller.trace->symbol, caller.len);
    cp += caller.len;
    if (caller.level >= 1) {
      std::pair<char*, int>& lvl = m_recursion[caller.level];
      memcpy(cp, lvl.first, lvl.second);
      cp += lvl.second;
    }
    memcpy(cp, HP_STACK_DELIM, HP_STACK_DELIM_LEN);
    cp += HP_STACK_DELIM_LEN;
    memcpy(cp, callee.trace->symbol, callee.len);
    cp += callee.len;
    if (callee.level >= 1) {
      std::pair<char*, int>& lvl = m_recursion[callee.level];
      memcpy(cp, lvl.first, lvl.second);
      cp += lvl.second;
    }
    *cp = 0;
    incStats(m_arcBuff, tIt, callee, stats);
  }

  vector<std::pair<char*, int>> m_recursion;
  vector<Frame> m_stack;
  int m_arcBuffLen;
  char *m_arcBuff;
  int m_badArcCount;
};

// Profiler which makes a log of all function enter and exit events,
// then processes that into per-function statistics. A single-frame
// stack trace is used to aggregate stats for each function when
// called from different call sites.
class TraceProfiler : public Profiler {
public:
  explicit TraceProfiler(int flags)
    : m_traceBuffer(nullptr)
    , m_traceBufferSize(0)
    , m_nextTraceEntry(0)
    , m_traceBufferFilled(false)
    , m_maxTraceBuffer(0)
    , m_overflowCalls(0)
    , m_flags(flags)
  {
    if (pthread_mutex_trylock(&s_inUse)) {
      // This profiler uses a very large amount of memory. Only allow
      // one in the process at any time.
      m_successful = false;
    } else {
      char buf[20];
      sprintf(buf, "%d", RuntimeOption::ProfilerMaxTraceBuffer);
      IniSetting::Bind("profiler.max_trace_buffer", buf,
                       ini_on_update_long, &m_maxTraceBuffer);
    }
  }

  ~TraceProfiler() {
    if (m_successful) {
      free(m_traceBuffer);
      IniSetting::Unbind("profiler.max_trace_buffer");
      pthread_mutex_unlock(&s_inUse);
    }
  }

private:
  // Data measured on function entry and exit
  struct TraceData {
    int64_t wall_time;
    int64_t cpu;

    // It's not plausible that we need a full 64 bits to hold memory
    // stats, no matter what the collection mode. So we steal one bit
    // from the memory field to use as a flag. We want to keep this
    // data structure small since we need a huge number of them during
    // a profiled run.
    int64_t memory : 63;      // Total memory, or memory allocated depending on flags
    bool is_func_exit : 1;    // Is the entry for a function exit?
    int64_t peak_memory : 63; // Peak memory, or memory freed depending on flags
    uint64_t unused : 1;      // Unused, to keep memory and peak_memory the same size

    void clear() {
      wall_time = cpu = memory = peak_memory = 0;
    }
    static void compileTimeAssertions() {
      static_assert(sizeof(TraceData) == (sizeof(uint64_t) * 4), "");
    }
  };
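
  // Layout sketch: exactly 4 x 64 bits, enforced by the static_assert above:
  //   [ wall_time ][ cpu ][ memory:63 | is_func_exit:1 ]
  //   [ peak_memory:63 | unused:1 ]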

  // One entry in the log, representing a function enter or exit event
  struct TraceEntry : TraceData {
    const char *symbol; // Function name
  };

  bool isTraceSpaceAvailable() {
    // a few slots are reserved for internal use (realloc and
    // end-of-trace sentinel entries)
    return m_nextTraceEntry < m_traceBufferSize - 3;
  }

  bool ensureTraceSpace() {
    bool track_realloc = false;
    if (m_traceBufferFilled) {
      m_overflowCalls++;
      return false;
    }
    int new_array_size;
    if (m_traceBufferSize == 0) {
      new_array_size = RuntimeOption::ProfilerTraceBuffer;
    } else {
      new_array_size = m_traceBufferSize *
        RuntimeOption::ProfilerTraceExpansion;
      if (m_maxTraceBuffer != 0 && new_array_size > m_maxTraceBuffer) {
        new_array_size = m_maxTraceBuffer > m_traceBufferSize ?
          m_maxTraceBuffer : m_traceBufferSize;
      }
      if (new_array_size - m_nextTraceEntry <= 5) {
        // for this operation to succeed, we need room for the entry we're
        // adding, two realloc entries, and two entries to mark the end of
        // the trace.
        m_traceBufferFilled = true;
        collectStats("(trace buffer terminated)", false,
                     m_traceBuffer[m_nextTraceEntry++]);
        return false;
      }
      track_realloc = true;
    }
    if (track_realloc) {
      collectStats("(trace buffer realloc)", false,
                   m_traceBuffer[m_nextTraceEntry++]);
    }
    {
      DECLARE_THREAD_INFO
      MemoryManager::MaskAlloc masker(*info->m_mm);
      TraceEntry *r = (TraceEntry*)realloc((void *)m_traceBuffer,
                                           new_array_size * sizeof(TraceEntry));

      if (!r) {
        m_traceBufferFilled = true;
        if (m_traceBuffer) {
          collectStats("(trace buffer terminated)", false,
                       m_traceBuffer[m_nextTraceEntry++]);
        }
        return false;
      }
      m_traceBufferSize = new_array_size;
      m_traceBuffer = r;
    }
    if (track_realloc) {
      collectStats("(trace buffer realloc)", true,
                   m_traceBuffer[m_nextTraceEntry++]);
    }
    return true;
  }
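
  // Growth recap: the buffer starts at ProfilerTraceBuffer entries, is
  // multiplied by ProfilerTraceExpansion on each realloc, and is clamped by
  // the profiler.max_trace_buffer INI setting. The "(trace buffer realloc)"
  // and "(trace buffer terminated)" sentinel entries are written into the
  // log itself, so the bookkeeping cost surfaces in the final stats.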

  virtual void beginFrame(const char *symbol) {
    doTrace(symbol, false);
  }

  virtual void endFrame(const char *symbol, bool endMain = false) {
    doTrace(symbol, true);
  }

  virtual void endAllFrames() {
    if (m_traceBuffer && m_nextTraceEntry < m_traceBufferSize - 1) {
      endFrame(nullptr);
      m_traceBufferFilled = true;
    }
  }

  void collectStats(const char *symbol, bool isFuncExit, TraceEntry& te) {
    te.symbol = symbol;
    te.is_func_exit = isFuncExit;
    collectStats(te);
  }

  void collectStats(TraceData& te) {
    te.wall_time = cpuCycles();
    te.cpu = 0;
    if (m_flags & TrackCPU) {
      te.cpu = vtsc(m_MHz);
    }
    if (m_flags & TrackMemory) {
      auto const& stats = MM().getStats();
      te.memory = stats.usage;
      te.peak_memory = stats.peakUsage;
    } else if (m_flags & TrackMalloc) {
      te.memory = get_allocs();
      te.peak_memory = get_frees();
    } else {
      te.memory = 0;
      te.peak_memory = 0;
    }
  }

  TraceEntry* nextTraceEntry() {
    if (!isTraceSpaceAvailable() && !ensureTraceSpace()) {
      return nullptr;
    }
    return &m_traceBuffer[m_nextTraceEntry++];
  }

  void doTrace(const char *symbol, bool isFuncExit) {
    TraceEntry *te = nextTraceEntry();
    if (te != nullptr) {
      collectStats(symbol, isFuncExit, *te);
    }
  }

  template<class TraceIterator, class Stats>
  void walkTrace(TraceIterator begin, TraceIterator end, Stats& stats) {
    TraceWalker<TraceIterator, Stats> walker;
    walker.walk(begin, end, stats);
  }

  virtual void writeStats(Array &ret) {
    TraceData my_begin;
    collectStats(my_begin);
    walkTrace(m_traceBuffer, m_traceBuffer + m_nextTraceEntry, m_stats);
    if (m_overflowCalls) {
      m_stats["(trace buffer terminated)"].count += m_overflowCalls/2;
    }
    extractStats(ret, m_stats, m_flags, m_MHz);
    CountedTraceData allocStats;
    allocStats.count = 0;
    allocStats.peak_memory = allocStats.memory =
      m_nextTraceEntry * sizeof(*m_traceBuffer);
    returnVals(ret, "(trace buffer alloc)", allocStats, m_flags, m_MHz);
    if (m_flags & MeasureXhprofDisable) {
      CountedTraceData my_end;
      collectStats(my_end);
      my_end.count = 1;
      my_end.cpu -= my_begin.cpu;
      my_end.wall_time -= my_begin.wall_time;
      my_end.memory -= my_begin.memory;
      my_end.peak_memory -= my_begin.peak_memory;
      returnVals(ret, "xhprof_post_processing()", my_end, m_flags, m_MHz);
    }
  }

  TraceEntry* m_traceBuffer;
  int m_traceBufferSize;
  int m_nextTraceEntry;
  bool m_traceBufferFilled;
  int64_t m_maxTraceBuffer;
  int64_t m_overflowCalls;
  uint32_t m_flags;

  // Final stats, per-function per-callsite, with a count of how many
  // times the function was called from that callsite.
  class CountedTraceData : public TraceData {
  public:
    int64_t count;
    CountedTraceData() : count(0) { clear(); }
  };
  typedef hphp_hash_map<std::string, CountedTraceData, string_hash> StatsMap;
  StatsMap m_stats; // outcome

  static pthread_mutex_t s_inUse;
};

pthread_mutex_t TraceProfiler::s_inUse = PTHREAD_MUTEX_INITIALIZER;

///////////////////////////////////////////////////////////////////////////////
// SampleProfiler

/**
 * Sampling based profiler.
 */
class SampleProfiler : public Profiler {
private:
  typedef hphp_hash_map<std::string, int64_t, string_hash> CountMap;
  typedef hphp_hash_map<std::string, CountMap, string_hash> StatsMap;
  StatsMap m_stats; // outcome

public:
  SampleProfiler() {
    struct timeval now;
    uint64_t truncated_us;
    uint64_t truncated_tsc;

    // Init the last_sample in tsc
    m_last_sample_tsc = cpuCycles();

    // Find the microseconds that need to be truncated
    gettimeofday(&m_last_sample_time, 0);
    now = m_last_sample_time;
    hp_trunc_time(&m_last_sample_time, SAMPLING_INTERVAL);

    // Subtract truncated time from last_sample_tsc
    truncated_us  = get_us_interval(&m_last_sample_time, &now);
    truncated_tsc = truncated_us * m_MHz;
    if (m_last_sample_tsc > truncated_tsc) {
      // just to be safe while subtracting unsigned ints
      m_last_sample_tsc -= truncated_tsc;
    }

    // Convert sampling interval to ticks
    m_sampling_interval_tsc = SAMPLING_INTERVAL * m_MHz;
  }
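
  // Worked example (hypothetical 2GHz cpu, so m_MHz == 2000):
  //   m_sampling_interval_tsc = 100000 * 2000 = 200,000,000 cycles per 100ms.
  // Truncating m_last_sample_time above means the "sec.usec" keys emitted by
  // sample_stack() land on slot boundaries rather than arbitrary offsets.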

  virtual void beginFrameEx() {
    sample_check();
  }

  virtual void endFrameEx() {
    sample_check();
  }

  virtual void writeStats(Array &ret) {
    for (StatsMap::const_iterator iter = m_stats.begin();
         iter != m_stats.end(); ++iter) {
      Array arr;
      const CountMap &counts = iter->second;
      for (CountMap::const_iterator iterCount = counts.begin();
           iterCount != counts.end(); ++iterCount) {
        arr.set(String(iterCount->first), iterCount->second);
      }
      ret.set(String(iter->first), arr);
    }
  }

private:
  static const int SAMPLING_INTERVAL = 100000; // microsecs

  struct timeval m_last_sample_time;
  uint64_t m_last_sample_tsc;
  uint64_t m_sampling_interval_tsc;

  /**
   * Sample the stack and add it to m_stats, keyed by the sample time.
   *
   * @author veeve
   */
  void sample_stack() {
    char key[512];
    snprintf(key, sizeof(key), "%" PRId64 ".%06" PRId64,
             (int64_t)m_last_sample_time.tv_sec,
             (int64_t)m_last_sample_time.tv_usec);

    char symbol[5120];
    m_stack->getStack(INT_MAX, symbol, sizeof(symbol));
    m_stats[key][symbol] = 1;
  }

  /**
   * Checks to see if it is time to sample the stack, and calls
   * sample_stack() if it is.
   *
   * @author veeve
   */
  void sample_check() {
    if (m_stack) {
      // While loop is to handle a single function taking a long time
      // and passing several sampling intervals
      while ((cpuCycles() - m_last_sample_tsc) > m_sampling_interval_tsc) {
        m_last_sample_tsc += m_sampling_interval_tsc;
        // HAS TO BE UPDATED BEFORE calling sample_stack
        incr_us_interval(&m_last_sample_time, SAMPLING_INTERVAL);
        sample_stack();
      }
    }
  }
};

///////////////////////////////////////////////////////////////////////////////

class ProfilerFactory : public RequestEventHandler {
public:
  enum Level {
    Simple       = 1,
    Hierarchical = 2,
    Memory       = 3,
    Trace        = 4,
    Sample       = 620002, // Rockfort's zip code
  };

  static bool EnableNetworkProfiler;

public:
  ProfilerFactory() : m_profiler(nullptr) {
  }

  ~ProfilerFactory() {
    stop();
  }

  Profiler *getProfiler() {
    return m_profiler;
  }

  virtual void requestInit() {
  }

  virtual void requestShutdown() {
    stop();
    m_artificialFrameNames.reset();
  }

  void start(Level level, long flags) {
    if (!RuntimeOption::EnableHotProfiler) {
      return;
    }
    // This will be disabled automatically when the thread completes the request
    HPHP::EventHook::Enable();
    if (m_profiler == nullptr) {
      switch (level) {
      case Simple:
        m_profiler = new SimpleProfiler();
        break;
      case Hierarchical:
        m_profiler = new HierarchicalProfiler(flags);
        break;
      case Sample:
        m_profiler = new SampleProfiler();
        break;
      case Trace:
        m_profiler = new TraceProfiler(flags);
        break;
      default:
        throw_invalid_argument("level: %d", level);
        return;
      }
      if (m_profiler->m_successful) {
        m_profiler->beginFrame("main()");
        ThreadInfo::s_threadInfo->m_profiler = m_profiler;
      } else {
        delete m_profiler;
        m_profiler = nullptr;
      }
    }
  }

  Variant stop() {
    if (m_profiler) {
      m_profiler->endAllFrames();

      Array ret;
      m_profiler->writeStats(ret);
      delete m_profiler;
      m_profiler = nullptr;
      ThreadInfo::s_threadInfo->m_profiler = nullptr;

      return ret;
    }
    return uninit_null();
  }

  /**
   * The whole purpose of this cache is to make sure the "const char *"
   * pointers handed out for these frame names stay valid.
   */
  void cacheString(const String& name) {
    m_artificialFrameNames.append(name);
  }

private:
  Profiler *m_profiler;
  Array m_artificialFrameNames;
};

bool ProfilerFactory::EnableNetworkProfiler = false;

#ifdef HOTPROFILER
IMPLEMENT_STATIC_REQUEST_LOCAL(ProfilerFactory, s_factory);
#endif

///////////////////////////////////////////////////////////////////////////////
// main functions

void f_hotprofiler_enable(int level) {
#ifdef HOTPROFILER
  long flags = 0;
  if (level == ProfilerFactory::Hierarchical) {
    flags = TrackBuiltins;
  } else if (level == ProfilerFactory::Memory) {
    level = ProfilerFactory::Hierarchical;
    flags = TrackBuiltins | TrackMemory;
  }
  s_factory->start((ProfilerFactory::Level)level, flags);
#endif
}

Variant f_hotprofiler_disable() {
#ifdef HOTPROFILER
  return s_factory->stop();
#else
  return uninit_null();
#endif
}

void f_phprof_enable(int flags /* = 0 */) {
#ifdef HOTPROFILER
  s_factory->start(ProfilerFactory::Hierarchical, flags);
#endif
}

Variant f_phprof_disable() {
#ifdef HOTPROFILER
  return s_factory->stop();
#else
  return uninit_null();
#endif
}

void f_fb_setprofile(CVarRef callback) {
#ifdef HOTPROFILER
  if (ThreadInfo::s_threadInfo->m_profiler != nullptr) {
    // phprof is enabled, don't let PHP code override it
    return;
  }
#endif
  g_vmContext->m_setprofileCallback = callback;
  if (callback.isNull()) {
    HPHP::EventHook::Disable();
  } else {
    HPHP::EventHook::Enable();
  }
}

void f_xhprof_frame_begin(const String& name) {
#ifdef HOTPROFILER
  Profiler *prof = ThreadInfo::s_threadInfo->m_profiler;
  if (prof) {
    s_factory->cacheString(name);
    prof->beginFrame(name.data());
  }
#endif
}

void f_xhprof_frame_end() {
#ifdef HOTPROFILER
  Profiler *prof = ThreadInfo::s_threadInfo->m_profiler;
  if (prof) {
    prof->endFrame(nullptr);
  }
#endif
}

void f_xhprof_enable(int flags /* = 0 */,
                     CArrRef args /* = null_array */) {
#ifdef HOTPROFILER
#ifdef CLOCK_THREAD_CPUTIME_ID
  bool missingClockGetTimeNS =
    Util::Vdso::ClockGetTimeNS(CLOCK_THREAD_CPUTIME_ID) == -1;
#else
  bool missingClockGetTimeNS = true;
#endif
  if (vtsc_syscall.num <= 0 && missingClockGetTimeNS) {
    flags &= ~TrackVtsc;
  }
  if (flags & TrackVtsc) {
    flags |= TrackCPU;
  }
  if (flags & XhpTrace) {
    s_factory->start(ProfilerFactory::Trace, flags);
  } else {
    s_factory->start(ProfilerFactory::Hierarchical, flags);
  }
#endif
}

Variant f_xhprof_disable() {
#ifdef HOTPROFILER
  return s_factory->stop();
#else
  return uninit_null();
#endif
}

void f_xhprof_network_enable() {
  ServerStats::StartNetworkProfile();
}

Variant f_xhprof_network_disable() {
  return ServerStats::EndNetworkProfile();
}

void f_xhprof_sample_enable() {
#ifdef HOTPROFILER
  s_factory->start(ProfilerFactory::Sample, 0);
#endif
}

Variant f_xhprof_sample_disable() {
#ifdef HOTPROFILER
  return s_factory->stop();
#else
  return uninit_null();
#endif
}

///////////////////////////////////////////////////////////////////////////////
// constants
const int64_t k_XHPROF_FLAGS_NO_BUILTINS = TrackBuiltins;
const int64_t k_XHPROF_FLAGS_CPU = TrackCPU;
const int64_t k_XHPROF_FLAGS_MEMORY = TrackMemory;
const int64_t k_XHPROF_FLAGS_VTSC = TrackVtsc;
const int64_t k_XHPROF_FLAGS_TRACE = XhpTrace;
const int64_t k_XHPROF_FLAGS_MEASURE_XHPROF_DISABLE = MeasureXhprofDisable;
const int64_t k_XHPROF_FLAGS_MALLOC = TrackMalloc;
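
// Typical PHP-side usage these flags support (illustrative):
//   xhprof_enable(XHPROF_FLAGS_CPU | XHPROF_FLAGS_MEMORY);
//   // ... code under test ...
//   $stats = xhprof_disable();  // per-arc array of ct/wt/cpu/mu/pmu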

///////////////////////////////////////////////////////////////////////////////
// injected code

void begin_profiler_frame(Profiler *p, const char *symbol) {
  p->beginFrame(symbol);
}

void end_profiler_frame(Profiler *p, const char *symbol) {
  p->endFrame(symbol);
}

///////////////////////////////////////////////////////////////////////////////
}