Graphing tweaks.
[beedb.git] / perf / perftest.cc
blobcd3a751d0617af944c3c2204466f4b308dd464fb
1 /*
2 Copyright 2009 Kristian Nielsen
4 This file is part of BeeDB.
6 BeeDB is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 2 of the License, or
9 (at your option) any later version.
11 BeeDB is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with BeeDB. If not, see <http://www.gnu.org/licenses/>.
22 Helper code for BeeDB performance regression testing.
26 Need to put this early, as it does not work after some other system header
27 might include inttypes.h
29 #include "port/format_macros.h"
31 #include <errno.h>
32 #include <sys/types.h>
33 #include <unistd.h>
34 #include <sys/time.h>
35 #include <time.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <stdio.h>
40 #include "beedb.h"
41 #include "perftest.h"
44 For now we are based on perfmon to do our stuff.
46 We might want later to support other methods or at least fallback to a
47 simple portable time measurement. But for now it's important to get some
48 results.
50 Initially there are even Core 2 specific parts in the list of performance
51 counters to use, though that should be easy to extend to other CPUs later.
54 #include <perfmon/pfmlib.h>
55 #include <perfmon/perfmon.h>
58 perftest::perftest()
60 static bool perfmon_inited= false;
61 if (!perfmon_inited)
63 int ret= pfm_initialize();
64 if (ret != PFMLIB_SUCCESS)
65 fatal_error("Cannot initialize libperfmon: %s\n", pfm_strerror(ret));
66 perfmon_inited= true;
69 setup_perfmon();
72 perftest::~perftest()
74 close(perfmon.ctx_fd);
77 void
78 perftest::run_test(test *t, uint64_t loops)
80 printf("T: %s", t->text);
81 if (t->variant != NULL)
82 printf(" {%s}", t->variant);
83 if (t->param1 != NULL)
85 printf(" [%s", t->param1);
86 if (t->param2 != NULL)
87 printf(";%s", t->param2);
88 printf("]");
90 printf(" I=%" PRIu64, loops);
91 if (t->workunits != 0)
92 printf(" U=%" PRIu64, t->workunits);
93 printf("\n");
95 /* First run it without timing, to warm up caches etc. */
96 t->prepare(-1);
97 t->run(loops);
99 /* Now run the test over all the performance counter runs. */
100 for (int run= 0; run < num_runs; run++)
102 t->prepare(run);
103 t->run(loops);
106 printf(" Seconds: %.4f", t->elapsed_time[0]);
107 for (int run= 1; run < num_runs; run++)
108 printf(" %+.4f", t->elapsed_time[run] - t->elapsed_time[0]);
109 printf("\n");
110 report_perfmon();
113 double
114 perftest::gettime(void)
116 struct timeval tv;
117 if (gettimeofday(&tv, NULL))
119 perror("gettimeofday()");
120 exit(1);
122 return (double)tv.tv_sec + (double)tv.tv_usec*1e-6;
125 void
126 perftest::fatal_error(const char *format, ...)
128 va_list ap;
129 va_start(ap, format);
130 fatal_error(format, ap);
131 va_end(ap);
134 void
135 perftest::fatal_error(const char *format, va_list ap)
137 vfprintf(stderr, format, ap);
138 exit(1);
141 void
142 perftest::prepare_perfmon_event(pfmlib_input_param_t *inp,
143 const char *event_name, int idx)
145 int ret= pfm_find_full_event(event_name, &(inp->pfp_events[idx]));
146 if (ret != PFMLIB_SUCCESS)
147 fatal_error("Failed to prepare event '%s': %s\n",
148 event_name, pfm_strerror(ret));
149 if (inp->pfp_event_count <= idx)
150 inp->pfp_event_count= idx+1;
/*
  Extra performance counter events, one pair per run.

  setup_perfmon() programs event0 and event1 of entry N into event slots 3
  and 4 of run N, next to the three events that are counted in every run.
  The names are Core 2 specific.
*/
static struct {
  const char *event0;
  const char *event1;
} core2_events[] = {
  { "RS_UOPS_DISPATCHED",             "UOPS_RETIRED:ANY" },
  { "INST_RETIRED:LOADS",             "INST_RETIRED:STORES" },
  { "BRANCH_INSTRUCTIONS_RETIRED",    "MISPREDICTED_BRANCH_RETIRED" },
  { "MEM_LOAD_RETIRED:L1D_LINE_MISS", "CYCLES_L1I_MEM_STALLED" },
  { "MEM_LOAD_RETIRED:L2_LINE_MISS",  "RS_UOPS_DISPATCHED_NONE" },
  { "MEM_LOAD_RETIRED:DTLB_MISS",     "L1D_PREFETCH:REQUESTS" }
};
165 void
166 perftest::setup_perfmon()
168 /* Compute performance counter config for all runs. */
169 for (int run= 0; run < num_runs; run++)
171 pfmlib_input_param_t *inp= &(perfmon.run[run].inp);
172 memset(inp, 0, sizeof(*inp));
173 prepare_perfmon_event(inp, "UNHALTED_CORE_CYCLES", 0);
174 prepare_perfmon_event(inp, "UNHALTED_REFERENCE_CYCLES", 1);
175 prepare_perfmon_event(inp, "INSTRUCTIONS_RETIRED", 2);
176 prepare_perfmon_event(inp, core2_events[run].event0, 3);
177 prepare_perfmon_event(inp, core2_events[run].event1, 4);
180 ToDo: This sets us to count only while in user mode (I think).
181 We may want to make this configurable per-test or something.
183 inp->pfp_dfl_plm= PFM_PLM3;
185 pfmlib_output_param_t *outp= &(perfmon.run[run].outp);
186 memset(outp, 0, sizeof(*outp));
187 int ret= pfm_dispatch_events(inp, NULL, outp, NULL);
188 if (ret != PFMLIB_SUCCESS)
189 fatal_error("Perfmon dispatch failed: %s\n", pfm_strerror(ret));
191 pfarg_pmc_t *pc= &(perfmon.run[run].pc[0]);
192 memset(pc, 0, sizeof(*pc));
193 for (int i= 0; i < outp->pfp_pmc_count; i++)
195 pc[i].reg_num= outp->pfp_pmcs[i].reg_num;
196 pc[i].reg_value= outp->pfp_pmcs[i].reg_value;
198 pfarg_pmd_t *pd= &(perfmon.run[run].pd[0]);
199 memset(pd, 0, sizeof(*pd));
200 for (int i= 0; i < outp->pfp_pmd_count; i++)
202 pd[i].reg_num= outp->pfp_pmds[i].reg_num;
207 Prepare and load the perfmon context.
208 The performance counter registers are not programmed until start_perfmon().
210 memset(&perfmon.ctx, 0, sizeof(perfmon.ctx));
211 perfmon.ctx_fd= pfm_create_context(&perfmon.ctx, NULL, NULL, 0);
212 if (perfmon.ctx_fd == -1)
214 if (errno == ENOSYS)
215 fatal_error("Failed to create perfmon context due to ENOSYS (this "
216 "supposedly means the kernel does not have perfmon support"
217 "compiled in).\n");
218 else
219 fatal_error("Failed to create perfmon context: %s\n", strerror(errno));
222 pfarg_load_t load_args;
223 memset(&load_args, 0, sizeof(load_args));
224 load_args.load_pid= getpid();
225 int ret= pfm_load_context(perfmon.ctx_fd, &load_args);
226 if (ret)
227 fatal_error("pfm_load_context(%d) failed: ret=%d errno=%d (%s)\n",
228 (int)load_args.load_pid, ret, errno, strerror(errno));
232 perftest::test::test(perftest *tester, const char *text, const char *variant,
233 const char *param1, const char *param2, uint64_t workunits)
234 : tester(tester), text(text), variant(variant),
235 param1(param1), param2(param2), workunits(workunits)
239 void
240 perftest::test::fatal_error(const char *format, ...)
242 va_list ap;
243 va_start(ap, format);
244 tester->fatal_error(format, ap);
245 va_end(ap);
248 void
249 perftest::test::start()
251 if (current_run < 0)
252 return;
254 start_time= gettime();
255 tester->start_perfmon(current_run);
258 void
259 perftest::test::record_time(const char *text)
261 if (current_run < 0)
262 return;
264 tester->record_perfmon(current_run);
265 elapsed_time[current_run]= gettime() - start_time;
266 tester->record_perfmon_not_time_critical(current_run);
269 void
270 perftest::start_perfmon(int run)
273 Write the performance counters with the control and data for this run.
274 The values of the data counters are reset to zero.
276 pfmlib_output_param_t *outp= &(perfmon.run[run].outp);
277 int ret= pfm_write_pmcs(perfmon.ctx_fd, perfmon.run[run].pc,
278 outp->pfp_pmc_count);
279 if (ret)
280 fatal_error("pfm_write_pmcs() error, ret=%d errno=%d\n", ret, errno);
281 for (int i= 0; i < outp->pfp_pmd_count; i++)
282 perfmon.run[run].pd[i].reg_value= 0;
283 ret= pfm_write_pmds(perfmon.ctx_fd, perfmon.run[run].pd,
284 outp->pfp_pmd_count);
285 if (ret)
286 fatal_error("pfm_write_pmds() error, ret=%d errno=%d\n", ret, errno);
288 ret= pfm_start(perfmon.ctx_fd, NULL);
289 if (ret)
290 fatal_error("pfm_start(%d, NULL) failed: ret=%d errno=%d\n",
291 perfmon.ctx_fd, ret, errno);
294 void
295 perftest::record_perfmon(int run)
297 int ret= pfm_stop(perfmon.ctx_fd);
298 if (ret)
299 fatal_error("pfm_stop(%d) failed: ret=%d errno=%d\n",
300 perfmon.ctx_fd, ret, errno);
304 We split this out from record_perfmon(), as it is not time critical (once
305 the counters are stopped, the read out can happen at any point after with no
306 difference in result), so other time critical tasks (ie. stopping wallclock)
307 can be done first.
309 void
310 perftest::record_perfmon_not_time_critical(int run)
312 int ret= pfm_read_pmds(perfmon.ctx_fd, perfmon.run[run].pd,
313 perfmon.run[run].inp.pfp_event_count);
314 if (ret)
315 fatal_error("pfm_read_pdms(%d) failed: ret=%d errno=%d (%s)\n",
316 perfmon.ctx_fd, ret, errno, strerror(errno));
/*
  Return the number of decimal digits needed to print value.

  Zero counts as one digit; the largest 64-bit value yields 20.
*/
static int
get_num_digits_64(uint64_t value)
{
  int digits = 1;

  /* Every further division by ten that leaves something adds a digit. */
  for (uint64_t rest = value / 10; rest != 0; rest /= 10)
    ++digits;

  return digits;
}
334 void
335 perftest::report_perfmon()
337 size_t name_len;
338 pfm_get_max_event_name_len(&name_len);
339 char *name = new char[name_len+1];
340 if (name == NULL)
341 fatal_error("Out of memory allocating %ld bytes\n", (long)(name_len+1));
343 /* To get a nicely aligned output, we first get the lengths of everything. */
344 int counter_name_max= 1;
345 int counter_value_max= 1;
346 int counter_delta_max[num_runs - 1];
347 for (int run= 1; run < num_runs; run++)
348 counter_delta_max[run - 1]= 0;
350 for (int run= 0; run < num_runs; run++)
352 for (int i= 0; i < perfmon.run[run].inp.pfp_event_count; i++)
354 pfm_get_full_event_name(&perfmon.run[run].inp.pfp_events[i],
355 name, name_len+1);
356 int len= strlen(name);
357 if (len > counter_name_max)
358 counter_name_max= len;
360 For the fixed counters, which are the same in every run, we output
361 the value only in the first run, and deltas for the rest.
363 if (run == 0 || i >= num_fixed_counters)
365 len= get_num_digits_64(perfmon.run[run].pd[i].reg_value);
366 if (len > counter_value_max)
367 counter_value_max= len;
369 else
371 uint64_t first_value= perfmon.run[0].pd[i].reg_value;
372 uint64_t this_value= perfmon.run[run].pd[i].reg_value;
373 uint64_t delta;
374 if (first_value >= this_value)
375 delta= first_value - this_value;
376 else
377 delta= this_value - first_value;
378 len= get_num_digits_64(delta);
379 if (len > counter_delta_max[run - 1])
380 counter_delta_max[run - 1]= len;
385 for (int i= 0; i < num_fixed_counters; i++)
387 pfm_get_full_event_name(&perfmon.run[0].inp.pfp_events[i],
388 name, name_len+1);
389 uint64_t first_value= perfmon.run[0].pd[i].reg_value;
390 printf(" %s %-*s %*" PRIu64, (i == 0 ? "F: " : " "),
391 counter_name_max, name,
392 counter_value_max, first_value);
393 for (int run= 1; run < num_runs; run++)
395 int64_t delta;
396 uint64_t value= perfmon.run[run].pd[i].reg_value;
397 if (value > first_value)
398 delta= (int64_t)(value - first_value);
399 else
400 delta= -(int64_t)(first_value - value);
401 printf(" %+*" PRIi64, 1 + counter_delta_max[run - 1], delta);
403 printf("\n");
406 for (int run= 0; run < num_runs; run++)
408 for (int i= num_fixed_counters; i<perfmon.run[run].inp.pfp_event_count; i++)
410 pfm_get_full_event_name(&perfmon.run[run].inp.pfp_events[i],
411 name, name_len+1);
412 uint64_t value= perfmon.run[run].pd[i].reg_value;
413 if (i == num_fixed_counters)
414 printf(" V%d: ", run + 1);
415 else
416 printf(" ");
417 printf("%-*s %*" PRIu64 "\n",
418 counter_name_max, name, counter_value_max, value);
422 delete[] name;
425 perftest::test::~test()