2 Copyright 2009 Kristian Nielsen
4 This file is part of BeeDB.
6 BeeDB is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 2 of the License, or
9 (at your option) any later version.
11 BeeDB is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with BeeDB. If not, see <http://www.gnu.org/licenses/>.
22 Helper code for BeeDB performance regression testing.
26 Need to put this early, as it does not work after some other system header
27 might include inttypes.h
29 #include "port/format_macros.h"
32 #include <sys/types.h>
44 For now we are based on perfmon to do our stuff.
46 We might want later to support other methods or at least fallback to a
47 simple portable time measurement. But for now it's important to get some
50 Initially there are even Core 2 specific parts in the list of performance
51 counters to use, though that should be easy to extend to other CPUs later.
54 #include <perfmon/pfmlib.h>
55 #include <perfmon/perfmon.h>
60 static bool perfmon_inited
= false;
63 int ret
= pfm_initialize();
64 if (ret
!= PFMLIB_SUCCESS
)
65 fatal_error("Cannot initialize libperfmon: %s\n", pfm_strerror(ret
));
74 close(perfmon
.ctx_fd
);
/*
  Run one test and print its header line and wallclock results.

  t      Test descriptor; its text, variant, param1, param2, workunits and
         elapsed_time[] members are read here.
  loops  Iteration count, printed as "I=<loops>".

  The header starts with "T: <text>", followed by " {<variant>}" when variant
  is set, " [<param1>" (plus ";<param2>" when param2 is set) when param1 is
  set, " I=<loops>", and " U=<workunits>" when workunits is non-zero.

  NOTE(review): the statements that actually invoke the test (warm-up and the
  body of the per-run loop) are not visible in this listing.
*/
78 perftest::run_test(test
*t
, uint64_t loops
)
80 printf("T: %s", t
->text
);
81 if (t
->variant
!= NULL
)
82 printf(" {%s}", t
->variant
);
83 if (t
->param1
!= NULL
)
85 printf(" [%s", t
->param1
);
86 if (t
->param2
!= NULL
)
87 printf(";%s", t
->param2
);
90 printf(" I=%" PRIu64
, loops
);
91 if (t
->workunits
!= 0)
92 printf(" U=%" PRIu64
, t
->workunits
);
95 /* First run it without timing, to warm up caches etc. */
99 /* Now run the test over all the performance counter runs. */
100 for (int run
= 0; run
< num_runs
; run
++)
/*
  Report the elapsed time of run 0 in seconds, then each later run as a
  signed difference from run 0.
*/
106 printf(" Seconds: %.4f", t
->elapsed_time
[0]);
107 for (int run
= 1; run
< num_runs
; run
++)
108 printf(" %+.4f", t
->elapsed_time
[run
] - t
->elapsed_time
[0]);
/*
  Return the current time of day in seconds as a double, computed from
  gettimeofday() as tv_sec + tv_usec * 1e-6.

  A non-zero return from gettimeofday() is reported with perror().
*/
114 perftest::gettime(void)
117 if (gettimeofday(&tv
, NULL
))
119 perror("gettimeofday()");
122 return (double)tv
.tv_sec
+ (double)tv
.tv_usec
*1e-6;
/*
  printf-style error reporting: collect the variable arguments following
  format and pass them on to the va_list overload of fatal_error().

  format  printf-style format string.
*/
126 perftest::fatal_error(const char *format
, ...)
129 va_start(ap
, format
);
130 fatal_error(format
, ap
);
/*
  va_list variant of fatal_error(): write the formatted message to stderr.

  format  printf-style format string.
  ap      Argument list matching format.

  NOTE(review): whatever follows the vfprintf() (presumably program
  termination) is not visible in this listing -- confirm.
*/
135 perftest::fatal_error(const char *format
, va_list ap
)
137 vfprintf(stderr
, format
, ap
);
/*
  Look up a performance event by name and store it in slot idx of the
  libpfm input parameters.

  inp         Input parameter block whose pfp_events[idx] is filled in.
  event_name  Event name passed to pfm_find_full_event().
  idx         Index into inp->pfp_events.

  Any result other than PFMLIB_SUCCESS is reported through fatal_error()
  together with the pfm_strerror() text.
*/
142 perftest::prepare_perfmon_event(pfmlib_input_param_t
*inp
,
143 const char *event_name
, int idx
)
145 int ret
= pfm_find_full_event(event_name
, &(inp
->pfp_events
[idx
]));
146 if (ret
!= PFMLIB_SUCCESS
)
147 fatal_error("Failed to prepare event '%s': %s\n",
148 event_name
, pfm_strerror(ret
));
/* Grow the event count so that it covers slot idx; it is never reduced. */
149 if (inp
->pfp_event_count
<= idx
)
150 inp
->pfp_event_count
= idx
+1;
157 "RS_UOPS_DISPATCHED", "UOPS_RETIRED:ANY",
158 "INST_RETIRED:LOADS", "INST_RETIRED:STORES",
159 "BRANCH_INSTRUCTIONS_RETIRED", "MISPREDICTED_BRANCH_RETIRED",
160 "MEM_LOAD_RETIRED:L1D_LINE_MISS", "CYCLES_L1I_MEM_STALLED",
161 "MEM_LOAD_RETIRED:L2_LINE_MISS", "RS_UOPS_DISPATCHED_NONE",
162 "MEM_LOAD_RETIRED:DTLB_MISS", "L1D_PREFETCH:REQUESTS"
/*
  Prepare the perfmon configuration for every run and create/load the
  perfmon context.

  For each run, event slots 0-2 are the same three events
  (UNHALTED_CORE_CYCLES, UNHALTED_REFERENCE_CYCLES, INSTRUCTIONS_RETIRED) and
  slots 3-4 come from core2_events[run]. pfm_dispatch_events() then produces
  the register assignment, which is copied into the per-run pc[] and pd[]
  arrays. Afterwards a context is created with pfm_create_context() and
  loaded for the current process (getpid()).

  Failures are reported through fatal_error().
*/
166 perftest::setup_perfmon()
168 /* Compute performance counter config for all runs. */
169 for (int run
= 0; run
< num_runs
; run
++)
171 pfmlib_input_param_t
*inp
= &(perfmon
.run
[run
].inp
);
172 memset(inp
, 0, sizeof(*inp
));
173 prepare_perfmon_event(inp
, "UNHALTED_CORE_CYCLES", 0);
174 prepare_perfmon_event(inp
, "UNHALTED_REFERENCE_CYCLES", 1);
175 prepare_perfmon_event(inp
, "INSTRUCTIONS_RETIRED", 2);
176 prepare_perfmon_event(inp
, core2_events
[run
].event0
, 3);
177 prepare_perfmon_event(inp
, core2_events
[run
].event1
, 4);
180 ToDo: This sets us to count only while in user mode (I think).
181 We may want to make this configurable per-test or something.
183 inp
->pfp_dfl_plm
= PFM_PLM3
;
185 pfmlib_output_param_t
*outp
= &(perfmon
.run
[run
].outp
);
186 memset(outp
, 0, sizeof(*outp
));
187 int ret
= pfm_dispatch_events(inp
, NULL
, outp
, NULL
);
188 if (ret
!= PFMLIB_SUCCESS
)
189 fatal_error("Perfmon dispatch failed: %s\n", pfm_strerror(ret
));
/* Copy the PMC register numbers and values chosen by the dispatch. */
191 pfarg_pmc_t
*pc
= &(perfmon
.run
[run
].pc
[0]);
/*
  NOTE(review): pc points at element 0, so sizeof(*pc) is the size of a
  single pfarg_pmc_t and only pc[0] is cleared here, while the loop below
  writes pc[0 .. pfp_pmc_count-1]. Confirm whether the whole array was meant
  to be zeroed.
*/
192 memset(pc
, 0, sizeof(*pc
));
193 for (int i
= 0; i
< outp
->pfp_pmc_count
; i
++)
195 pc
[i
].reg_num
= outp
->pfp_pmcs
[i
].reg_num
;
196 pc
[i
].reg_value
= outp
->pfp_pmcs
[i
].reg_value
;
/* Copy the PMD register numbers; the values are set in start_perfmon(). */
198 pfarg_pmd_t
*pd
= &(perfmon
.run
[run
].pd
[0]);
/*
  NOTE(review): same as for pc above -- sizeof(*pd) covers only pd[0].
*/
199 memset(pd
, 0, sizeof(*pd
));
200 for (int i
= 0; i
< outp
->pfp_pmd_count
; i
++)
202 pd
[i
].reg_num
= outp
->pfp_pmds
[i
].reg_num
;
207 Prepare and load the perfmon context.
208 The performance counter registers are not programmed until start_perfmon().
210 memset(&perfmon
.ctx
, 0, sizeof(perfmon
.ctx
));
211 perfmon
.ctx_fd
= pfm_create_context(&perfmon
.ctx
, NULL
, NULL
, 0);
/* A return value of -1 signals failure; ENOSYS gets its own message. */
212 if (perfmon
.ctx_fd
== -1)
215 fatal_error("Failed to create perfmon context due to ENOSYS (this "
216 "supposedly means the kernel does not have perfmon support"
219 fatal_error("Failed to create perfmon context: %s\n", strerror(errno
));
/* Attach the context to the current process. */
222 pfarg_load_t load_args
;
223 memset(&load_args
, 0, sizeof(load_args
));
224 load_args
.load_pid
= getpid();
225 int ret
= pfm_load_context(perfmon
.ctx_fd
, &load_args
);
227 fatal_error("pfm_load_context(%d) failed: ret=%d errno=%d (%s)\n",
228 (int)load_args
.load_pid
, ret
, errno
, strerror(errno
));
/*
  Construct a test descriptor. Each argument is stored in the member of the
  same name through the initializer list; the strings are stored as passed
  (pointer copies, no duplication is visible here).

  tester     Owning perftest object, used later for perfmon calls and errors.
  text       Test description (printed after "T: " by perftest::run_test()).
  variant    Optional variant label, may be NULL.
  param1     Optional first parameter label, may be NULL.
  param2     Optional second parameter label, may be NULL.
  workunits  Work unit count; run_test() prints it only when non-zero.
*/
232 perftest::test::test(perftest
*tester
, const char *text
, const char *variant
,
233 const char *param1
, const char *param2
, uint64_t workunits
)
234 : tester(tester
), text(text
), variant(variant
),
235 param1(param1
), param2(param2
), workunits(workunits
)
/*
  printf-style error reporting for a test: collect the variable arguments and
  delegate to the owning tester's va_list fatal_error().

  format  printf-style format string.
*/
240 perftest::test::fatal_error(const char *format
, ...)
243 va_start(ap
, format
);
244 tester
->fatal_error(format
, ap
);
/*
  Begin a measurement for current_run: record the wallclock start time, then
  start the performance counters through the tester. The wallclock is read
  first, so the perfmon start-up falls inside the timed interval.
*/
249 perftest::test::start()
254 start_time
= gettime();
255 tester
->start_perfmon(current_run
);
/*
  End a measurement for current_run. The order is: stop the performance
  counters, take the wallclock reading into elapsed_time[current_run], and
  only then do the counter read-out, which is not time critical.

  text  Not referenced in the lines visible in this listing.
*/
259 perftest::test::record_time(const char *text
)
264 tester
->record_perfmon(current_run
);
265 elapsed_time
[current_run
]= gettime() - start_time
;
266 tester
->record_perfmon_not_time_critical(current_run
);
/*
  Program the performance counters for the given run and start counting.

  run  Index into perfmon.run[] selecting the prepared configuration.

  Steps: write the control registers (pc) with pfm_write_pmcs(), zero every
  data register value and write them with pfm_write_pmds(), then call
  pfm_start(). Each failure is reported through fatal_error() with ret and
  errno.
*/
270 perftest::start_perfmon(int run
)
273 Write the performance counters with the control and data for this run.
274 The values of the data counters are reset to zero.
276 pfmlib_output_param_t
*outp
= &(perfmon
.run
[run
].outp
);
277 int ret
= pfm_write_pmcs(perfmon
.ctx_fd
, perfmon
.run
[run
].pc
,
278 outp
->pfp_pmc_count
);
280 fatal_error("pfm_write_pmcs() error, ret=%d errno=%d\n", ret
, errno
);
/* Reset the counter values left in pd[] before writing them. */
281 for (int i
= 0; i
< outp
->pfp_pmd_count
; i
++)
282 perfmon
.run
[run
].pd
[i
].reg_value
= 0;
283 ret
= pfm_write_pmds(perfmon
.ctx_fd
, perfmon
.run
[run
].pd
,
284 outp
->pfp_pmd_count
);
286 fatal_error("pfm_write_pmds() error, ret=%d errno=%d\n", ret
, errno
);
288 ret
= pfm_start(perfmon
.ctx_fd
, NULL
);
290 fatal_error("pfm_start(%d, NULL) failed: ret=%d errno=%d\n",
291 perfmon
.ctx_fd
, ret
, errno
);
/*
  Stop the performance counters with pfm_stop(). The values are not read
  here; that is done by record_perfmon_not_time_critical().

  run  Not referenced in the lines visible in this listing.
*/
295 perftest::record_perfmon(int run
)
297 int ret
= pfm_stop(perfmon
.ctx_fd
);
299 fatal_error("pfm_stop(%d) failed: ret=%d errno=%d\n",
300 perfmon
.ctx_fd
, ret
, errno
);
304 We split this out from record_perfmon(), as it is not time critical (once
305 the counters are stopped, the read out can happen at any point after with no
306 difference in result), so other time critical tasks (ie. stopping wallclock)
/*
  Read the data registers for the given run into perfmon.run[run].pd with
  pfm_read_pmds().

  run  Index into perfmon.run[] whose pd[] array receives the values.

  A failure is reported through fatal_error() with ret, errno and the
  strerror() text. The message names the function that was actually called
  (pfm_read_pmds).

  NOTE(review): the number of registers read is inp.pfp_event_count, whereas
  start_perfmon() writes outp.pfp_pmd_count registers -- confirm that these
  two counts always agree.
*/
310 perftest::record_perfmon_not_time_critical(int run
)
312 int ret
= pfm_read_pmds(perfmon
.ctx_fd
, perfmon
.run
[run
].pd
,
313 perfmon
.run
[run
].inp
.pfp_event_count
);
315 fatal_error("pfm_read_pmds(%d) failed: ret=%d errno=%d (%s)\n",
316 perfmon
.ctx_fd
, ret
, errno
, strerror(errno
));
/*
  NOTE(review): only the signature and one comparison of this helper are
  visible in this listing. Presumably it returns the number of decimal digits
  needed to print value; report_perfmon() uses the result as a printf field
  width -- confirm against the full source.

  value  The number to measure.
*/
320 get_num_digits_64(uint64_t value
)
/*
  Presumably guards against overflow before x is multiplied by 10 -- confirm.
*/
327 if (x
> UINT64_MAX
/10)
/*
  Print the performance counter values collected over all runs.

  A first pass over every run and event computes the column widths: the
  longest event name, the widest counter value, and, for each run after the
  first, the widest absolute difference of a fixed counter against run 0.

  The fixed counters (index < num_fixed_counters) are then printed with their
  run 0 value followed by a signed delta for each later run; the first of
  them is labelled "F: ". After that, for each run, the remaining counters
  are printed with their value, the first one labelled "V<run+1>: ".
*/
335 perftest::report_perfmon()
338 pfm_get_max_event_name_len(&name_len
);
/*
  NOTE(review): the condition guarding the out-of-memory message below is not
  visible in this listing. A plain new[] expression reports failure by
  throwing rather than returning a null pointer, so that path is presumably
  unreachable -- confirm. No matching delete[] is visible here either.
*/
339 char *name
= new char[name_len
+1];
341 fatal_error("Out of memory allocating %ld bytes\n", (long)(name_len
+1));
343 /* To get a nicely aligned output, we first get the lengths of everything. */
344 int counter_name_max
= 1;
345 int counter_value_max
= 1;
/* One width per run after the first; slot run-1 belongs to run. */
346 int counter_delta_max
[num_runs
- 1];
347 for (int run
= 1; run
< num_runs
; run
++)
348 counter_delta_max
[run
- 1]= 0;
350 for (int run
= 0; run
< num_runs
; run
++)
352 for (int i
= 0; i
< perfmon
.run
[run
].inp
.pfp_event_count
; i
++)
354 pfm_get_full_event_name(&perfmon
.run
[run
].inp
.pfp_events
[i
],
356 int len
= strlen(name
);
357 if (len
> counter_name_max
)
358 counter_name_max
= len
;
360 For the fixed counters, which are the same in every run, we output
361 the value only in the first run, and deltas for the rest.
363 if (run
== 0 || i
>= num_fixed_counters
)
365 len
= get_num_digits_64(perfmon
.run
[run
].pd
[i
].reg_value
);
366 if (len
> counter_value_max
)
367 counter_value_max
= len
;
/* Width of the absolute difference between this run and run 0. */
371 uint64_t first_value
= perfmon
.run
[0].pd
[i
].reg_value
;
372 uint64_t this_value
= perfmon
.run
[run
].pd
[i
].reg_value
;
374 if (first_value
>= this_value
)
375 delta
= first_value
- this_value
;
377 delta
= this_value
- first_value
;
378 len
= get_num_digits_64(delta
);
379 if (len
> counter_delta_max
[run
- 1])
380 counter_delta_max
[run
- 1]= len
;
/* Fixed counters: run 0 value, then a signed delta for every later run. */
385 for (int i
= 0; i
< num_fixed_counters
; i
++)
387 pfm_get_full_event_name(&perfmon
.run
[0].inp
.pfp_events
[i
],
389 uint64_t first_value
= perfmon
.run
[0].pd
[i
].reg_value
;
390 printf(" %s %-*s %*" PRIu64
, (i
== 0 ? "F: " : " "),
391 counter_name_max
, name
,
392 counter_value_max
, first_value
);
393 for (int run
= 1; run
< num_runs
; run
++)
396 uint64_t value
= perfmon
.run
[run
].pd
[i
].reg_value
;
/*
  The unsigned subtraction is always done larger-minus-smaller, and the sign
  is applied after the conversion to int64_t.
*/
397 if (value
> first_value
)
398 delta
= (int64_t)(value
- first_value
);
400 delta
= -(int64_t)(first_value
- value
);
/* Field width is one more than the digit count, leaving room for the sign. */
401 printf(" %+*" PRIi64
, 1 + counter_delta_max
[run
- 1], delta
);
/* Remaining (per-run) counters, one group per run. */
406 for (int run
= 0; run
< num_runs
; run
++)
408 for (int i
= num_fixed_counters
; i
<perfmon
.run
[run
].inp
.pfp_event_count
; i
++)
410 pfm_get_full_event_name(&perfmon
.run
[run
].inp
.pfp_events
[i
],
412 uint64_t value
= perfmon
.run
[run
].pd
[i
].reg_value
;
413 if (i
== num_fixed_counters
)
414 printf(" V%d: ", run
+ 1);
417 printf("%-*s %*" PRIu64
"\n",
418 counter_name_max
, name
, counter_value_max
, value
);
425 perftest::test::~test()