2 Copyright (c) 2014 Intel Corporation. All Rights Reserved.
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include "offload_timer.h"
33 #ifdef __INTEL_COMPILER
34 #include <ia32intrin.h>
35 #else // __INTEL_COMPILER
36 #include <x86intrin.h>
37 #endif // __INTEL_COMPILER
39 #include "offload_host.h"
// Global timer switch, defined with an initial value of 0. No line visible in
// this view reads it.
// NOTE(review): presumably turns per-offload timing collection on - confirm
// against its users.
44 int timer_enabled
= 0;
// Offload report verbosity, initially 0. Offload_Report_Epilog() adds the
// byte-count lines when it is >= OFFLOAD_REPORT_2, and offload_timer_init()
// tests it for non-zero as part of the condition guarding record allocation.
48 int offload_report_level
= 0;
// Report on/off flag, initially 1. offload_timer_init() tests it together
// with offload_report_level.
49 int offload_report_enabled
= 1;
// Indentation table for host phase labels: offload_host_phase_name() reads
// entry [p_node] and writes that many spaces in front of the phase label.
// The inline comment on each entry names the phase the entry is meant for.
// NOTE(review): the first and fifth entries carry the same label
// (c_offload_host_setup_buffers). The first entry presumably belongs to the
// overall-offload phase - confirm against the host phase enum.
51 static const int host_timer_prefix_spaces
[] = {
52 /*c_offload_host_setup_buffers*/ 0,
53 /*c_offload_host_initialize*/ 2,
54 /*c_offload_host_target_acquire*/ 2,
55 /*c_offload_host_wait_deps*/ 2,
56 /*c_offload_host_setup_buffers*/ 2,
57 /*c_offload_host_alloc_buffers*/ 4,
58 /*c_offload_host_setup_misc_data*/ 2,
59 /*c_offload_host_alloc_data_buffer*/ 4,
60 /*c_offload_host_send_pointers*/ 2,
61 /*c_offload_host_gather_inputs*/ 2,
62 /*c_offload_host_map_in_data_buffer*/ 4,
63 /*c_offload_host_unmap_in_data_buffer*/ 4,
64 /*c_offload_host_start_compute*/ 2,
65 /*c_offload_host_wait_compute*/ 2,
66 /*c_offload_host_start_buffers_reads*/ 2,
67 /*c_offload_host_scatter_outputs*/ 2,
68 /*c_offload_host_map_out_data_buffer*/ 4,
69 /*c_offload_host_unmap_out_data_buffer*/ 4,
70 /*c_offload_host_wait_buffers_reads*/ 2,
71 /*c_offload_host_destroy_buffers*/ 2
// Indentation table for target phase labels: offload_target_phase_name()
// reads entry [p_node] and writes that many spaces in front of the phase
// label. The inline comment on each entry names the phase it is meant for.
74 const static int target_timer_prefix_spaces
[] = {
75 /*c_offload_target_total_time*/ 0,
76 /*c_offload_target_descriptor_setup*/ 2,
77 /*c_offload_target_func_lookup*/ 2,
78 /*c_offload_target_func_time*/ 2,
79 /*c_offload_target_scatter_inputs*/ 4,
80 /*c_offload_target_add_buffer_refs*/ 6,
81 /*c_offload_target_compute*/ 4,
82 /*c_offload_target_gather_outputs*/ 4,
83 /*c_offload_target_release_buffer_refs*/ 6
// Singly linked list of per-offload timing records, chained through their
// `next` member. offload_timer_init() appends at the tail,
// offload_report_free_data() unlinks a record, and Offload_Timer_Print()
// walks the list from the head.
86 static OffloadHostTimerData
* timer_data_head
;
87 static OffloadHostTimerData
* timer_data_tail
;
// Locked and unlocked by offload_timer_init() around its list update.
88 static mutex_t timer_data_mutex
;
// Forward declarations of the two label formatters defined later in this
// file. Each writes the label of phase `p_node` into `ss`; they are called
// from Offload_Timer_Print() before their definitions appear.
90 static void offload_host_phase_name(std::stringstream
&ss
, int p_node
);
91 static void offload_target_phase_name(std::stringstream
&ss
, int p_node
);
// Assembles a timing report for every record on the timer list in a string
// stream and prints it with fprintf() to stdout.
// For each record it writes the source file and line, then one row per host
// phase (phases[i].total divided by cpu_frequency) and one row per target
// phase (target.phases[i].total divided by target.frequency, computed only
// when that frequency is non-zero). Values use fixed notation with five
// fractional digits.
// NOTE(review): buf.data() is passed to fprintf() as the FORMAT string. If
// buf holds the assembled report (presumably it does - confirm), any '%' in
// it, e.g. inside a file name, is parsed as a conversion specification.
// Prefer fputs(buf.c_str(), stdout) or fprintf(stdout, "%s", buf.c_str()).
93 extern void Offload_Timer_Print(void)
98 "**************************************************************";
100 ss
<< "\n\n" << stars
<< "\n";
102 ss
<< report_get_message_str(c_report_title
) << "\n";
// Host phase totals below are scaled by this frequency.
104 double frequency
= cpu_frequency
;
// Walk the list of records from its head.
106 for (OffloadHostTimerData
*pnode
= timer_data_head
;
107 pnode
!= 0; pnode
= pnode
->next
) {
109 ss
<< report_get_message_str(c_report_from_file
) << " "<< pnode
->file
;
110 ss
<< report_get_message_str(c_report_line
) << " " << pnode
->line
;
// Host-side phases: padded label followed by total / frequency.
112 for (int i
= 0; i
< c_offload_host_max_phase
; i
++) {
114 offload_host_phase_name(ss
, i
);
115 ss
<< " " << std::fixed
<< std::setprecision(5);
116 ss
<< (double)pnode
->phases
[i
].total
/ frequency
<< "\n";
// Target-side phases: the division is skipped when target.frequency is 0.
119 for (int i
= 0; i
< c_offload_target_max_phase
; i
++) {
121 if (pnode
->target
.frequency
!= 0) {
122 time
= (double) pnode
->target
.phases
[i
].total
/
123 (double) pnode
->target
.frequency
;
126 offload_target_phase_name(ss
, i
);
127 ss
<< " " << std::fixed
<< std::setprecision(5);
133 fprintf(stdout
, buf
.data());
// Writes the opening lines of a per-offload report: one line each for the
// source file, the source line and the tag (offload number), every line
// prefixed with the offload marker and the card number, as sketched in the
// inline comments below. Output goes through fprintf() to stdout.
// pnode: record supplying card_number, file, line and offload_number.
// NOTE(review): `frequency` is assigned but not used on any visible line.
// NOTE(review): buf.data() is passed to fprintf() as the FORMAT string. If
// buf holds the assembled text (presumably it does - confirm), a '%' in the
// file name would be parsed as a conversion specification. Prefer
// fputs(buf.c_str(), stdout) or fprintf(stdout, "%s", buf.c_str()).
137 extern void Offload_Report_Prolog(OffloadHostTimerData
*pnode
)
139 double frequency
= cpu_frequency
;
141 std::stringstream ss
;
144 // [Offload] [Mic 0] [File] file.c
145 ss
<< "[" << report_get_message_str(c_report_offload
) << "] [";
146 ss
<< report_get_message_str(c_report_mic
) << " ";
147 ss
<< pnode
->card_number
<< "] [";
148 ss
<< report_get_message_str(c_report_file
);
149 ss
<< "] " << pnode
->file
<< "\n";
151 // [Offload] [Mic 0] [Line] 1234
152 ss
<< "[" << report_get_message_str(c_report_offload
) << "] [";
153 ss
<< report_get_message_str(c_report_mic
) << " ";
154 ss
<< pnode
->card_number
<< "] [";
155 ss
<< report_get_message_str(c_report_line
);
156 ss
<< "] " << pnode
->line
<< "\n";
158 // [Offload] [Mic 0] [Tag] Tag 1
159 ss
<< "[" << report_get_message_str(c_report_offload
) << "] [";
160 ss
<< report_get_message_str(c_report_mic
) << " ";
161 ss
<< pnode
->card_number
<< "] [";
162 ss
<< report_get_message_str(c_report_tag
);
163 ss
<< "] " << report_get_message_str(c_report_tag
);
164 ss
<< " " << pnode
->offload_number
<< "\n";
167 fprintf(stdout
, buf
.data());
// Writes the closing lines of a per-offload report through fprintf() to
// stdout and then passes the record to offload_report_free_data().
// Lines built here: the host time (phases[0].total / cpu_frequency), the
// target time (target.phases[0].total / target.frequency, computed only when
// that frequency is non-zero) and, when offload_report_level is at least
// OFFLOAD_REPORT_2, the sent and received byte counts. Times use fixed
// notation with six fractional digits.
// timer_data: record to report on. It is handed to
//             offload_report_free_data() at the end, so callers should
//             presumably treat it as released afterwards - confirm.
// NOTE(review): buf.data() is passed to fprintf() as the FORMAT string. If
// buf holds the assembled text (presumably it does - confirm), any '%' in it
// is parsed as a conversion specification. Prefer fputs(buf.c_str(), stdout)
// or fprintf(stdout, "%s", buf.c_str()).
172 extern void Offload_Report_Epilog(OffloadHostTimerData
* timer_data
)
174 double frequency
= cpu_frequency
;
176 std::stringstream ss
;
178 OffloadHostTimerData
*pnode
= timer_data
;
// Host time line: phases[0].total scaled by cpu_frequency.
183 ss
<< "[" << report_get_message_str(c_report_offload
) << "] [";
184 ss
<< report_get_message_str(c_report_host
) << "] [";
185 ss
<< report_get_message_str(c_report_tag
) << " ";
186 ss
<< pnode
->offload_number
<< "] [";
187 ss
<< report_get_message_str(c_report_cpu_time
) << "] ";
188 ss
<< std::fixed
<< std::setprecision(6);
189 ss
<< (double) pnode
->phases
[0].total
/ frequency
;
190 ss
<< report_get_message_str(c_report_seconds
) << "\n";
// Report level 2 and above: bytes sent for this offload.
192 if (offload_report_level
>= OFFLOAD_REPORT_2
) {
193 ss
<< "[" << report_get_message_str(c_report_offload
) << "] [";
194 ss
<< report_get_message_str(c_report_mic
);
195 ss
<< " " << pnode
->card_number
;
196 ss
<< "] [" << report_get_message_str(c_report_tag
) << " ";
197 ss
<< pnode
->offload_number
<< "] [";
198 ss
<< report_get_message_str(c_report_cpu_to_mic_data
) << "] ";
199 ss
<< pnode
->sent_bytes
<< " ";
200 ss
<< report_get_message_str(c_report_bytes
) << "\n";
// The target time is computed only when target.frequency is non-zero.
204 if (pnode
->target
.frequency
!= 0) {
205 time
= (double) pnode
->target
.phases
[0].total
/
206 (double) pnode
->target
.frequency
;
// Target time line.
208 ss
<< "[" << report_get_message_str(c_report_offload
) << "] [";
209 ss
<< report_get_message_str(c_report_mic
) << " ";
210 ss
<< pnode
->card_number
<< "] [";
211 ss
<< report_get_message_str(c_report_tag
) << " ";
212 ss
<< pnode
->offload_number
<< "] [";
213 ss
<< report_get_message_str(c_report_mic_time
) << "] ";
214 ss
<< std::fixed
<< std::setprecision(6) << time
;
215 ss
<< report_get_message_str(c_report_seconds
) << "\n";
// Report level 2 and above: bytes received for this offload.
217 if (offload_report_level
>= OFFLOAD_REPORT_2
) {
218 ss
<< "[" << report_get_message_str(c_report_offload
) << "] [";
219 ss
<< report_get_message_str(c_report_mic
);
220 ss
<< " " << pnode
->card_number
;
221 ss
<< "] [" << report_get_message_str(c_report_tag
) << " ";
222 ss
<< pnode
->offload_number
<< "] [";
223 ss
<< report_get_message_str(c_report_mic_to_cpu_data
) << "] ";
224 ss
<< pnode
->received_bytes
<< " ";
225 ss
<< report_get_message_str(c_report_bytes
) << "\n";
230 fprintf(stdout
, buf
.data());
// The record is handed over for removal from the timer list.
233 offload_report_free_data(timer_data
);
// Searches the timer list for `timer_data` and unlinks it: either the
// predecessor's `next` pointer or timer_data_head is redirected to the
// node's successor (the choice presumably depends on whether a predecessor
// was recorded in pnode_last - confirm).
// timer_data: record to remove; compared by address against each list node.
// NOTE(review): no visible line adjusts timer_data_tail when the removed node
// is the last one. offload_timer_init() appends through timer_data_tail, so a
// stale tail would be written through on the next append - confirm and fix.
// NOTE(review): timer_data_mutex is not taken on any visible line here,
// although offload_timer_init() locks it around its own list update.
236 extern void offload_report_free_data(OffloadHostTimerData
* timer_data
)
238 OffloadHostTimerData
*pnode_last
= NULL
;
240 for (OffloadHostTimerData
*pnode
= timer_data_head
;
241 pnode
!= 0; pnode
= pnode
->next
) {
242 if (timer_data
== pnode
) {
// Bypass the node from its predecessor ...
244 pnode_last
->next
= pnode
->next
;
// ... or advance the head past it.
247 timer_data_head
= pnode
->next
;
// Appends `num` space characters to `ss`.
// ss:  stream that receives the padding.
// num: number of spaces to write; zero or negative values write nothing.
static void fill_buf_with_spaces(std::stringstream &ss, int num)
{
    if (num <= 0) {
        return;
    }
    // One insertion of a ready-made run of blanks instead of `num` separate
    // single-character insertions.
    ss << std::string(static_cast<std::string::size_type>(num), ' ');
}
263 static void offload_host_phase_name(std::stringstream
&ss
, int p_node
)
268 const int message_length
= 40;
271 str
= report_get_host_stage_str(p_node
);
272 prefix_spaces
= host_timer_prefix_spaces
[p_node
];
273 fill_buf_with_spaces(ss
, prefix_spaces
);
274 str_length
= strlen(str
);
276 tail_length
= message_length
- prefix_spaces
- str_length
;
277 tail_length
= tail_length
> 0? tail_length
: 1;
278 fill_buf_with_spaces(ss
, tail_length
);
281 static void offload_target_phase_name(std::stringstream
&ss
, int p_node
)
285 const int message_length
= 40;
289 str
= report_get_target_stage_str(p_node
);
290 prefix_spaces
= target_timer_prefix_spaces
[p_node
];
291 fill_buf_with_spaces(ss
, prefix_spaces
);
292 str_length
= strlen(str
);
294 tail_length
= message_length
- prefix_spaces
- str_length
;
295 tail_length
= (tail_length
> 0)? tail_length
: 1;
296 fill_buf_with_spaces(ss
, tail_length
);
299 void offload_timer_start(OffloadHostTimerData
* timer_data
,
300 OffloadHostPhase p_type
)
302 timer_data
->phases
[p_type
].start
= _rdtsc();
305 void offload_timer_stop(OffloadHostTimerData
* timer_data
,
306 OffloadHostPhase p_type
)
308 timer_data
->phases
[p_type
].total
+= _rdtsc() -
309 timer_data
->phases
[p_type
].start
;
// Copies target-side timing values into timer_data->target from a sequence of
// uint64_t values read through `buf`: the first value is stored as
// target.frequency, and the next c_offload_target_max_phase values are stored
// as the per-phase totals, in phase order.
// timer_data: record that receives the values.
// NOTE(review): the declaration of `buf` is not visible in this view. The
// code reads 1 + c_offload_target_max_phase values through a uint64_t*
// cast, so the buffer presumably has to be that large and suitably aligned -
// confirm at the call site.
312 void offload_timer_fill_target_data(OffloadHostTimerData
* timer_data
,
315 uint64_t *data
= (uint64_t*) buf
;
// First value: target frequency.
317 timer_data
->target
.frequency
= *data
++;
// Remaining values: one total per target phase.
318 for (int i
= 0; i
< c_offload_target_max_phase
; i
++) {
319 timer_data
->target
.phases
[i
].total
= *data
++;
// Adds `sent_bytes` to the record's running sent_bytes counter, the value
// Offload_Report_Epilog() prints on its sent-bytes line.
// timer_data: record whose counter is increased.
// NOTE(review): the declaration of the `sent_bytes` parameter is not visible
// in this view - confirm its type against the header.
323 void offload_timer_fill_host_sdata(OffloadHostTimerData
* timer_data
,
327 timer_data
->sent_bytes
+= sent_bytes
;
331 void offload_timer_fill_host_rdata(OffloadHostTimerData
* timer_data
,
332 uint64_t received_bytes
)
335 timer_data
->received_bytes
+= received_bytes
;
// Stores `card_number` in the record; the report functions print this value
// next to the card marker on each report line.
// timer_data: record that receives the card number.
// NOTE(review): the declaration of the `card_number` parameter is not visible
// in this view - confirm its type against the header.
339 void offload_timer_fill_host_mic_num(OffloadHostTimerData
* timer_data
,
343 timer_data
->card_number
= card_number
;
// Creates the timing record for one offload and appends it to the timer list.
// While timer_data_mutex is held, and only when the enabling condition holds
// (its visible part is offload_report_level && offload_report_enabled), an
// OffloadHostTimerData is allocated with OFFLOAD_MALLOC, zero-filled, given
// the offload number OFFLOAD_DEBUG_INCR_OFLD_NUM() - 1, linked after the
// current tail (or installed as both head and tail of an empty list) and
// stamped with the caller's file and line.
// file, line: source location stored in the new record.
// NOTE(review): the local `timer_data` starts as NULL and is assigned only
// inside the condition, so callers presumably receive NULL when reporting and
// timing are off - confirm, the return statement is not visible in this view.
// NOTE(review): `first_time` is not referenced on any visible line.
347 OffloadHostTimerData
* offload_timer_init(const char *file
, int line
)
349 static bool first_time
= true;
350 OffloadHostTimerData
* timer_data
= NULL
;
352 timer_data_mutex
.lock();
355 (offload_report_level
&& offload_report_enabled
)) {
// NOTE(review): the allocation result goes straight into memset() without a
// NULL check.
356 timer_data
= (OffloadHostTimerData
*)
357 OFFLOAD_MALLOC(sizeof(OffloadHostTimerData
), 0);
358 memset(timer_data
, 0, sizeof(OffloadHostTimerData
));
360 timer_data
->offload_number
= OFFLOAD_DEBUG_INCR_OFLD_NUM() - 1;
// Empty list: the new record becomes both head and tail.
362 if (timer_data_head
== 0) {
363 timer_data_head
= timer_data
;
364 timer_data_tail
= timer_data
;
// Non-empty list: link behind the current tail, then advance the tail.
367 timer_data_tail
->next
= timer_data
;
368 timer_data_tail
= timer_data
;
371 timer_data
->file
= file
;
372 timer_data
->line
= line
;
375 timer_data_mutex
.unlock();
379 #endif // TIMING_SUPPORT