// Web-capture residue (gitweb page header), kept for provenance only:
//   * gcc.dg/20150120-1.c (dg-final): Cleanup original tree dump.
//   [official-gcc.git] / liboffloadmic / runtime / offload_timer_host.cpp
//   blob 719af887abc690d548482f216d9041e786027b47
/*
    Copyright (c) 2014 Intel Corporation. All Rights Reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

      * Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.
      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      * Neither the name of Intel Corporation nor the names of its
        contributors may be used to endorse or promote products derived
        from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "offload_timer.h"

#ifdef __INTEL_COMPILER
#include <ia32intrin.h>
#else // __INTEL_COMPILER
#include <x86intrin.h>
#endif // __INTEL_COMPILER

#include "offload_host.h"
#include <sstream>
#include <iostream>
#include <iomanip>
#include <string>
// Non-zero turns on per-offload timing: offload_timer_init() allocates a
// timer record when this flag is set (or when reporting is active).
// Defined outside the TIMING_SUPPORT guard, so it exists in every build.
int timer_enabled = 0;
46 #ifdef TIMING_SUPPORT
// Report verbosity. Zero disables reporting; Offload_Report_Epilog() adds the
// transferred-byte lines when the value is at least OFFLOAD_REPORT_2.
int offload_report_level = 0;

// Secondary switch: reporting only causes timer records to be allocated when
// this is non-zero as well as offload_report_level (see offload_timer_init()).
int offload_report_enabled = 1;
// Indentation (in spaces) printed before each host phase name in the timing
// table; indexed by the host phase number passed to offload_host_phase_name().
// A deeper indent marks a phase that is reported as nested under the previous
// shallower one.
//
// NOTE(review): slot 0 was labelled c_offload_host_setup_buffers in the
// original, duplicating slot 4. Slot 0 is presumably the whole-offload total
// (Offload_Report_Epilog() prints phases[0] as the CPU time) -- confirm the
// enumerator name against OffloadHostPhase before relying on the label.
static const int host_timer_prefix_spaces[] = {
    /*[0] overall host total (see note above)*/  0,
    /*c_offload_host_initialize*/                2,
    /*c_offload_host_target_acquire*/            2,
    /*c_offload_host_wait_deps*/                 2,
    /*c_offload_host_setup_buffers*/             2,
    /*c_offload_host_alloc_buffers*/             4,
    /*c_offload_host_setup_misc_data*/           2,
    /*c_offload_host_alloc_data_buffer*/         4,
    /*c_offload_host_send_pointers*/             2,
    /*c_offload_host_gather_inputs*/             2,
    /*c_offload_host_map_in_data_buffer*/        4,
    /*c_offload_host_unmap_in_data_buffer*/      4,
    /*c_offload_host_start_compute*/             2,
    /*c_offload_host_wait_compute*/              2,
    /*c_offload_host_start_buffers_reads*/       2,
    /*c_offload_host_scatter_outputs*/           2,
    /*c_offload_host_map_out_data_buffer*/       4,
    /*c_offload_host_unmap_out_data_buffer*/     4,
    /*c_offload_host_wait_buffers_reads*/        2,
    /*c_offload_host_destroy_buffers*/           2
};
// Indentation (in spaces) printed before each target phase name in the timing
// table; indexed by the target phase number passed to
// offload_target_phase_name(). A deeper indent marks a nested phase.
static const int target_timer_prefix_spaces[] = {
    /*c_offload_target_total_time*/             0,
    /*c_offload_target_descriptor_setup*/       2,
    /*c_offload_target_func_lookup*/            2,
    /*c_offload_target_func_time*/              2,
    /*c_offload_target_scatter_inputs*/         4,
    /*c_offload_target_add_buffer_refs*/        6,
    /*c_offload_target_compute*/                4,
    /*c_offload_target_gather_outputs*/         4,
    /*c_offload_target_release_buffer_refs*/    6
};
86 static OffloadHostTimerData* timer_data_head;
87 static OffloadHostTimerData* timer_data_tail;
88 static mutex_t timer_data_mutex;
90 static void offload_host_phase_name(std::stringstream &ss, int p_node);
91 static void offload_target_phase_name(std::stringstream &ss, int p_node);
93 extern void Offload_Timer_Print(void)
95 std::string buf;
96 std::stringstream ss;
97 const char *stars =
98 "**************************************************************";
100 ss << "\n\n" << stars << "\n";
101 ss << " ";
102 ss << report_get_message_str(c_report_title) << "\n";
103 ss << stars << "\n";
104 double frequency = cpu_frequency;
106 for (OffloadHostTimerData *pnode = timer_data_head;
107 pnode != 0; pnode = pnode->next) {
108 ss << " ";
109 ss << report_get_message_str(c_report_from_file) << " "<< pnode->file;
110 ss << report_get_message_str(c_report_line) << " " << pnode->line;
111 ss << "\n";
112 for (int i = 0; i < c_offload_host_max_phase ; i++) {
113 ss << " ";
114 offload_host_phase_name(ss, i);
115 ss << " " << std::fixed << std::setprecision(5);
116 ss << (double)pnode->phases[i].total / frequency << "\n";
119 for (int i = 0; i < c_offload_target_max_phase ; i++) {
120 double time = 0;
121 if (pnode->target.frequency != 0) {
122 time = (double) pnode->target.phases[i].total /
123 (double) pnode->target.frequency;
125 ss << " ";
126 offload_target_phase_name(ss, i);
127 ss << " " << std::fixed << std::setprecision(5);
128 ss << time << "\n";
132 buf = ss.str();
133 fprintf(stdout, buf.data());
134 fflush(stdout);
137 extern void Offload_Report_Prolog(OffloadHostTimerData *pnode)
139 double frequency = cpu_frequency;
140 std::string buf;
141 std::stringstream ss;
143 if (pnode) {
144 // [Offload] [Mic 0] [File] file.c
145 ss << "[" << report_get_message_str(c_report_offload) << "] [";
146 ss << report_get_message_str(c_report_mic) << " ";
147 ss << pnode->card_number << "] [";
148 ss << report_get_message_str(c_report_file);
149 ss << "] " << pnode->file << "\n";
151 // [Offload] [Mic 0] [Line] 1234
152 ss << "[" << report_get_message_str(c_report_offload) << "] [";
153 ss << report_get_message_str(c_report_mic) << " ";
154 ss << pnode->card_number << "] [";
155 ss << report_get_message_str(c_report_line);
156 ss << "] " << pnode->line << "\n";
158 // [Offload] [Mic 0] [Tag] Tag 1
159 ss << "[" << report_get_message_str(c_report_offload) << "] [";
160 ss << report_get_message_str(c_report_mic) << " ";
161 ss << pnode->card_number << "] [";
162 ss << report_get_message_str(c_report_tag);
163 ss << "] " << report_get_message_str(c_report_tag);
164 ss << " " << pnode->offload_number << "\n";
166 buf = ss.str();
167 fprintf(stdout, buf.data());
168 fflush(stdout);
172 extern void Offload_Report_Epilog(OffloadHostTimerData * timer_data)
174 double frequency = cpu_frequency;
175 std::string buf;
176 std::stringstream ss;
178 OffloadHostTimerData *pnode = timer_data;
180 if (!pnode) {
181 return;
183 ss << "[" << report_get_message_str(c_report_offload) << "] [";
184 ss << report_get_message_str(c_report_host) << "] [";
185 ss << report_get_message_str(c_report_tag) << " ";
186 ss << pnode->offload_number << "] [";
187 ss << report_get_message_str(c_report_cpu_time) << "] ";
188 ss << std::fixed << std::setprecision(6);
189 ss << (double) pnode->phases[0].total / frequency;
190 ss << report_get_message_str(c_report_seconds) << "\n";
192 if (offload_report_level >= OFFLOAD_REPORT_2) {
193 ss << "[" << report_get_message_str(c_report_offload) << "] [";
194 ss << report_get_message_str(c_report_mic);
195 ss << " " << pnode->card_number;
196 ss << "] [" << report_get_message_str(c_report_tag) << " ";
197 ss << pnode->offload_number << "] [";
198 ss << report_get_message_str(c_report_cpu_to_mic_data) << "] ";
199 ss << pnode->sent_bytes << " ";
200 ss << report_get_message_str(c_report_bytes) << "\n";
203 double time = 0;
204 if (pnode->target.frequency != 0) {
205 time = (double) pnode->target.phases[0].total /
206 (double) pnode->target.frequency;
208 ss << "[" << report_get_message_str(c_report_offload) << "] [";
209 ss << report_get_message_str(c_report_mic) << " ";
210 ss << pnode->card_number<< "] [";
211 ss << report_get_message_str(c_report_tag) << " ";
212 ss << pnode->offload_number << "] [";
213 ss << report_get_message_str(c_report_mic_time) << "] ";
214 ss << std::fixed << std::setprecision(6) << time;
215 ss << report_get_message_str(c_report_seconds) << "\n";
217 if (offload_report_level >= OFFLOAD_REPORT_2) {
218 ss << "[" << report_get_message_str(c_report_offload) << "] [";
219 ss << report_get_message_str(c_report_mic);
220 ss << " " << pnode->card_number;
221 ss << "] [" << report_get_message_str(c_report_tag) << " ";
222 ss << pnode->offload_number << "] [";
223 ss << report_get_message_str(c_report_mic_to_cpu_data) << "] ";
224 ss << pnode->received_bytes << " ";
225 ss << report_get_message_str(c_report_bytes) << "\n";
227 ss << "\n";
229 buf = ss.str();
230 fprintf(stdout, buf.data());
231 fflush(stdout);
233 offload_report_free_data(timer_data);
236 extern void offload_report_free_data(OffloadHostTimerData * timer_data)
238 OffloadHostTimerData *pnode_last = NULL;
240 for (OffloadHostTimerData *pnode = timer_data_head;
241 pnode != 0; pnode = pnode->next) {
242 if (timer_data == pnode) {
243 if (pnode_last) {
244 pnode_last->next = pnode->next;
246 else {
247 timer_data_head = pnode->next;
249 OFFLOAD_FREE(pnode);
250 break;
252 pnode_last = pnode;
// Append `num` space characters to ss. A zero or negative count appends
// nothing.
static void fill_buf_with_spaces(std::stringstream &ss, int num)
{
    if (num > 0) {
        // One insertion instead of one stream write per space.
        ss << std::string(static_cast<std::string::size_type>(num), ' ');
    }
}
263 static void offload_host_phase_name(std::stringstream &ss, int p_node)
265 int prefix_spaces;
266 int str_length;
267 int tail_length;
268 const int message_length = 40;
269 char const *str;
271 str = report_get_host_stage_str(p_node);
272 prefix_spaces = host_timer_prefix_spaces[p_node];
273 fill_buf_with_spaces(ss, prefix_spaces);
274 str_length = strlen(str);
275 ss << str;
276 tail_length = message_length - prefix_spaces - str_length;
277 tail_length = tail_length > 0? tail_length : 1;
278 fill_buf_with_spaces(ss, tail_length);
281 static void offload_target_phase_name(std::stringstream &ss, int p_node)
283 int prefix_spaces;
284 int str_length;
285 const int message_length = 40;
286 int tail_length;
287 char const *str;
289 str = report_get_target_stage_str(p_node);
290 prefix_spaces = target_timer_prefix_spaces[p_node];
291 fill_buf_with_spaces(ss, prefix_spaces);
292 str_length = strlen(str);
293 ss << str;
294 tail_length = message_length - prefix_spaces - str_length;
295 tail_length = (tail_length > 0)? tail_length : 1;
296 fill_buf_with_spaces(ss, tail_length);
299 void offload_timer_start(OffloadHostTimerData * timer_data,
300 OffloadHostPhase p_type)
302 timer_data->phases[p_type].start = _rdtsc();
305 void offload_timer_stop(OffloadHostTimerData * timer_data,
306 OffloadHostPhase p_type)
308 timer_data->phases[p_type].total += _rdtsc() -
309 timer_data->phases[p_type].start;
312 void offload_timer_fill_target_data(OffloadHostTimerData * timer_data,
313 void *buf)
315 uint64_t *data = (uint64_t*) buf;
317 timer_data->target.frequency = *data++;
318 for (int i = 0; i < c_offload_target_max_phase; i++) {
319 timer_data->target.phases[i].total = *data++;
323 void offload_timer_fill_host_sdata(OffloadHostTimerData * timer_data,
324 uint64_t sent_bytes)
326 if (timer_data) {
327 timer_data->sent_bytes += sent_bytes;
331 void offload_timer_fill_host_rdata(OffloadHostTimerData * timer_data,
332 uint64_t received_bytes)
334 if (timer_data) {
335 timer_data->received_bytes += received_bytes;
339 void offload_timer_fill_host_mic_num(OffloadHostTimerData * timer_data,
340 int card_number)
342 if (timer_data) {
343 timer_data->card_number = card_number;
347 OffloadHostTimerData* offload_timer_init(const char *file, int line)
349 static bool first_time = true;
350 OffloadHostTimerData* timer_data = NULL;
352 timer_data_mutex.lock();
354 if (timer_enabled ||
355 (offload_report_level && offload_report_enabled)) {
356 timer_data = (OffloadHostTimerData*)
357 OFFLOAD_MALLOC(sizeof(OffloadHostTimerData), 0);
358 memset(timer_data, 0, sizeof(OffloadHostTimerData));
360 timer_data->offload_number = OFFLOAD_DEBUG_INCR_OFLD_NUM() - 1;
362 if (timer_data_head == 0) {
363 timer_data_head = timer_data;
364 timer_data_tail = timer_data;
366 else {
367 timer_data_tail->next = timer_data;
368 timer_data_tail = timer_data;
371 timer_data->file = file;
372 timer_data->line = line;
375 timer_data_mutex.unlock();
376 return timer_data;
379 #endif // TIMING_SUPPORT