aix.h (TARGET_IEEEQUAD_DEFAULT): Set long double default to IBM.
[official-gcc.git] / liboffloadmic / runtime / cean_util.cpp
blobf2c3e48a99ccdb88cab79f596aeb4596e5ad6a0c
1 /*
2 Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include "cean_util.h"
32 #include "offload_common.h"
34 // 1. allocate element of CeanReadRanges type
35 // 2. initialized it for reading consequently contiguous ranges
36 // described by "ap" argument
37 CeanReadRanges * init_read_ranges_arr_desc(const Arr_Desc *ap)
39 CeanReadRanges * res;
41 // find the max contiguous range
42 int64_t rank = ap->rank - 1;
43 int64_t length = ap->dim[rank].size;
44 for (; rank >= 0; rank--) {
45 if (ap->dim[rank].stride == 1) {
46 length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1);
47 if (rank > 0 && length != ap->dim[rank - 1].size) {
48 break;
51 else {
52 break;
56 res =(CeanReadRanges *)malloc(sizeof(CeanReadRanges) +
57 (ap->rank - rank) * sizeof(CeanReadDim));
58 if (res == NULL)
59 LIBOFFLOAD_ERROR(c_malloc);
61 res->arr_desc = const_cast<Arr_Desc*>(ap);
62 res->current_number = 0;
63 res->range_size = length;
64 res->last_noncont_ind = rank;
66 // calculate number of contiguous ranges inside noncontiguous dimensions
67 int count = 1;
68 bool prev_is_cont = true;
69 int64_t offset = 0;
71 for (; rank >= 0; rank--) {
72 res->Dim[rank].count = count;
73 res->Dim[rank].size = ap->dim[rank].stride * ap->dim[rank].size;
74 count *= (prev_is_cont && ap->dim[rank].stride == 1? 1 :
75 (ap->dim[rank].upper - ap->dim[rank].lower +
76 ap->dim[rank].stride) / ap->dim[rank].stride);
77 prev_is_cont = false;
78 offset +=(ap->dim[rank].lower - ap->dim[rank].lindex) *
79 ap->dim[rank].size;
81 res->range_max_number = count;
82 res -> ptr = (void*)ap->base;
83 res -> init_offset = offset;
84 return res;
87 // check if ranges described by 1 argument could be transferred into ranges
88 // described by 2-nd one
89 bool cean_ranges_match(
90 CeanReadRanges * read_rng1,
91 CeanReadRanges * read_rng2
94 return ( read_rng1 == NULL || read_rng2 == NULL ||
95 (read_rng1->range_size % read_rng2->range_size == 0 ||
96 read_rng2->range_size % read_rng1->range_size == 0));
99 // Set next offset and length and returns true for next range.
100 // Returns false if the ranges are over.
101 bool get_next_range(
102 CeanReadRanges * read_rng,
103 int64_t *offset
106 if (++read_rng->current_number > read_rng->range_max_number) {
107 read_rng->current_number = 0;
108 return false;
110 int rank = 0;
111 int num = read_rng->current_number - 1;
112 int64_t cur_offset = 0;
113 int num_loc;
114 for (; rank <= read_rng->last_noncont_ind; rank++) {
115 num_loc = num / read_rng->Dim[rank].count;
116 cur_offset += num_loc * read_rng->Dim[rank].size;
117 num = num % read_rng->Dim[rank].count;
119 *offset = cur_offset + read_rng->init_offset;
120 return true;
123 bool is_arr_desc_contiguous(const Arr_Desc *ap)
125 int64_t rank = ap->rank - 1;
126 int64_t length = ap->dim[rank].size;
127 for (; rank >= 0; rank--) {
128 if (ap->dim[rank].stride > 1 &&
129 ap->dim[rank].upper - ap->dim[rank].lower != 0) {
130 return false;
132 else if (length != ap->dim[rank].size) {
133 for (; rank >= 0; rank--) {
134 if (ap->dim[rank].upper - ap->dim[rank].lower != 0) {
135 return false;
138 return true;
140 length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1);
142 return true;
145 int64_t cean_get_transf_size(CeanReadRanges * read_rng)
147 return(read_rng->range_max_number * read_rng->range_size);
// Bounds of the memory range currently being accumulated by
// generate_one_range(); generate_mem_ranges() resets them before each walk.
static uint64_t last_left, last_right;

// Callback invoked for every completed memory range [low, high].
// "esize" and "print_values" are forwarded unchanged from the caller of the
// range generator.
typedef void (*fpp)(
    const char *spaces,
    uint64_t low,
    uint64_t high,
    int esize,
    bool print_values
);
160 static void generate_one_range(
161 const char *spaces,
162 uint64_t lrange,
163 uint64_t rrange,
164 fpp fp,
165 int esize,
166 bool print_values
169 OFFLOAD_TRACE(3,
170 "%s generate_one_range(lrange=%p, rrange=%p, esize=%d)\n",
171 spaces, (void*)lrange, (void*)rrange, esize);
172 if (last_left == -1) {
173 // First range
174 last_left = lrange;
176 else {
177 if (lrange == last_right+1) {
178 // Extend previous range, don't print
180 else {
181 (*fp)(spaces, last_left, last_right, esize, print_values);
182 last_left = lrange;
185 last_right = rrange;
188 static bool element_is_contiguous(
189 uint64_t rank,
190 const struct Dim_Desc *ddp
193 if (rank == 1) {
194 return (ddp[0].lower == ddp[0].upper || ddp[0].stride == 1);
196 else {
197 return ((ddp[0].size == (ddp[1].upper-ddp[1].lower+1)*ddp[1].size) &&
198 element_is_contiguous(rank-1, ddp++));
202 static void generate_mem_ranges_one_rank(
203 const char *spaces,
204 uint64_t base,
205 uint64_t rank,
206 const struct Dim_Desc *ddp,
207 fpp fp,
208 int esize,
209 bool print_values
212 uint64_t lindex = ddp->lindex;
213 uint64_t lower = ddp->lower;
214 uint64_t upper = ddp->upper;
215 uint64_t stride = ddp->stride;
216 uint64_t size = ddp->size;
217 OFFLOAD_TRACE(3,
218 "%s "
219 "generate_mem_ranges_one_rank(base=%p, rank=%lld, lindex=%lld, "
220 "lower=%lld, upper=%lld, stride=%lld, size=%lld, esize=%d)\n",
221 spaces, (void*)base, rank, lindex, lower, upper, stride, size, esize);
223 if (element_is_contiguous(rank, ddp)) {
224 uint64_t lrange, rrange;
225 lrange = base + (lower-lindex)*size;
226 rrange = lrange + (upper-lower+1)*size - 1;
227 generate_one_range(spaces, lrange, rrange, fp, esize, print_values);
229 else {
230 if (rank == 1) {
231 for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
232 uint64_t lrange, rrange;
233 lrange = base + i*size;
234 rrange = lrange + size - 1;
235 generate_one_range(spaces, lrange, rrange,
236 fp, esize, print_values);
239 else {
240 for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
241 generate_mem_ranges_one_rank(
242 spaces, base+i*size, rank-1, ddp+1,
243 fp, esize, print_values);
250 static void generate_mem_ranges(
251 const char *spaces,
252 const Arr_Desc *adp,
253 bool deref,
254 fpp fp,
255 bool print_values
258 uint64_t esize;
260 OFFLOAD_TRACE(3,
261 "%s "
262 "generate_mem_ranges(adp=%p, deref=%d, fp)\n",
263 spaces, adp, deref);
264 last_left = -1;
265 last_right = -2;
267 // Element size is derived from last dimension
268 esize = adp->dim[adp->rank-1].size;
270 generate_mem_ranges_one_rank(
271 // For c_cean_var the base addr is the address of the data
272 // For c_cean_var_ptr the base addr is dereferenced to get to the data
273 spaces, deref ? *((uint64_t*)(adp->base)) : adp->base,
274 adp->rank, &adp->dim[0], fp, esize, print_values);
275 (*fp)(spaces, last_left, last_right, esize, print_values);
278 // returns offset and length of the data to be transferred
279 void __arr_data_offset_and_length(
280 const Arr_Desc *adp,
281 int64_t &offset,
282 int64_t &length
285 int64_t rank = adp->rank - 1;
286 int64_t size = adp->dim[rank].size;
287 int64_t r_off = 0; // offset from right boundary
289 // find the rightmost dimension which takes just part of its
290 // range. We define it if the size of left rank is not equal
291 // the range's length between upper and lower boungaries
292 while (rank > 0) {
293 size *= (adp->dim[rank].upper - adp->dim[rank].lower + 1);
294 if (size != adp->dim[rank - 1].size) {
295 break;
297 rank--;
300 offset = (adp->dim[rank].lower - adp->dim[rank].lindex) *
301 adp->dim[rank].size;
303 // find gaps both from the left - offset and from the right - r_off
304 for (rank--; rank >= 0; rank--) {
305 offset += (adp->dim[rank].lower - adp->dim[rank].lindex) *
306 adp->dim[rank].size;
307 r_off += adp->dim[rank].size -
308 (adp->dim[rank + 1].upper - adp->dim[rank + 1].lindex + 1) *
309 adp->dim[rank + 1].size;
311 length = (adp->dim[0].upper - adp->dim[0].lindex + 1) *
312 adp->dim[0].size - offset - r_off;
315 #if OFFLOAD_DEBUG > 0
317 static void print_range(
318 const char *spaces,
319 uint64_t low,
320 uint64_t high,
321 int esize,
322 bool print_values
325 char buffer[1024];
326 char number[32];
328 OFFLOAD_TRACE(3, "%s print_range(low=%p, high=%p, esize=%d)\n",
329 spaces, (void*)low, (void*)high, esize);
331 if (console_enabled < 4 || !print_values) {
332 return;
334 OFFLOAD_TRACE(4, "%s values:\n", spaces);
335 int count = 0;
336 buffer[0] = '\0';
337 while (low <= high)
339 switch (esize)
341 case 1:
342 sprintf(number, "%d ", *((char *)low));
343 low += 1;
344 break;
345 case 2:
346 sprintf(number, "%d ", *((short *)low));
347 low += 2;
348 break;
349 case 4:
350 sprintf(number, "%d ", *((int *)low));
351 low += 4;
352 break;
353 default:
354 sprintf(number, "0x%016x ", *((uint64_t *)low));
355 low += 8;
356 break;
358 strcat(buffer, number);
359 count++;
360 if (count == 10) {
361 OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer);
362 count = 0;
363 buffer[0] = '\0';
366 if (count != 0) {
367 OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer);
371 void __arr_desc_dump(
372 const char *spaces,
373 const char *name,
374 const Arr_Desc *adp,
375 bool deref,
376 bool print_values
379 OFFLOAD_TRACE(2, "%s%s CEAN expression %p\n", spaces, name, adp);
381 if (adp != 0) {
382 OFFLOAD_TRACE(2, "%s base=%llx, rank=%lld\n",
383 spaces, adp->base, adp->rank);
385 for (int i = 0; i < adp->rank; i++) {
386 OFFLOAD_TRACE(2,
387 "%s dimension %d: size=%lld, lindex=%lld, "
388 "lower=%lld, upper=%lld, stride=%lld\n",
389 spaces, i, adp->dim[i].size, adp->dim[i].lindex,
390 adp->dim[i].lower, adp->dim[i].upper,
391 adp->dim[i].stride);
393 // For c_cean_var the base addr is the address of the data
394 // For c_cean_var_ptr the base addr is dereferenced to get to the data
395 generate_mem_ranges(spaces, adp, deref, &print_range, print_values);
399 void noncont_struct_dump(
400 const char *spaces,
401 const char *name,
402 struct NonContigDesc *desc_p)
404 OFFLOAD_TRACE(2, "%s%s NonCont Struct expression %p\n",
405 spaces, name, desc_p->base);
406 if (desc_p) {
407 OFFLOAD_TRACE(2, "%s%s base=%p\n", spaces, name, desc_p->base);
408 for (int i = 0; i < desc_p->interval_cnt; i++) {
409 OFFLOAD_TRACE(2,"%s dimension %d: lower=%lld, size=%lld\n",
410 spaces, i, desc_p->interval[i].lower, desc_p->interval[i].size);
415 int64_t get_noncont_struct_size(struct NonContigDesc *desc_p)
417 int index = desc_p->interval_cnt - 1;
418 return(desc_p->interval[index].lower + desc_p->interval[index].size);
421 #endif // OFFLOAD_DEBUG