PR libgomp/64635
[official-gcc.git] / liboffloadmic / runtime / cean_util.cpp
blob3258d7f3ade7d310b268ae2ed58ebd5cefba830e
1 /*
2 Copyright (c) 2014 Intel Corporation. All Rights Reserved.
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include "cean_util.h"
32 #include "offload_common.h"
34 // 1. allocate element of CeanReadRanges type
35 // 2. initialized it for reading consequently contiguous ranges
36 // described by "ap" argument
37 CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap)
39 CeanReadRanges * res;
41 // find the max contiguous range
42 int64_t rank = ap->rank - 1;
43 int64_t length = ap->dim[rank].size;
44 for (; rank >= 0; rank--) {
45 if (ap->dim[rank].stride == 1) {
46 length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1);
47 if (rank > 0 && length != ap->dim[rank - 1].size) {
48 break;
51 else {
52 break;
56 res =(CeanReadRanges *)malloc(sizeof(CeanReadRanges) +
57 (ap->rank - rank) * sizeof(CeanReadDim));
58 if (res == NULL)
59 LIBOFFLOAD_ERROR(c_malloc);
60 res->current_number = 0;
61 res->range_size = length;
62 res->last_noncont_ind = rank;
64 // calculate number of contiguous ranges inside noncontiguous dimensions
65 int count = 1;
66 bool prev_is_cont = true;
67 int64_t offset = 0;
69 for (; rank >= 0; rank--) {
70 res->Dim[rank].count = count;
71 res->Dim[rank].size = ap->dim[rank].stride * ap->dim[rank].size;
72 count *= (prev_is_cont && ap->dim[rank].stride == 1? 1 :
73 (ap->dim[rank].upper - ap->dim[rank].lower +
74 ap->dim[rank].stride) / ap->dim[rank].stride);
75 prev_is_cont = false;
76 offset +=(ap->dim[rank].lower - ap->dim[rank].lindex) *
77 ap->dim[rank].size;
79 res->range_max_number = count;
80 res -> ptr = (void*)ap->base;
81 res -> init_offset = offset;
82 return res;
85 // check if ranges described by 1 argument could be transfered into ranges
86 // described by 2-nd one
87 bool cean_ranges_match(
88 CeanReadRanges * read_rng1,
89 CeanReadRanges * read_rng2
92 return ( read_rng1 == NULL || read_rng2 == NULL ||
93 (read_rng1->range_size % read_rng2->range_size == 0 ||
94 read_rng2->range_size % read_rng1->range_size == 0));
97 // Set next offset and length and returns true for next range.
98 // Returns false if the ranges are over.
99 bool get_next_range(
100 CeanReadRanges * read_rng,
101 int64_t *offset
104 if (++read_rng->current_number > read_rng->range_max_number) {
105 read_rng->current_number = 0;
106 return false;
108 int rank = 0;
109 int num = read_rng->current_number - 1;
110 int64_t cur_offset = 0;
111 int num_loc;
112 for (; rank <= read_rng->last_noncont_ind; rank++) {
113 num_loc = num / read_rng->Dim[rank].count;
114 cur_offset += num_loc * read_rng->Dim[rank].size;
115 num = num % read_rng->Dim[rank].count;
117 *offset = cur_offset + read_rng->init_offset;
118 return true;
121 bool is_arr_desc_contiguous(const arr_desc *ap)
123 int64_t rank = ap->rank - 1;
124 int64_t length = ap->dim[rank].size;
125 for (; rank >= 0; rank--) {
126 if (ap->dim[rank].stride > 1 &&
127 ap->dim[rank].upper - ap->dim[rank].lower != 0) {
128 return false;
130 else if (length != ap->dim[rank].size) {
131 for (; rank >= 0; rank--) {
132 if (ap->dim[rank].upper - ap->dim[rank].lower != 0) {
133 return false;
136 return true;
138 length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1);
140 return true;
143 int64_t cean_get_transf_size(CeanReadRanges * read_rng)
145 return(read_rng->range_max_number * read_rng->range_size);
// Bounds of the range currently being coalesced by generate_one_range().
// generate_mem_ranges() resets them to -1 / -2 before each walk; a
// last_left of -1 means that no range has been recorded yet.
static uint64_t last_left;
static uint64_t last_right;

// Callback that receives one coalesced range [low, high] together with
// the indentation prefix and the element size.
typedef void (*fpp)(const char *spaces,
                    uint64_t low,
                    uint64_t high,
                    int esize);
151 static void generate_one_range(
152 const char *spaces,
153 uint64_t lrange,
154 uint64_t rrange,
155 fpp fp,
156 int esize
159 OFFLOAD_TRACE(3,
160 "%s generate_one_range(lrange=%p, rrange=%p, esize=%d)\n",
161 spaces, (void*)lrange, (void*)rrange, esize);
162 if (last_left == -1) {
163 // First range
164 last_left = lrange;
166 else {
167 if (lrange == last_right+1) {
168 // Extend previous range, don't print
170 else {
171 (*fp)(spaces, last_left, last_right, esize);
172 last_left = lrange;
175 last_right = rrange;
178 static void generate_mem_ranges_one_rank(
179 const char *spaces,
180 uint64_t base,
181 uint64_t rank,
182 const struct dim_desc *ddp,
183 fpp fp,
184 int esize
187 uint64_t lindex = ddp->lindex;
188 uint64_t lower = ddp->lower;
189 uint64_t upper = ddp->upper;
190 uint64_t stride = ddp->stride;
191 uint64_t size = ddp->size;
192 OFFLOAD_TRACE(3,
193 "%s "
194 "generate_mem_ranges_one_rank(base=%p, rank=%lld, lindex=%lld, "
195 "lower=%lld, upper=%lld, stride=%lld, size=%lld, esize=%d)\n",
196 spaces, (void*)base, rank, lindex, lower, upper, stride, size, esize);
197 if (rank == 1) {
198 uint64_t lrange, rrange;
199 if (stride == 1) {
200 lrange = base + (lower-lindex)*size;
201 rrange = lrange + (upper-lower+1)*size - 1;
202 generate_one_range(spaces, lrange, rrange, fp, esize);
204 else {
205 for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
206 lrange = base + i*size;
207 rrange = lrange + size - 1;
208 generate_one_range(spaces, lrange, rrange, fp, esize);
212 else {
213 for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
214 generate_mem_ranges_one_rank(
215 spaces, base+i*size, rank-1, ddp+1, fp, esize);
221 static void generate_mem_ranges(
222 const char *spaces,
223 const arr_desc *adp,
224 bool deref,
225 fpp fp
228 uint64_t esize;
230 OFFLOAD_TRACE(3,
231 "%s "
232 "generate_mem_ranges(adp=%p, deref=%d, fp)\n",
233 spaces, adp, deref);
234 last_left = -1;
235 last_right = -2;
237 // Element size is derived from last dimension
238 esize = adp->dim[adp->rank-1].size;
240 generate_mem_ranges_one_rank(
241 // For c_cean_var the base addr is the address of the data
242 // For c_cean_var_ptr the base addr is dereferenced to get to the data
243 spaces, deref ? *((uint64_t*)(adp->base)) : adp->base,
244 adp->rank, &adp->dim[0], fp, esize);
245 (*fp)(spaces, last_left, last_right, esize);
248 // returns offset and length of the data to be transferred
249 void __arr_data_offset_and_length(
250 const arr_desc *adp,
251 int64_t &offset,
252 int64_t &length
255 int64_t rank = adp->rank - 1;
256 int64_t size = adp->dim[rank].size;
257 int64_t r_off = 0; // offset from right boundary
259 // find the rightmost dimension which takes just part of its
260 // range. We define it if the size of left rank is not equal
261 // the range's length between upper and lower boungaries
262 while (rank > 0) {
263 size *= (adp->dim[rank].upper - adp->dim[rank].lower + 1);
264 if (size != adp->dim[rank - 1].size) {
265 break;
267 rank--;
270 offset = (adp->dim[rank].lower - adp->dim[rank].lindex) *
271 adp->dim[rank].size;
273 // find gaps both from the left - offset and from the right - r_off
274 for (rank--; rank >= 0; rank--) {
275 offset += (adp->dim[rank].lower - adp->dim[rank].lindex) *
276 adp->dim[rank].size;
277 r_off += adp->dim[rank].size -
278 (adp->dim[rank + 1].upper - adp->dim[rank + 1].lindex + 1) *
279 adp->dim[rank + 1].size;
281 length = (adp->dim[0].upper - adp->dim[0].lindex + 1) *
282 adp->dim[0].size - offset - r_off;
285 #if OFFLOAD_DEBUG > 0
287 void print_range(
288 const char *spaces,
289 uint64_t low,
290 uint64_t high,
291 int esize
294 char buffer[1024];
295 char number[32];
297 OFFLOAD_TRACE(3, "%s print_range(low=%p, high=%p, esize=%d)\n",
298 spaces, (void*)low, (void*)high, esize);
300 if (console_enabled < 4) {
301 return;
303 OFFLOAD_TRACE(4, "%s values:\n", spaces);
304 int count = 0;
305 buffer[0] = '\0';
306 while (low <= high)
308 switch (esize)
310 case 1:
311 sprintf(number, "%d ", *((char *)low));
312 low += 1;
313 break;
314 case 2:
315 sprintf(number, "%d ", *((short *)low));
316 low += 2;
317 break;
318 case 4:
319 sprintf(number, "%d ", *((int *)low));
320 low += 4;
321 break;
322 default:
323 sprintf(number, "0x%016x ", *((uint64_t *)low));
324 low += 8;
325 break;
327 strcat(buffer, number);
328 count++;
329 if (count == 10) {
330 OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer);
331 count = 0;
332 buffer[0] = '\0';
335 if (count != 0) {
336 OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer);
340 void __arr_desc_dump(
341 const char *spaces,
342 const char *name,
343 const arr_desc *adp,
344 bool deref
347 OFFLOAD_TRACE(2, "%s%s CEAN expression %p\n", spaces, name, adp);
349 if (adp != 0) {
350 OFFLOAD_TRACE(2, "%s base=%llx, rank=%lld\n",
351 spaces, adp->base, adp->rank);
353 for (int i = 0; i < adp->rank; i++) {
354 OFFLOAD_TRACE(2,
355 "%s dimension %d: size=%lld, lindex=%lld, "
356 "lower=%lld, upper=%lld, stride=%lld\n",
357 spaces, i, adp->dim[i].size, adp->dim[i].lindex,
358 adp->dim[i].lower, adp->dim[i].upper,
359 adp->dim[i].stride);
361 // For c_cean_var the base addr is the address of the data
362 // For c_cean_var_ptr the base addr is dereferenced to get to the data
363 generate_mem_ranges(spaces, adp, deref, &print_range);
366 #endif // OFFLOAD_DEBUG