2 Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include "cean_util.h"
32 #include "offload_common.h"
// 1. allocate an element of CeanReadRanges type
// 2. initialize it for reading, one after another, the contiguous ranges
//    described by the "ap" argument
37 CeanReadRanges
* init_read_ranges_arr_desc(const Arr_Desc
*ap
)
41 // find the max contiguous range
42 int64_t rank
= ap
->rank
- 1;
43 int64_t length
= ap
->dim
[rank
].size
;
44 for (; rank
>= 0; rank
--) {
45 if (ap
->dim
[rank
].stride
== 1) {
46 length
*= (ap
->dim
[rank
].upper
- ap
->dim
[rank
].lower
+ 1);
47 if (rank
> 0 && length
!= ap
->dim
[rank
- 1].size
) {
56 res
=(CeanReadRanges
*)malloc(sizeof(CeanReadRanges
) +
57 (ap
->rank
- rank
) * sizeof(CeanReadDim
));
59 LIBOFFLOAD_ERROR(c_malloc
);
61 res
->arr_desc
= const_cast<Arr_Desc
*>(ap
);
62 res
->current_number
= 0;
63 res
->range_size
= length
;
64 res
->last_noncont_ind
= rank
;
66 // calculate number of contiguous ranges inside noncontiguous dimensions
68 bool prev_is_cont
= true;
71 for (; rank
>= 0; rank
--) {
72 res
->Dim
[rank
].count
= count
;
73 res
->Dim
[rank
].size
= ap
->dim
[rank
].stride
* ap
->dim
[rank
].size
;
74 count
*= (prev_is_cont
&& ap
->dim
[rank
].stride
== 1? 1 :
75 (ap
->dim
[rank
].upper
- ap
->dim
[rank
].lower
+
76 ap
->dim
[rank
].stride
) / ap
->dim
[rank
].stride
);
78 offset
+=(ap
->dim
[rank
].lower
- ap
->dim
[rank
].lindex
) *
81 res
->range_max_number
= count
;
82 res
-> ptr
= (void*)ap
->base
;
83 res
-> init_offset
= offset
;
87 // check if ranges described by 1 argument could be transferred into ranges
88 // described by 2-nd one
89 bool cean_ranges_match(
90 CeanReadRanges
* read_rng1
,
91 CeanReadRanges
* read_rng2
94 return ( read_rng1
== NULL
|| read_rng2
== NULL
||
95 (read_rng1
->range_size
% read_rng2
->range_size
== 0 ||
96 read_rng2
->range_size
% read_rng1
->range_size
== 0));
// Sets the offset and length of the next range and returns true.
// Returns false once all ranges have been consumed.
102 CeanReadRanges
* read_rng
,
106 if (++read_rng
->current_number
> read_rng
->range_max_number
) {
107 read_rng
->current_number
= 0;
111 int num
= read_rng
->current_number
- 1;
112 int64_t cur_offset
= 0;
114 for (; rank
<= read_rng
->last_noncont_ind
; rank
++) {
115 num_loc
= num
/ read_rng
->Dim
[rank
].count
;
116 cur_offset
+= num_loc
* read_rng
->Dim
[rank
].size
;
117 num
= num
% read_rng
->Dim
[rank
].count
;
119 *offset
= cur_offset
+ read_rng
->init_offset
;
123 bool is_arr_desc_contiguous(const Arr_Desc
*ap
)
125 int64_t rank
= ap
->rank
- 1;
126 int64_t length
= ap
->dim
[rank
].size
;
127 for (; rank
>= 0; rank
--) {
128 if (ap
->dim
[rank
].stride
> 1 &&
129 ap
->dim
[rank
].upper
- ap
->dim
[rank
].lower
!= 0) {
132 else if (length
!= ap
->dim
[rank
].size
) {
133 for (; rank
>= 0; rank
--) {
134 if (ap
->dim
[rank
].upper
- ap
->dim
[rank
].lower
!= 0) {
140 length
*= (ap
->dim
[rank
].upper
- ap
->dim
[rank
].lower
+ 1);
145 int64_t cean_get_transf_size(CeanReadRanges
* read_rng
)
147 return(read_rng
->range_max_number
* read_rng
->range_size
);
150 static uint64_t last_left
, last_right
;
160 static void generate_one_range(
170 "%s generate_one_range(lrange=%p, rrange=%p, esize=%d)\n",
171 spaces
, (void*)lrange
, (void*)rrange
, esize
);
172 if (last_left
== -1) {
177 if (lrange
== last_right
+1) {
178 // Extend previous range, don't print
181 (*fp
)(spaces
, last_left
, last_right
, esize
, print_values
);
188 static bool element_is_contiguous(
190 const struct Dim_Desc
*ddp
194 return (ddp
[0].lower
== ddp
[0].upper
|| ddp
[0].stride
== 1);
197 return ((ddp
[0].size
== (ddp
[1].upper
-ddp
[1].lower
+1)*ddp
[1].size
) &&
198 element_is_contiguous(rank
-1, ddp
++));
202 static void generate_mem_ranges_one_rank(
206 const struct Dim_Desc
*ddp
,
212 uint64_t lindex
= ddp
->lindex
;
213 uint64_t lower
= ddp
->lower
;
214 uint64_t upper
= ddp
->upper
;
215 uint64_t stride
= ddp
->stride
;
216 uint64_t size
= ddp
->size
;
219 "generate_mem_ranges_one_rank(base=%p, rank=%lld, lindex=%lld, "
220 "lower=%lld, upper=%lld, stride=%lld, size=%lld, esize=%d)\n",
221 spaces
, (void*)base
, rank
, lindex
, lower
, upper
, stride
, size
, esize
);
223 if (element_is_contiguous(rank
, ddp
)) {
224 uint64_t lrange
, rrange
;
225 lrange
= base
+ (lower
-lindex
)*size
;
226 rrange
= lrange
+ (upper
-lower
+1)*size
- 1;
227 generate_one_range(spaces
, lrange
, rrange
, fp
, esize
, print_values
);
231 for (int i
=lower
-lindex
; i
<=upper
-lindex
; i
+=stride
) {
232 uint64_t lrange
, rrange
;
233 lrange
= base
+ i
*size
;
234 rrange
= lrange
+ size
- 1;
235 generate_one_range(spaces
, lrange
, rrange
,
236 fp
, esize
, print_values
);
240 for (int i
=lower
-lindex
; i
<=upper
-lindex
; i
+=stride
) {
241 generate_mem_ranges_one_rank(
242 spaces
, base
+i
*size
, rank
-1, ddp
+1,
243 fp
, esize
, print_values
);
250 static void generate_mem_ranges(
262 "generate_mem_ranges(adp=%p, deref=%d, fp)\n",
267 // Element size is derived from last dimension
268 esize
= adp
->dim
[adp
->rank
-1].size
;
270 generate_mem_ranges_one_rank(
271 // For c_cean_var the base addr is the address of the data
272 // For c_cean_var_ptr the base addr is dereferenced to get to the data
273 spaces
, deref
? *((uint64_t*)(adp
->base
)) : adp
->base
,
274 adp
->rank
, &adp
->dim
[0], fp
, esize
, print_values
);
275 (*fp
)(spaces
, last_left
, last_right
, esize
, print_values
);
278 // returns offset and length of the data to be transferred
279 void __arr_data_offset_and_length(
285 int64_t rank
= adp
->rank
- 1;
286 int64_t size
= adp
->dim
[rank
].size
;
287 int64_t r_off
= 0; // offset from right boundary
// find the rightmost dimension which takes just part of its
// range. We detect it when the size of the rank to its left is not
// equal to the length of the range between the upper and lower boundaries
293 size
*= (adp
->dim
[rank
].upper
- adp
->dim
[rank
].lower
+ 1);
294 if (size
!= adp
->dim
[rank
- 1].size
) {
300 offset
= (adp
->dim
[rank
].lower
- adp
->dim
[rank
].lindex
) *
303 // find gaps both from the left - offset and from the right - r_off
304 for (rank
--; rank
>= 0; rank
--) {
305 offset
+= (adp
->dim
[rank
].lower
- adp
->dim
[rank
].lindex
) *
307 r_off
+= adp
->dim
[rank
].size
-
308 (adp
->dim
[rank
+ 1].upper
- adp
->dim
[rank
+ 1].lindex
+ 1) *
309 adp
->dim
[rank
+ 1].size
;
311 length
= (adp
->dim
[0].upper
- adp
->dim
[0].lindex
+ 1) *
312 adp
->dim
[0].size
- offset
- r_off
;
315 #if OFFLOAD_DEBUG > 0
317 static void print_range(
328 OFFLOAD_TRACE(3, "%s print_range(low=%p, high=%p, esize=%d)\n",
329 spaces
, (void*)low
, (void*)high
, esize
);
331 if (console_enabled
< 4 || !print_values
) {
334 OFFLOAD_TRACE(4, "%s values:\n", spaces
);
342 sprintf(number
, "%d ", *((char *)low
));
346 sprintf(number
, "%d ", *((short *)low
));
350 sprintf(number
, "%d ", *((int *)low
));
354 sprintf(number
, "0x%016x ", *((uint64_t *)low
));
358 strcat(buffer
, number
);
361 OFFLOAD_TRACE(4, "%s %s\n", spaces
, buffer
);
367 OFFLOAD_TRACE(4, "%s %s\n", spaces
, buffer
);
371 void __arr_desc_dump(
379 OFFLOAD_TRACE(2, "%s%s CEAN expression %p\n", spaces
, name
, adp
);
382 OFFLOAD_TRACE(2, "%s base=%llx, rank=%lld\n",
383 spaces
, adp
->base
, adp
->rank
);
385 for (int i
= 0; i
< adp
->rank
; i
++) {
387 "%s dimension %d: size=%lld, lindex=%lld, "
388 "lower=%lld, upper=%lld, stride=%lld\n",
389 spaces
, i
, adp
->dim
[i
].size
, adp
->dim
[i
].lindex
,
390 adp
->dim
[i
].lower
, adp
->dim
[i
].upper
,
393 // For c_cean_var the base addr is the address of the data
394 // For c_cean_var_ptr the base addr is dereferenced to get to the data
395 generate_mem_ranges(spaces
, adp
, deref
, &print_range
, print_values
);
399 void noncont_struct_dump(
402 struct NonContigDesc
*desc_p
)
404 OFFLOAD_TRACE(2, "%s%s NonCont Struct expression %p\n",
405 spaces
, name
, desc_p
->base
);
407 OFFLOAD_TRACE(2, "%s%s base=%p\n", spaces
, name
, desc_p
->base
);
408 for (int i
= 0; i
< desc_p
->interval_cnt
; i
++) {
409 OFFLOAD_TRACE(2,"%s dimension %d: lower=%lld, size=%lld\n",
410 spaces
, i
, desc_p
->interval
[i
].lower
, desc_p
->interval
[i
].size
);
415 int64_t get_noncont_struct_size(struct NonContigDesc
*desc_p
)
417 int index
= desc_p
->interval_cnt
- 1;
418 return(desc_p
->interval
[index
].lower
+ desc_p
->interval
[index
].size
);
421 #endif // OFFLOAD_DEBUG