* auto-profile.c (afdo_annotate_cfg): Use update_max_bb_count.
[official-gcc.git] / liboffloadmic / runtime / offload_omp_host.cpp
blob0439fec313b6c09138a4677919dbb68bf58f484f
1 /*
2 Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include <omp.h>
32 //#include <stdlib.h>
33 //#include "offload.h"
34 #include "compiler_if_host.h"
37 // OpenMP API
39 void omp_set_default_device(int num) __GOMP_NOTHROW
41 if (num >= 0) {
42 __omp_device_num = num;
46 int omp_get_default_device(void) __GOMP_NOTHROW
48 return __omp_device_num;
51 int omp_get_num_devices() __GOMP_NOTHROW
53 __offload_init_library();
54 return mic_engines_total;
57 // OpenMP 4.5 APIs
59 // COI supports 3-dim multiD transfers
60 #define MAX_ARRAY_RANK 3
62 int omp_get_initial_device(
63 void
64 ) __GOMP_NOTHROW
66 return -1;
69 void* omp_target_alloc(
70 size_t size,
71 int device_num
72 ) __GOMP_NOTHROW
74 __offload_init_library();
76 OFFLOAD_TRACE(2, "omp_target_alloc(%lld, %d)\n", size, device_num);
78 if (device_num < -1) {
79 LIBOFFLOAD_ERROR(c_invalid_device_number);
80 exit(1);
83 void* result = 0;
85 // malloc on CPU
86 if (device_num == -1) {
87 // We do not check for malloc returning NULL because the
88 // specification of this API includes the possibility of failure.
89 // The user will check the returned result
90 result = malloc(size);
91 return result;
94 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(
95 TARGET_MIC, device_num, 0, NULL, __func__, 0);
96 if (ofld != 0) {
97 VarDesc vars[2] = {0};
99 vars[0].type.src = c_data;
100 vars[0].type.dst = c_data;
101 vars[0].direction.bits = c_parameter_in;
102 vars[0].size = sizeof(size);
103 vars[0].count = 1;
104 vars[0].ptr = &size;
106 vars[1].type.src = c_data;
107 vars[1].type.dst = c_data;
108 vars[1].direction.bits = c_parameter_out;
109 vars[1].size = sizeof(result);
110 vars[1].count = 1;
111 vars[1].ptr = &result;
113 OFFLOAD_OFFLOAD(ofld, "omp_target_alloc_target",
114 0, 2, vars, NULL, 0, 0, 0);
116 return result;
119 void omp_target_free(
120 void *device_ptr,
121 int device_num
122 ) __GOMP_NOTHROW
124 __offload_init_library();
126 OFFLOAD_TRACE(2, "omp_target_free(%p, %d)\n", device_ptr, device_num);
128 if (device_num < -1) {
129 LIBOFFLOAD_ERROR(c_invalid_device_number);
130 exit(1);
133 // free on CPU
134 if (device_num == -1) {
135 free(device_ptr);
136 return;
139 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(
140 TARGET_MIC, device_num, 0, NULL, __func__, 0);
141 if (ofld) {
142 VarDesc vars[1] = {0};
144 vars[0].type.src = c_data;
145 vars[0].type.dst = c_data;
146 vars[0].direction.bits = c_parameter_in;
147 vars[0].size = sizeof(device_ptr);
148 vars[0].count = 1;
149 vars[0].ptr = &device_ptr;
151 OFFLOAD_OFFLOAD(ofld, "omp_target_free_target",
152 0, 1, vars, NULL, 0, 0, 0);
156 int omp_target_is_present(
157 void *ptr,
158 int device_num
159 ) __GOMP_NOTHROW
161 __offload_init_library();
163 OFFLOAD_TRACE(2, "omp_target_is_present(%p, %d)\n", ptr, device_num);
165 if (device_num < -1) {
166 LIBOFFLOAD_ERROR(c_invalid_device_number);
167 exit(1);
170 if (device_num == -1) {
171 return false;
174 // If OpenMP allows wrap-around for device numbers, enable next line
175 //device_num %= mic_engines_total;
177 // lookup existing association in pointer table
178 PtrData* ptr_data = mic_engines[device_num].find_ptr_data(ptr);
179 if (ptr_data == 0) {
180 OFFLOAD_TRACE(3, "Address %p is not mapped on device %d\n",
181 ptr, device_num);
182 return false;
185 OFFLOAD_TRACE(3, "Address %p found mapped on device %d\n",
186 ptr, device_num);
187 return true;
190 int omp_target_memcpy(
191 void *dst,
192 void *src,
193 size_t length,
194 size_t dst_offset,
195 size_t src_offset,
196 int dst_device,
197 int src_device
198 ) __GOMP_NOTHROW
200 __offload_init_library();
202 OFFLOAD_TRACE(2, "omp_target_memcpy(%p, %p, %lld, %lld, %lld, %d, %d)\n",
203 dst, src, length, dst_offset, src_offset, dst_device, src_device);
205 if (dst_device < -1 || src_device < -1) {
206 LIBOFFLOAD_ERROR(c_invalid_device_number);
207 exit(1);
210 char* srcp = (char *)src + src_offset;
211 char* dstp = (char *)dst + dst_offset;
213 if (src_device == -1) {
214 // Source is CPU
215 if (dst_device == -1) {
216 // CPU -> CPU
217 memcpy(dstp, srcp, length);
218 return 0;
219 } else {
220 // CPU -> MIC
221 // COIBufferWrite
222 // If OpenMP allows wrap-around for device numbers, enable next line
223 //dst_device %= mic_engines_total;
225 OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n", dstp);
226 COIBUFFER mic_buf;
227 COIRESULT res = COI::BufferCreateFromMemory(length,
228 COI_BUFFER_NORMAL, COI_SINK_MEMORY, dstp,
229 1, &mic_engines[dst_device].get_process(),
230 &mic_buf);
231 if (res != COI_SUCCESS) {
232 LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
233 return 1;
235 res = COI::BufferWrite(mic_buf, 0, srcp, length,
236 COI_COPY_UNSPECIFIED, 0, 0, 0);
237 if (res != COI_SUCCESS) {
238 LIBOFFLOAD_ERROR(c_buf_write, res);
239 return 1;
241 res = COI::BufferDestroy(mic_buf);
242 if (res != COI_SUCCESS) {
243 LIBOFFLOAD_ERROR(c_buf_destroy, res);
244 return 1;
246 return 0;
248 } else {
249 // Source is device
250 if (dst_device == -1) {
251 // MIC -> CPU
252 // COIBufferRead
254 // If OpenMP allows wrap-around for device numbers, enable next line
255 //src_device %= mic_engines_total;
257 OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n", srcp);
258 COIBUFFER mic_buf;
259 COIRESULT res = COI::BufferCreateFromMemory(length,
260 COI_BUFFER_NORMAL, COI_SINK_MEMORY, srcp,
261 1, &mic_engines[src_device].get_process(),
262 &mic_buf);
263 if (res != COI_SUCCESS) {
264 LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
265 return 1;
267 res = COI::BufferRead(mic_buf, 0, dstp, length,
268 COI_COPY_UNSPECIFIED, 0, 0, 0);
269 if (res != COI_SUCCESS) {
270 LIBOFFLOAD_ERROR(c_buf_read, res);
271 return 1;
273 res = COI::BufferDestroy(mic_buf);
274 if (res != COI_SUCCESS) {
275 LIBOFFLOAD_ERROR(c_buf_destroy, res);
276 return 1;
278 return 0;
279 } else {
280 // some MIC -> some MIC
281 if (src_device == dst_device) {
282 // MIC local copy will be done as remote memcpy
284 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(TARGET_MIC, src_device,
285 0, NULL, __func__, 0);
286 if (ofld) {
287 VarDesc vars[3] = {0};
289 vars[0].type.src = c_data;
290 vars[0].type.dst = c_data;
291 vars[0].direction.bits = c_parameter_in;
292 vars[0].size = sizeof(dstp);
293 vars[0].count = 1;
294 vars[0].ptr = &dstp;
296 vars[1].type.src = c_data;
297 vars[1].type.dst = c_data;
298 vars[1].direction.bits = c_parameter_in;
299 vars[1].size = sizeof(srcp);
300 vars[1].count = 1;
301 vars[1].ptr = &srcp;
303 vars[2].type.src = c_data;
304 vars[2].type.dst = c_data;
305 vars[2].direction.bits = c_parameter_in;
306 vars[2].size = sizeof(length);
307 vars[2].count = 1;
308 vars[2].ptr = &length;
310 OFFLOAD_OFFLOAD(ofld, "omp_target_memcpy_target",
311 0, 3, vars, NULL, 0, 0, 0);
312 return 0;
313 } else {
314 return 1;
316 } else {
317 // MICx -> MICy
318 // Allocate CPU buffer
319 char *cpu_mem = (char *)malloc(length);
320 if (cpu_mem == 0) {
321 LIBOFFLOAD_ERROR(c_malloc);
322 return 1;
324 int retval = 1;
325 if (omp_target_memcpy(
326 cpu_mem, srcp, length, 0, 0, -1, src_device) == 0) {
327 retval = omp_target_memcpy(
328 dstp, cpu_mem, length, 0, 0, dst_device, -1);
330 free(cpu_mem);
331 return retval;
// Returns the number of bytes spanned by one step along the outermost of
// num_dims dimensions: element_size multiplied by dimensions[1] through
// dimensions[num_dims-1]. dimensions[0] is never read. For num_dims <= 1
// the result is element_size.
static size_t bytesize_at_this_dimension(
    size_t element_size,
    int num_dims,
    const size_t* dimensions
)
{
    size_t bytes = element_size;
    for (int d = 1; d < num_dims; d++) {
        bytes *= dimensions[d];
    }
    return bytes;
}

// Copies a rectangular sub-volume between two host arrays.
//
//   dst, src        base addresses of the destination and source arrays
//   element_size    size of one array element in bytes
//   num_dims        number of dimensions described by the arrays below
//   volume          extent of the copied region in each dimension
//   dst_offsets     start index of the region in dst, per dimension
//   src_offsets     start index of the region in src, per dimension
//   dst_dimensions  full extent of the dst array, per dimension
//   src_dimensions  full extent of the src array, per dimension
//
// The outermost dimension is walked one index at a time and each slice is
// handled recursively; the innermost dimension is one contiguous memcpy.
// Indices and counts are kept in size_t so that extents or offsets larger
// than INT_MAX are not truncated.
static void memcpy_rect(
    char *dst,
    char *src,
    size_t element_size,
    int num_dims,
    const size_t *volume,
    const size_t *dst_offsets,
    const size_t *src_offsets,
    const size_t *dst_dimensions,
    const size_t *src_dimensions
)
{
    if (num_dims <= 1) {
        // Innermost dimension: the selected elements are contiguous.
        memcpy(dst + dst_offsets[0] * element_size,
               src + src_offsets[0] * element_size,
               element_size * volume[0]);
        return;
    }

    // Bytes between consecutive indices of the current dimension.
    const size_t dst_stride =
        bytesize_at_this_dimension(element_size, num_dims, dst_dimensions);
    const size_t src_stride =
        bytesize_at_this_dimension(element_size, num_dims, src_dimensions);

    for (size_t i = 0; i < volume[0]; i++) {
        memcpy_rect(dst + dst_stride * (dst_offsets[0] + i),
                    src + src_stride * (src_offsets[0] + i),
                    element_size, num_dims - 1, volume + 1,
                    dst_offsets + 1, src_offsets + 1,
                    dst_dimensions + 1, src_dimensions + 1);
    }
}
386 int omp_target_memcpy_rect(
387 void *dst_,
388 void *src_,
389 size_t element_size,
390 int num_dims,
391 const size_t *volume,
392 const size_t *dst_offsets,
393 const size_t *src_offsets,
394 const size_t *dst_dimensions,
395 const size_t *src_dimensions,
396 int dst_device,
397 int src_device
398 ) __GOMP_NOTHROW
400 char *dst = (char *)dst_;
401 char *src = (char *)src_;
403 __offload_init_library();
405 OFFLOAD_TRACE(2, "omp_target_memcpy_rect(%p, %p, %lld, %d, "
406 "%p, %p, %p, %p, %p, %d, %d)\n",
407 dst, src, element_size, num_dims,
408 volume, dst_offsets, src_offsets,
409 dst_dimensions, src_dimensions, dst_device, src_device);
411 // MAX_ARRAY_RANK dimensions are supported
412 if (dst == 0 && src == 0) {
413 return MAX_ARRAY_RANK;
416 if (num_dims < 1 || num_dims > MAX_ARRAY_RANK ||
417 element_size < 1 ||
418 volume == 0 || dst_offsets == 0 || src_offsets == 0 ||
419 dst_dimensions == 0 || src_dimensions == 0) {
420 return 1;
423 if (dst_device < -1 || src_device < -1) {
424 LIBOFFLOAD_ERROR(c_invalid_device_number);
425 exit(1);
428 if (src_device == -1) {
429 // Source is CPU
430 if (dst_device == -1) {
431 // CPU -> CPU
432 memcpy_rect((char*)dst, (char*)src, element_size, num_dims, volume,
433 dst_offsets, src_offsets,
434 dst_dimensions, src_dimensions);
435 return 0;
436 } else {
437 // CPU -> MIC
438 // COIBufferWriteMultiD
439 struct arr_desc dst_desc;
440 struct arr_desc src_desc;
442 dst_desc.base = (int64_t)dst;
443 dst_desc.rank = num_dims;
445 src_desc.base = (int64_t)src;
446 src_desc.rank = num_dims;
448 for (int i=0; i<num_dims; i++)
450 dst_desc.dim[i].size = bytesize_at_this_dimension(
451 element_size,
452 num_dims - i,
453 dst_dimensions + i);
454 dst_desc.dim[i].lindex = 0;
455 dst_desc.dim[i].lower = dst_offsets[i];
456 dst_desc.dim[i].upper = dst_offsets[i] + volume[i] - 1;
457 dst_desc.dim[i].stride = 1;
459 src_desc.dim[i].size = bytesize_at_this_dimension(
460 element_size,
461 num_dims - i,
462 src_dimensions + i);
463 src_desc.dim[i].lindex = 0;
464 src_desc.dim[i].lower = src_offsets[i];
465 src_desc.dim[i].upper = src_offsets[i] + volume[i] - 1;
466 src_desc.dim[i].stride = 1;
468 __arr_desc_dump("", "dst", (const Arr_Desc*)&dst_desc, false, false);
469 __arr_desc_dump("", "src", (const Arr_Desc*)&src_desc, false, false);
471 // If OpenMP allows wrap-around for device numbers, enable next line
472 //dst_device %= mic_engines_total;
474 // Compute MIC buffer size
475 size_t dst_length = dst_dimensions[0] * bytesize_at_this_dimension(
476 element_size,
477 num_dims,
478 dst_dimensions);
480 OFFLOAD_TRACE(3,
481 "Creating buffer from sink memory %llx of size %lld\n",
482 dst, dst_length);
483 COIBUFFER mic_buf;
484 COIRESULT res = COI::BufferCreateFromMemory(dst_length,
485 COI_BUFFER_NORMAL, COI_SINK_MEMORY, dst,
486 1, &mic_engines[dst_device].get_process(),
487 &mic_buf);
488 if (res != COI_SUCCESS) {
489 LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
490 return 1;
492 res = COI::BufferWriteMultiD(mic_buf,
493 mic_engines[dst_device].get_process(),
494 0, &dst_desc, &src_desc,
495 COI_COPY_UNSPECIFIED, 0, 0, 0);
496 if (res != COI_SUCCESS) {
497 LIBOFFLOAD_ERROR(c_buf_write, res);
498 return 1;
500 res = COI::BufferDestroy(mic_buf);
501 if (res != COI_SUCCESS) {
502 LIBOFFLOAD_ERROR(c_buf_destroy, res);
503 return 1;
505 return 0;
507 } else {
508 // Source is device
509 if (dst_device == -1) {
510 // COIBufferReadMultiD
511 struct arr_desc dst_desc;
512 struct arr_desc src_desc;
514 dst_desc.base = (int64_t)dst;
515 dst_desc.rank = num_dims;
517 src_desc.base = (int64_t)src;
518 src_desc.rank = num_dims;
520 for (int i=0; i<num_dims; i++)
522 dst_desc.dim[i].size = bytesize_at_this_dimension(
523 element_size,
524 num_dims - i,
525 dst_dimensions + i);
526 dst_desc.dim[i].lindex = 0;
527 dst_desc.dim[i].lower = dst_offsets[i];
528 dst_desc.dim[i].upper = dst_offsets[i] + volume[i] - 1;
529 dst_desc.dim[i].stride = 1;
531 src_desc.dim[i].size = bytesize_at_this_dimension(
532 element_size,
533 num_dims - i,
534 src_dimensions + i);
535 src_desc.dim[i].lindex = 0;
536 src_desc.dim[i].lower = src_offsets[i];
537 src_desc.dim[i].upper = src_offsets[i] + volume[i] - 1;
538 src_desc.dim[i].stride = 1;
540 __arr_desc_dump("", "dst", (const Arr_Desc*)&dst_desc, false, false);
541 __arr_desc_dump("", "src", (const Arr_Desc*)&src_desc, false, false);
543 // If OpenMP allows wrap-around for device numbers, enable next line
544 //src_device %= mic_engines_total;
546 // Compute MIC buffer size
547 size_t src_length = src_dimensions[0] * bytesize_at_this_dimension(
548 element_size,
549 num_dims,
550 src_dimensions);
552 OFFLOAD_TRACE(3,
553 "Creating buffer from sink memory %llx of size %lld\n",
554 src, src_length);
555 COIBUFFER mic_buf;
556 COIRESULT res = COI::BufferCreateFromMemory(src_length,
557 COI_BUFFER_NORMAL, COI_SINK_MEMORY, src,
558 1, &mic_engines[src_device].get_process(),
559 &mic_buf);
560 if (res != COI_SUCCESS) {
561 LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
562 return 1;
564 res = COI::BufferReadMultiD(mic_buf, 0,
565 &dst_desc, &src_desc,
566 COI_COPY_UNSPECIFIED, 0, 0, 0);
567 if (res != COI_SUCCESS) {
568 LIBOFFLOAD_ERROR(c_buf_write, res);
569 return 1;
571 res = COI::BufferDestroy(mic_buf);
572 if (res != COI_SUCCESS) {
573 LIBOFFLOAD_ERROR(c_buf_destroy, res);
574 return 1;
576 return 0;
577 } else {
578 // some MIC -> some MIC
579 if (src_device == dst_device) {
580 // MIC local copy will be done as remote memcpy_rect
581 struct parameters {
582 void *dst;
583 void *src;
584 size_t element_size;
585 int num_dims;
586 size_t array_info[MAX_ARRAY_RANK*5];
587 } parameters = {dst, src, element_size, num_dims};
588 int result;
590 for (int i=0; i<num_dims; i++)
592 parameters.array_info[i] = volume[i];
593 parameters.array_info[i+num_dims] = dst_offsets[i];
594 parameters.array_info[i+num_dims*2] = src_offsets[i];
595 parameters.array_info[i+num_dims*3] = dst_dimensions[i];
596 parameters.array_info[i+num_dims*4] = src_dimensions[i];
599 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(TARGET_MIC, src_device,
600 0, NULL, __func__, 0);
601 if (ofld) {
602 VarDesc vars[1] = {0};
604 vars[0].type.src = c_data;
605 vars[0].type.dst = c_data;
606 vars[0].direction.bits = c_parameter_in;
607 vars[0].size = sizeof(parameters) -
608 (MAX_ARRAY_RANK - num_dims) *
609 5 * sizeof(size_t);
610 vars[0].count = 1;
611 vars[0].ptr = &parameters;
613 OFFLOAD_OFFLOAD(ofld, "omp_target_memcpy_rect_target",
614 0, 1, vars, NULL, 0, 0, 0);
615 return 0;
616 } else {
617 return 1;
619 } else {
620 // MICx -> MICy
622 // Compute transfer byte-count
623 size_t dst_length = element_size;
624 for (int i=0; i<num_dims; i++) {
625 dst_length *= volume[i];
628 // Allocate CPU buffer
629 char *cpu_mem = (char *)malloc(dst_length);
630 if (cpu_mem == 0) {
631 LIBOFFLOAD_ERROR(c_malloc);
632 return 1;
635 // Create CPU offset and dimension arrays
636 // The CPU array collects the data in a contiguous block
637 size_t cpu_offsets[MAX_ARRAY_RANK];
638 size_t cpu_dimensions[MAX_ARRAY_RANK];
639 for (int i=0; i<num_dims; i++) {
640 cpu_offsets[i] = 0;
641 cpu_dimensions[i] = volume[i];
644 int retval = 1;
645 if (omp_target_memcpy_rect(
646 cpu_mem, src, element_size, num_dims, volume,
647 cpu_offsets, src_offsets,
648 cpu_dimensions, src_dimensions,
649 -1, src_device) == 0) {
650 retval = omp_target_memcpy_rect(
651 dst, cpu_mem, element_size, num_dims, volume,
652 dst_offsets, cpu_offsets,
653 dst_dimensions, cpu_dimensions,
654 dst_device, -1);
656 free(cpu_mem);
657 return retval;
663 // host_ptr is key in table that yields association on device
664 // A COIBUFFER of specified size is created from the memory at
665 // device_ptr+device_offset on device_num
666 int omp_target_associate_ptr(
667 void *host_ptr,
668 void *device_ptr,
669 size_t size,
670 size_t device_offset,
671 int device_num
672 ) __GOMP_NOTHROW
674 COIRESULT res;
676 __offload_init_library();
678 OFFLOAD_TRACE(2, "omp_target_associate_ptr(%p, %p, %lld, %lld, %d)\n",
679 host_ptr, device_ptr, size, device_offset, device_num);
681 if (device_num < -1) {
682 LIBOFFLOAD_ERROR(c_invalid_device_number);
683 exit(1);
686 // Associating to CPU is treated as failure
687 if (device_num == -1) {
688 return 1;
691 // An incorrect size is treated as failure
692 if (size < 0) {
693 return 1;
696 // If OpenMP allows wrap-around for device numbers, enable next line
697 //Engine& device = mic_engines[device_num % mic_engines_total];
698 Engine& device = mic_engines[device_num];
700 // Does host pointer have association already?
701 // lookup existing association in pointer table
702 PtrData* ptr_data = device.find_ptr_data(host_ptr);
703 if (ptr_data != 0) {
704 OFFLOAD_TRACE(3, "Address %p is already mapped on device %d\n",
705 host_ptr, device_num);
706 // Is current device pointer and offset same as existing?
707 if ((void*)ptr_data->mic_addr == device_ptr &&
708 (size_t)ptr_data->alloc_disp == device_offset) {
709 return 0;
710 } else {
711 return 1;
715 // Create association
716 OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
717 host_ptr, size);
719 bool is_new;
720 ptr_data = device.insert_ptr_data(host_ptr, size, is_new);
721 ptr_data->is_omp_associate = true;
723 // create CPU buffer
724 OFFLOAD_TRACE(3,
725 "Creating buffer from source memory %p, length %lld\n",
726 host_ptr, size);
728 // result is not checked because we can continue without cpu
729 // buffer. In this case we will use COIBufferRead/Write
730 // instead of COIBufferCopy.
732 COI::BufferCreateFromMemory(size,
733 COI_BUFFER_OPENCL,
735 host_ptr,
737 &device.get_process(),
738 &ptr_data->cpu_buf);
740 // create MIC buffer
741 OFFLOAD_TRACE(3,
742 "Creating buffer from sink memory: addr %p, size %lld\n",
743 (char *)device_ptr + device_offset, size);
744 res = COI::BufferCreateFromMemory(size,
745 COI_BUFFER_NORMAL,
746 COI_SINK_MEMORY,
747 device_ptr,
749 &device.get_process(),
750 &ptr_data->mic_buf);
751 if (res != COI_SUCCESS) {
752 ptr_data->alloc_ptr_data_lock.unlock();
753 return 1;
756 // make buffer valid on the device.
757 res = COI::BufferSetState(ptr_data->mic_buf,
758 device.get_process(),
759 COI_BUFFER_VALID,
760 COI_BUFFER_NO_MOVE,
761 0, 0, 0);
762 if (res != COI_SUCCESS) {
763 ptr_data->alloc_ptr_data_lock.unlock();
764 return 1;
767 res = COI::BufferSetState(ptr_data->mic_buf,
768 COI_PROCESS_SOURCE,
769 COI_BUFFER_INVALID,
770 COI_BUFFER_NO_MOVE,
771 0, 0, 0);
772 if (res != COI_SUCCESS) {
773 ptr_data->alloc_ptr_data_lock.unlock();
774 return 1;
776 ptr_data->alloc_disp = device_offset;
777 ptr_data->alloc_ptr_data_lock.unlock();
779 return 0;
782 int omp_target_disassociate_ptr(
783 void *host_ptr,
784 int device_num
785 ) __GOMP_NOTHROW
787 COIRESULT res;
789 __offload_init_library();
791 OFFLOAD_TRACE(2, "omp_target_disassociate_ptr(%p, %d)\n",
792 host_ptr, device_num);
794 if (device_num < -1) {
795 LIBOFFLOAD_ERROR(c_invalid_device_number);
796 exit(1);
799 // Dissociating from CPU is treated as failure
800 if (device_num == -1) {
801 return 1;
804 // If OpenMP allows wrap-around for device numbers, enable next line
805 //Engine& device = mic_engines[device_num % mic_engines_total];
806 Engine& device = mic_engines[device_num];
808 // Lookup existing association in pointer table
809 PtrData* ptr_data = device.find_ptr_data(host_ptr);
811 // Attempt to disassociate unassociated pointer is a failure
812 if (ptr_data == 0) {
813 return 1;
816 // Destroy buffers
817 if (ptr_data->cpu_buf != 0) {
818 OFFLOAD_TRACE(3, "Destroying CPU buffer %p\n", ptr_data->cpu_buf);
819 COI::BufferDestroy(ptr_data->cpu_buf);
821 if (ptr_data->mic_buf != 0) {
822 OFFLOAD_TRACE(3, "Destroying MIC buffer %p\n", ptr_data->mic_buf);
823 COI::BufferDestroy(ptr_data->mic_buf);
826 // Remove association from map
827 OFFLOAD_TRACE(3, "Removing association for addr %p\n",
828 ptr_data->cpu_addr.start());
829 device.remove_ptr_data(ptr_data->cpu_addr.start());
831 return 0;
834 // End of OpenMP 4.5 APIs
837 // OpenMP API wrappers
839 static void omp_set_int_target(
840 TARGET_TYPE target_type,
841 int target_number,
842 int setting,
843 const char* f_name
846 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
847 f_name, 0);
848 if (ofld) {
849 VarDesc vars[1] = {0};
851 vars[0].type.src = c_data;
852 vars[0].type.dst = c_data;
853 vars[0].direction.bits = c_parameter_in;
854 vars[0].size = sizeof(int);
855 vars[0].count = 1;
856 vars[0].ptr = &setting;
858 OFFLOAD_OFFLOAD(ofld, f_name, 0, 1, vars, NULL, 0, 0, 0);
862 static int omp_get_int_target(
863 TARGET_TYPE target_type,
864 int target_number,
865 const char * f_name
868 int setting = 0;
870 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
871 f_name, 0);
872 if (ofld) {
873 VarDesc vars[1] = {0};
875 vars[0].type.src = c_data;
876 vars[0].type.dst = c_data;
877 vars[0].direction.bits = c_parameter_out;
878 vars[0].size = sizeof(int);
879 vars[0].count = 1;
880 vars[0].ptr = &setting;
882 OFFLOAD_OFFLOAD(ofld, f_name, 0, 1, vars, NULL, 0, 0, 0);
884 return setting;
887 void omp_set_num_threads_target(
888 TARGET_TYPE target_type,
889 int target_number,
890 int num_threads
893 omp_set_int_target(target_type, target_number, num_threads,
894 "omp_set_num_threads_target");
897 int omp_get_max_threads_target(
898 TARGET_TYPE target_type,
899 int target_number
902 return omp_get_int_target(target_type, target_number,
903 "omp_get_max_threads_target");
906 int omp_get_num_procs_target(
907 TARGET_TYPE target_type,
908 int target_number
911 return omp_get_int_target(target_type, target_number,
912 "omp_get_num_procs_target");
915 void omp_set_dynamic_target(
916 TARGET_TYPE target_type,
917 int target_number,
918 int num_threads
921 omp_set_int_target(target_type, target_number, num_threads,
922 "omp_set_dynamic_target");
925 int omp_get_dynamic_target(
926 TARGET_TYPE target_type,
927 int target_number
930 return omp_get_int_target(target_type, target_number,
931 "omp_get_dynamic_target");
934 void omp_set_nested_target(
935 TARGET_TYPE target_type,
936 int target_number,
937 int nested
940 omp_set_int_target(target_type, target_number, nested,
941 "omp_set_nested_target");
944 int omp_get_nested_target(
945 TARGET_TYPE target_type,
946 int target_number
949 return omp_get_int_target(target_type, target_number,
950 "omp_get_nested_target");
953 void omp_set_schedule_target(
954 TARGET_TYPE target_type,
955 int target_number,
956 omp_sched_t kind,
957 int modifier
960 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
961 __func__, 0);
962 if (ofld != 0) {
963 VarDesc vars[2] = {0};
965 vars[0].type.src = c_data;
966 vars[0].type.dst = c_data;
967 vars[0].direction.bits = c_parameter_in;
968 vars[0].size = sizeof(omp_sched_t);
969 vars[0].count = 1;
970 vars[0].ptr = &kind;
972 vars[1].type.src = c_data;
973 vars[1].type.dst = c_data;
974 vars[1].direction.bits = c_parameter_in;
975 vars[1].size = sizeof(int);
976 vars[1].count = 1;
977 vars[1].ptr = &modifier;
979 OFFLOAD_OFFLOAD(ofld, "omp_set_schedule_target",
980 0, 2, vars, NULL, 0, 0, 0);
984 void omp_get_schedule_target(
985 TARGET_TYPE target_type,
986 int target_number,
987 omp_sched_t *kind,
988 int *modifier
991 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
992 __func__, 0);
993 if (ofld != 0) {
994 VarDesc vars[2] = {0};
996 vars[0].type.src = c_data;
997 vars[0].type.dst = c_data;
998 vars[0].direction.bits = c_parameter_out;
999 vars[0].size = sizeof(omp_sched_t);
1000 vars[0].count = 1;
1001 vars[0].ptr = kind;
1003 vars[1].type.src = c_data;
1004 vars[1].type.dst = c_data;
1005 vars[1].direction.bits = c_parameter_out;
1006 vars[1].size = sizeof(int);
1007 vars[1].count = 1;
1008 vars[1].ptr = modifier;
1010 OFFLOAD_OFFLOAD(ofld, "omp_get_schedule_target",
1011 0, 2, vars, NULL, 0, 0, 0);
1015 // lock API functions
1017 void omp_init_lock_target(
1018 TARGET_TYPE target_type,
1019 int target_number,
1020 omp_lock_target_t *lock
1023 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1024 __func__, 0);
1025 if (ofld != 0) {
1026 VarDesc vars[1] = {0};
1028 vars[0].type.src = c_data;
1029 vars[0].type.dst = c_data;
1030 vars[0].direction.bits = c_parameter_out;
1031 vars[0].size = sizeof(omp_lock_target_t);
1032 vars[0].count = 1;
1033 vars[0].ptr = lock;
1035 OFFLOAD_OFFLOAD(ofld, "omp_init_lock_target",
1036 0, 1, vars, NULL, 0, 0, 0);
1040 void omp_destroy_lock_target(
1041 TARGET_TYPE target_type,
1042 int target_number,
1043 omp_lock_target_t *lock
1046 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1047 __func__, 0);
1048 if (ofld != 0) {
1049 VarDesc vars[1] = {0};
1051 vars[0].type.src = c_data;
1052 vars[0].type.dst = c_data;
1053 vars[0].direction.bits = c_parameter_in;
1054 vars[0].size = sizeof(omp_lock_target_t);
1055 vars[0].count = 1;
1056 vars[0].ptr = lock;
1058 OFFLOAD_OFFLOAD(ofld, "omp_destroy_lock_target",
1059 0, 1, vars, NULL, 0, 0, 0);
1063 void omp_set_lock_target(
1064 TARGET_TYPE target_type,
1065 int target_number,
1066 omp_lock_target_t *lock
1069 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1070 __func__, 0);
1071 if (ofld != 0) {
1072 VarDesc vars[1] = {0};
1074 vars[0].type.src = c_data;
1075 vars[0].type.dst = c_data;
1076 vars[0].direction.bits = c_parameter_inout;
1077 vars[0].size = sizeof(omp_lock_target_t);
1078 vars[0].count = 1;
1079 vars[0].ptr = lock;
1081 OFFLOAD_OFFLOAD(ofld, "omp_set_lock_target",
1082 0, 1, vars, NULL, 0, 0, 0);
1086 void omp_unset_lock_target(
1087 TARGET_TYPE target_type,
1088 int target_number,
1089 omp_lock_target_t *lock
1092 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1093 __func__, 0);
1094 if (ofld != 0) {
1095 VarDesc vars[1] = {0};
1097 vars[0].type.src = c_data;
1098 vars[0].type.dst = c_data;
1099 vars[0].direction.bits = c_parameter_inout;
1100 vars[0].size = sizeof(omp_lock_target_t);
1101 vars[0].count = 1;
1102 vars[0].ptr = lock;
1104 OFFLOAD_OFFLOAD(ofld, "omp_unset_lock_target",
1105 0, 1, vars, NULL, 0, 0, 0);
1109 int omp_test_lock_target(
1110 TARGET_TYPE target_type,
1111 int target_number,
1112 omp_lock_target_t *lock
1115 int result = 0;
1117 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1118 __func__, 0);
1119 if (ofld != 0) {
1120 VarDesc vars[2] = {0};
1122 vars[0].type.src = c_data;
1123 vars[0].type.dst = c_data;
1124 vars[0].direction.bits = c_parameter_inout;
1125 vars[0].size = sizeof(omp_lock_target_t);
1126 vars[0].count = 1;
1127 vars[0].ptr = lock;
1129 vars[1].type.src = c_data;
1130 vars[1].type.dst = c_data;
1131 vars[1].direction.bits = c_parameter_out;
1132 vars[1].size = sizeof(int);
1133 vars[1].count = 1;
1134 vars[1].ptr = &result;
1136 OFFLOAD_OFFLOAD(ofld, "omp_test_lock_target",
1137 0, 2, vars, NULL, 0, 0, 0);
1139 return result;
1142 // nested lock API functions
1144 void omp_init_nest_lock_target(
1145 TARGET_TYPE target_type,
1146 int target_number,
1147 omp_nest_lock_target_t *lock
1150 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1151 __func__, 0);
1152 if (ofld != 0) {
1153 VarDesc vars[1] = {0};
1155 vars[0].type.src = c_data;
1156 vars[0].type.dst = c_data;
1157 vars[0].direction.bits = c_parameter_out;
1158 vars[0].size = sizeof(omp_nest_lock_target_t);
1159 vars[0].count = 1;
1160 vars[0].ptr = lock;
1162 OFFLOAD_OFFLOAD(ofld, "omp_init_nest_lock_target",
1163 0, 1, vars, NULL, 0, 0, 0);
1167 void omp_destroy_nest_lock_target(
1168 TARGET_TYPE target_type,
1169 int target_number,
1170 omp_nest_lock_target_t *lock
1173 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1174 __func__, 0);
1175 if (ofld != 0) {
1176 VarDesc vars[1] = {0};
1178 vars[0].type.src = c_data;
1179 vars[0].type.dst = c_data;
1180 vars[0].direction.bits = c_parameter_in;
1181 vars[0].size = sizeof(omp_nest_lock_target_t);
1182 vars[0].count = 1;
1183 vars[0].ptr = lock;
1185 OFFLOAD_OFFLOAD(ofld, "omp_destroy_nest_lock_target",
1186 0, 1, vars, NULL, 0, 0, 0);
1190 void omp_set_nest_lock_target(
1191 TARGET_TYPE target_type,
1192 int target_number,
1193 omp_nest_lock_target_t *lock
1196 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1197 __func__, 0);
1198 if (ofld != 0) {
1199 VarDesc vars[1] = {0};
1201 vars[0].type.src = c_data;
1202 vars[0].type.dst = c_data;
1203 vars[0].direction.bits = c_parameter_inout;
1204 vars[0].size = sizeof(omp_nest_lock_target_t);
1205 vars[0].count = 1;
1206 vars[0].ptr = lock;
1208 OFFLOAD_OFFLOAD(ofld, "omp_set_nest_lock_target",
1209 0, 1, vars, NULL, 0, 0, 0);
1213 void omp_unset_nest_lock_target(
1214 TARGET_TYPE target_type,
1215 int target_number,
1216 omp_nest_lock_target_t *lock
1219 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1220 __func__, 0);
1221 if (ofld != 0) {
1222 VarDesc vars[1] = {0};
1224 vars[0].type.src = c_data;
1225 vars[0].type.dst = c_data;
1226 vars[0].direction.bits = c_parameter_inout;
1227 vars[0].size = sizeof(omp_nest_lock_target_t);
1228 vars[0].count = 1;
1229 vars[0].ptr = lock;
1231 OFFLOAD_OFFLOAD(ofld, "omp_unset_nest_lock_target",
1232 0, 1, vars, NULL, 0, 0, 0);
1236 int omp_test_nest_lock_target(
1237 TARGET_TYPE target_type,
1238 int target_number,
1239 omp_nest_lock_target_t *lock
1242 int result = 0;
1244 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1245 __func__, 0);
1246 if (ofld != 0) {
1247 VarDesc vars[2] = {0};
1249 vars[0].type.src = c_data;
1250 vars[0].type.dst = c_data;
1251 vars[0].direction.bits = c_parameter_inout;
1252 vars[0].size = sizeof(omp_nest_lock_target_t);
1253 vars[0].count = 1;
1254 vars[0].ptr = lock;
1256 vars[1].type.src = c_data;
1257 vars[1].type.dst = c_data;
1258 vars[1].direction.bits = c_parameter_out;
1259 vars[1].size = sizeof(int);
1260 vars[1].count = 1;
1261 vars[1].ptr = &result;
1263 OFFLOAD_OFFLOAD(ofld, "omp_test_nest_lock_target",
1264 0, 2, vars, NULL, 0, 0, 0);
1266 return result;