1 /* GRAPHITE-OpenCL pass.
2 Copyright (C) 2009, 2010 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass implements transformation of perfect loop nests without
21 dependencies to OpenCL kernels.
23 For example, loop nest like this
25 | for (s_i = lb_i; s_i <= ub_i; s_i += stride_i)
26 | for (s_{i_1} = lb_{i+1}; s_{i+1} <= ub_{i+1}; s_{i+1} += stride_{i+1})
28 | for (s_j = lb_j; s_j <= ub_j; s_j += stride_j)
30 | STMT(s_i, s_{i+1}, ..., s_j);
33 will be transformed to this OpenCL kernel and all required OpenCL
34 calls will be created.
36 | __kernel void opencl_auto_function_N
37 | (base_i, mod_i, step_i, first_i,
39 | base_j, mod_j, step_i, first_j,
42 | unsigned int global_id = get_global_id (0);
43 | unsigned int s_i = ((global_id / base_i) % mod_i) * step_i + first_i;
44 | unsigned int s_j = ((global_id / base_j) % mod_j) * step_j + first_j;
45 | STMT(s_i, s_{i+1}, ..., s_j);
51 #include "coretypes.h"
56 #include "basic-block.h"
57 #include "diagnostic.h"
58 #include "tree-flow.h"
60 #include "tree-dump.h"
63 #include "tree-chrec.h"
64 #include "tree-data-ref.h"
65 #include "tree-scalar-evolution.h"
66 #include "tree-pass.h"
68 #include "value-prof.h"
69 #include "pointer-set.h"
77 /* Variable, which holds OpenCL context. */
78 static GTY(()) tree h_context
;
80 /* Variable, which holds OpenCL command queue. */
81 static GTY(()) tree h_cmd_queue
;
83 /* Variable, which holds OpenCL program for current function. */
84 static GTY(()) tree h_program
;
87 #include "cloog/cloog.h"
89 #include "graphite-ppl.h"
91 #include "graphite-poly.h"
92 #include "graphite-scop-detection.h"
93 #include "graphite-clast-to-gimple.h"
94 #include "graphite-dependences.h"
95 #include "dyn-string.h"
96 #include "graphite-opencl.h"
98 /* Data structure to be used in data_reference_p to opencl_data hash
100 struct map_ref_to_data_def
102 data_reference_p key
;
106 typedef struct map_ref_to_data_def
*map_ref_to_data
;
108 /* Calculate hash value from map_ref_to_data. */
111 map_ref_to_data_to_hash (const void *data
)
113 const struct map_ref_to_data_def
*obj
114 = (const struct map_ref_to_data_def
*) data
;
116 return htab_hash_pointer (obj
->key
);
119 /* Compare to map_ref_to_data pointers. */
122 map_ref_to_data_cmp (const void *v1
, const void *v2
)
124 const struct map_ref_to_data_def
*obj_1
125 = (const struct map_ref_to_data_def
*) v1
;
126 const struct map_ref_to_data_def
*obj_2
127 = (const struct map_ref_to_data_def
*) v2
;
129 return (obj_1
->key
== obj_2
->key
);
132 /* Create new map_ref_to_data with NEW_KEY as key and NEW_VALUE as value. */
134 static map_ref_to_data
135 map_ref_to_data_create (data_reference_p new_key
,
136 opencl_data new_value
)
138 map_ref_to_data tmp
= XNEW (struct map_ref_to_data_def
);
141 tmp
->value
= new_value
;
145 /* Data structure to be used in tree to opencl_data hash table. */
147 struct map_tree_to_data_def
153 typedef struct map_tree_to_data_def
*map_tree_to_data
;
155 /* Calculate hash value from map_tree_to_data. */
158 map_tree_to_data_to_hash (const void *data
)
160 const struct map_tree_to_data_def
*obj
161 = (const struct map_tree_to_data_def
*) data
;
163 return htab_hash_pointer (obj
->key
);
166 /* Compare to map_tree_to_data pointers. */
169 map_tree_to_data_cmp (const void *v1
, const void *v2
)
171 const struct map_tree_to_data_def
*obj_1
172 = (const struct map_tree_to_data_def
*) v1
;
173 const struct map_tree_to_data_def
*obj_2
174 = (const struct map_tree_to_data_def
*) v2
;
176 return (obj_1
->key
== obj_2
->key
);
179 /* Create new map_tree_to_data with NEW_KEY as key and NEW_VALUE as value. */
181 static map_tree_to_data
182 map_tree_to_data_create (tree new_key
,
183 opencl_data new_value
)
185 map_tree_to_data tmp
= XNEW (struct map_tree_to_data_def
);
188 tmp
->value
= new_value
;
192 /* Create and init new temporary variable with name NAME and
196 opencl_create_tmp_var (tree type
, const char *name
)
198 tree tmp
= create_tmp_var (type
, name
);
200 TREE_ADDRESSABLE (tmp
) = 1;
204 /* Create new var in basic block DEST to store EXPR and return it. */
207 opencl_tree_to_var (basic_block dest
, tree expr
)
209 tree type
= TREE_TYPE (expr
);
210 tree var
= opencl_create_tmp_var (type
, "__ocl_general_tmp_var");
211 gimple_stmt_iterator g_iter
= gsi_last_bb (dest
);
213 tree call
= build2 (MODIFY_EXPR
, type
, var
, expr
);
215 force_gimple_operand_gsi (&g_iter
, call
, true, NULL
, false,
216 GSI_CONTINUE_LINKING
);
221 /* Set rw flags to false for all datas, referenced in CODE_GEN. */
224 opencl_fflush_rw_flags (opencl_main code_gen
)
226 VEC (opencl_data
, heap
) *datas
= code_gen
->opencl_function_data
;
230 for (i
= 0; VEC_iterate (opencl_data
, datas
, i
, curr
); i
++)
232 curr
->written_in_current_body
= false;
233 curr
->read_in_current_body
= false;
234 curr
->privatized
= false;
238 /* Create new basic block on CODE_GEN->main edge and update it. */
241 opencl_create_bb (opencl_main code_gen
)
243 basic_block tmp
= split_edge (code_gen
->main_edge
);
245 code_gen
->main_edge
= single_succ_edge (tmp
);
249 /* All kernels of current function. */
250 static VEC(tree
, heap
) *opencl_function_kernels
;
252 /* OpenCL code for all kernels of current function. */
253 static dyn_string_t main_program_src
;
255 /* Delete clast meta DATA. */
258 opencl_clast_meta_delete (opencl_clast_meta data
)
263 opencl_clast_meta_delete (data
->body
);
264 opencl_clast_meta_delete (data
->next
);
265 BITMAP_FREE (data
->modified_on_device
);
266 BITMAP_FREE (data
->modified_on_host
);
268 if (data
->access
!= NULL
)
269 BITMAP_FREE (data
->access
);
271 if (data
->can_be_private
)
272 BITMAP_FREE (data
->can_be_private
);
280 #ifdef ENABLE_CHECKING
281 verify_loop_structure ();
282 verify_dominators (CDI_DOMINATORS
);
283 verify_loop_closed_ssa (true);
287 /* OpenCL definitions. */
288 #define CL_CONTEXT_PLATFORM 0x1084
289 #define CL_CONTEXT_DEVICES 0x1081
290 #define CL_DEVICE_TYPE_CPU (1 << 1)
291 #define CL_DEVICE_TYPE_GPU (1 << 2)
292 #define CL_MEM_COPY_HOST_PTR (1 << 5)
293 #define CL_MEM_USE_HOST_PTR (1 << 3)
294 #define CL_MEM_READ_WRITE (1 << 0)
295 #define CL_MEM_WRITE_ONLY (1 << 1)
296 #define CL_MEM_READ_ONLY (1 << 2)
299 #define DEFOPENCLCODE(CODE, FN_NAME) CODE,
301 /* Enum for all OpenCL functions used in GRAPHITE-OpenCL. */
302 enum OPENCL_FUNCTIONS
304 #include "graphite-opencl-functions.def"
310 #define DEFOPENCLCODE(CODE, FN_NAME) FN_NAME,
312 /* Names of all OpenCL functions, used in GRAPHITE-OpenCL. */
313 static const char *opencl_function_names
[] =
315 #include "graphite-opencl-functions.def"
320 /* This vector holds opencl_data, which represents arrays.
321 Arrays have constant sizes, so buffers for each of them can
322 be created only once. */
323 static VEC (opencl_data
, heap
) *opencl_array_data
;
325 /* Hash table, which maps opencl_data, related to arrays, to
326 trees, which represents corresponding array. */
327 static htab_t array_data_to_tree
;
329 /* Check whether VAR is a zero dimension array. */
332 zero_dim_array_p (tree var
)
334 tree type
= TREE_TYPE (var
);
338 if (TREE_CODE (type
) != ARRAY_TYPE
339 || TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
340 || (domain
= TYPE_DOMAIN (type
)) == NULL
)
343 up_bound
= TYPE_MAX_VALUE (domain
);
345 if (TREE_CODE (up_bound
) != INTEGER_CST
)
348 return TREE_INT_CST_LOW (up_bound
) == 0;
351 /* Check whether NAME is the name of the artificial array, which can be
355 opencl_private_var_name_p (const char *name
)
357 static const char *general_reduction
= "General_Reduction";
358 static const char *close_phi
= "Close_Phi";
359 static const char *cross_bb
= "Cross_BB_scalar_dependence";
360 static const char *commutative
= "Commutative_Associative_Reduction";
366 ((strstr (name
, general_reduction
) == name
)
367 || (strstr (name
, close_phi
) == name
)
368 || (strstr (name
, commutative
) == name
)
369 || (strstr (name
, cross_bb
) == name
));
372 /* Check whether VAR is an artificial array, which can be privatized. */
375 graphite_artificial_array_p (tree var
)
379 if (TREE_CODE (var
) != VAR_DECL
380 || !zero_dim_array_p (var
)
381 || !(name
= DECL_NAME (var
)))
384 return opencl_private_var_name_p (IDENTIFIER_POINTER (name
));
387 /* Get depth of type TYPE scalar (base) part. */
390 opencl_get_non_scalar_type_depth (tree type
)
394 while (TREE_CODE (type
) == ARRAY_TYPE
395 || TREE_CODE (type
) == POINTER_TYPE
)
398 type
= TREE_TYPE (type
);
404 /* Constructors & destructors.
405 <name>_create - creates a new object of such type and returns it.
406 <name>_delete - delete object (like destructor). */
409 opencl_data_create (tree var
, tree size
)
411 opencl_data tmp
= XNEW (struct opencl_data_def
);
412 tree type
= TREE_TYPE (var
);
414 tmp
->can_be_private
= graphite_artificial_array_p (var
);
415 tmp
->exact_object
= var
;
417 tmp
->supported
= TREE_CODE (var
) == VAR_DECL
|| TREE_CODE (var
) == SSA_NAME
;
419 if (TREE_CODE (type
) == ARRAY_TYPE
)
420 var
= build_addr (var
, current_function_decl
);
422 tmp
->data_dim
= opencl_get_non_scalar_type_depth (type
);
425 tmp
->size_value
= size
;
427 = opencl_create_tmp_var (size_type_node
, "__opencl_data_size");
429 tmp
->up_to_date_on_host
= true;
430 tmp
->up_to_date_on_device
= true;
431 tmp
->used_on_device
= false;
432 tmp
->ever_read_on_device
= false;
433 tmp
->ever_written_on_device
= false;
438 opencl_data_delete (opencl_data data
)
444 opencl_main_create (CloogNames
*names
, sese region
, edge main_edge
,
447 opencl_main tmp
= XNEW (struct graphite_opencl_creator
);
449 tmp
->root_names
= names
;
450 tmp
->defined_vars
= NULL
;
451 tmp
->global_defined_vars
= NULL
;
452 tmp
->region
= region
;
453 tmp
->main_edge
= main_edge
;
454 tmp
->main_program
= dyn_string_new (OPENCL_INIT_BUFF_SIZE
);
455 tmp
->current_body
= NULL
;
456 tmp
->clast_meta
= NULL
;
457 tmp
->curr_meta
= NULL
;
458 tmp
->params_index
= params_index
;
459 tmp
->newivs_index
= htab_create (10, clast_name_index_elt_info
,
460 eq_clast_name_indexes
, free
);
461 tmp
->ref_to_data
= htab_create (10, map_ref_to_data_to_hash
,
462 map_ref_to_data_cmp
, free
);
463 tmp
->tree_to_data
= htab_create (10, map_tree_to_data_to_hash
,
464 map_tree_to_data_cmp
, free
);
465 tmp
->newivs
= VEC_alloc (tree
, heap
, 10);
466 tmp
->context_loop
= SESE_ENTRY (region
)->src
->loop_father
;
467 tmp
->opencl_function_data
= VEC_alloc (opencl_data
, heap
,
468 OPENCL_INIT_BUFF_SIZE
);
473 opencl_main_delete (opencl_main data
)
478 dyn_string_delete (data
->main_program
);
479 htab_delete (data
->newivs_index
);
480 htab_delete (data
->ref_to_data
);
481 htab_delete (data
->tree_to_data
);
482 opencl_clast_meta_delete (data
->clast_meta
);
484 for (i
= 0; VEC_iterate (opencl_data
, data
->opencl_function_data
, i
, curr
);
486 if (!curr
->is_static
)
487 opencl_data_delete (curr
);
489 VEC_free (tree
, heap
, data
->newivs
);
490 VEC_free (opencl_data
, heap
, data
->opencl_function_data
);
494 /* Add function call CALL to edge SRC. If FLAG_GRAPHITE_OPENCL_DEBUG is
495 enabled, then add the following:
497 | int result = call ();
498 | if (call == 0 != ZERO_RETURN)
501 Otherwise just add CALL as function call. */
504 opencl_add_safe_call_on_edge (tree call
, bool zero_return
, edge src
)
506 if (!flag_graphite_opencl_debug
)
508 basic_block bb
= split_edge (src
);
509 gimple_stmt_iterator g_iter
= gsi_last_bb (bb
);
511 force_gimple_operand_gsi (&g_iter
, call
, true, NULL
, false,
512 GSI_CONTINUE_LINKING
);
513 return single_succ_edge (bb
);
519 basic_block abort_bb
;
522 gimple_stmt_iterator g_iter
;
526 tree correct_result
= build1 (CONVERT_EXPR
, TREE_TYPE (call
),
528 cmp
= build2 (EQ_EXPR
, boolean_type_node
,
529 call
, correct_result
);
533 tree incorrect_result
= build1 (CONVERT_EXPR
, TREE_TYPE (call
),
536 cmp
= build2 (NE_EXPR
, boolean_type_node
,
537 call
, incorrect_result
);
540 result
= create_empty_if_region_on_edge (src
, cmp
);
541 abort_bb
= get_false_edge_from_guard_bb (src
->dest
)->dest
;
542 abort_funtion
= build_fn_decl ("abort", build_function_type_list
543 (void_type_node
, NULL_TREE
));
545 abort_call
= build_call_expr (abort_funtion
, 0);
547 g_iter
= gsi_last_bb (abort_bb
);
548 force_gimple_operand_gsi (&g_iter
, abort_call
, true, NULL
, false,
549 GSI_CONTINUE_LINKING
);
554 /* Add call CALL to the current edge from CODE_GEN. ZERO_RETURN indicates
555 whether NULL or 0 is the success return value for CALL. */
558 opencl_add_safe_call (opencl_main code_gen
, tree call
, bool zero_return
)
561 = opencl_add_safe_call_on_edge (call
, zero_return
, code_gen
->main_edge
);
564 /* Get base object for OBJ. */
567 opencl_get_base_object_by_tree (tree obj
)
569 while (TREE_CODE (obj
) == INDIRECT_REF
570 || TREE_CODE (obj
) == ARRAY_REF
)
571 obj
= TREE_OPERAND (obj
, 0);
576 /* Get base object from data reference DR. */
579 dr_outermost_base_object (data_reference_p dr
)
581 tree addr
= DR_BASE_ADDRESS (dr
);
585 /* In case, we don't know base object. For example:
593 Just return the innermost object when the base address is unknown. */
594 tree ref
= DR_REF (dr
);
596 return opencl_get_base_object_by_tree (ref
);
599 if (TREE_CODE (addr
) == ADDR_EXPR
)
600 addr
= TREE_OPERAND (addr
, 0);
605 /* Get correct basic block for data with DATA_ID transfer. If DEVICE
606 is true, then it's host to device transfer, otherwise it's device
607 to host transfer. CODE_GEN holds information related to code
611 opencl_get_edge_for_init (opencl_main code_gen
, int data_id
, bool device
)
613 opencl_clast_meta curr
= code_gen
->curr_meta
;
620 opencl_clast_meta parent
= curr
->parent
;
622 = device
? parent
->modified_on_host
: parent
->modified_on_device
;
624 if (bitmap_bit_p (curr_bitmap
, data_id
))
630 return curr
->init_edge
;
633 /* Return tree, which represents function selected by ID.
634 If ID is STATIC_INIT, init all required data. */
637 opencl_create_function_decl (enum OPENCL_FUNCTIONS id
)
639 static tree create_context_from_type_decl
= NULL
;
640 static tree get_context_info_decl
= NULL
;
641 static tree create_command_queue_decl
= NULL
;
642 static tree create_program_with_source_decl
= NULL
;
643 static tree build_program_decl
= NULL
;
644 static tree create_kernel_decl
= NULL
;
645 static tree create_buffer_decl
= NULL
;
646 static tree set_kernel_arg_decl
= NULL
;
647 static tree enqueue_nd_range_kernel_decl
= NULL
;
648 static tree enqueue_read_buffer_decl
= NULL
;
649 static tree enqueue_write_buffer_decl
= NULL
;
650 static tree release_memory_obj_decl
= NULL
;
651 static tree release_context_decl
= NULL
;
652 static tree release_command_queue_decl
= NULL
;
653 static tree release_program_decl
= NULL
;
654 static tree release_kernel_decl
= NULL
;
655 static tree get_platform_ids_decl
= NULL
;
656 static tree get_wait_for_events_decl
= NULL
;
662 tree const_char_type
= build_qualified_type (char_type_node
,
664 tree const_char_ptr
= build_pointer_type (const_char_type
);
665 tree const_char_ptr_ptr
= build_pointer_type (const_char_ptr
);
667 tree const_size_t
= build_qualified_type (size_type_node
,
669 tree const_size_t_ptr
= build_pointer_type (const_size_t
);
671 tree size_t_ptr
= build_pointer_type (size_type_node
);
673 tree cl_device_type
= integer_type_node
;
674 tree cl_context_info
= unsigned_type_node
;
675 tree cl_command_queue_properties
= long_unsigned_type_node
;
676 tree cl_mem_flags
= long_unsigned_type_node
;
678 tree cl_context
= ptr_type_node
;
679 tree cl_context_properties
= ptr_type_node
;
680 tree cl_command_queue
= ptr_type_node
;
681 tree cl_device_id
= ptr_type_node
;
682 tree cl_program
= ptr_type_node
;
683 tree cl_kernel
= ptr_type_node
;
684 tree cl_event
= ptr_type_node
;
685 tree cl_mem
= ptr_type_node
;
687 tree const_cl_event
= build_qualified_type (cl_event
,
689 tree cl_event_ptr
= build_pointer_type (cl_event
);
690 tree const_cl_event_ptr
= build_pointer_type (const_cl_event
);
692 tree const_cl_device_id
= build_qualified_type (cl_device_id
,
694 tree const_cl_device_id_ptr
= build_pointer_type (const_cl_device_id
);
696 tree cl_platford_id
= long_integer_type_node
;
697 tree cl_platford_id_ptr
= build_pointer_type (cl_platford_id
);
701 | clCreateContextFromType (cl_context_properties *properties,
702 | cl_device_type device_type,
703 | void (*pfn_notify) (const char *errinfo,
704 | const void *private_info, size_t cb,
707 | cl_int *errcode_ret) */
709 = build_function_type_list (cl_context
,
710 cl_context_properties
,
714 integer_ptr_type_node
,
716 create_context_from_type_decl
717 = build_fn_decl (opencl_function_names
[CREATE_CONTEXT_FROM_TYPE
],
720 /* | cl_int clGetContextInfo (cl_context context,
721 | cl_context_info param_name,
722 | size_t param_value_size,
724 | size_t *param_value_size_ret) */
726 = build_function_type_list (integer_type_node
,
733 get_context_info_decl
734 = build_fn_decl (opencl_function_names
[GET_CONTEXT_INFO
],
737 /* | cl_command_queue
738 | clCreateCommandQueue (cl_context context,
739 | cl_device_id device,
740 | cl_command_queue_properties properties,
741 | cl_int *errcode_ret) */
743 = build_function_type_list (cl_command_queue
,
746 cl_command_queue_properties
,
747 integer_ptr_type_node
,
749 create_command_queue_decl
750 = build_fn_decl (opencl_function_names
[CREATE_COMMAND_QUEUE
],
753 /* | cl_program clCreateProgramWithSource (cl_context context,
755 | const char **strings,
756 | const size_t *lengths,
757 | cl_int *errcode_ret) */
759 = build_function_type_list (cl_program
,
764 integer_ptr_type_node
,
766 create_program_with_source_decl
767 = build_fn_decl (opencl_function_names
[CREATE_PROGRAM_WITH_SOURCE
],
771 | clBuildProgram (cl_program program,
772 | cl_uint num_devices,
773 | const cl_device_id *device_list,
774 | const char *options,
775 | void (*pfn_notify) (cl_program, void *user_data),
776 | void *user_data) */
778 = build_function_type_list (integer_type_node
,
781 const_cl_device_id_ptr
,
787 = build_fn_decl (opencl_function_names
[BUILD_PROGRAM
],
790 /* | cl_kernel clCreateKernel (cl_program program,
791 | const char *kernel_name,
792 | cl_int *errcode_ret) */
794 = build_function_type_list (cl_kernel
,
797 integer_ptr_type_node
,
801 = build_fn_decl (opencl_function_names
[CREATE_KERNEL
],
804 /* | cl_mem clCreateBuffer (cl_context context,
805 | cl_mem_flags flags,
808 | cl_int *errcode_ret) */
811 = build_function_type_list (cl_mem
,
816 integer_ptr_type_node
,
819 = build_fn_decl (opencl_function_names
[CREATE_BUFFER
],
823 /* | cl_int clSetKernelArg (cl_kernel kernel,
826 | const void *arg_value) */
829 = build_function_type_list (integer_type_node
,
836 = build_fn_decl (opencl_function_names
[SET_KERNEL_ARG
],
839 /* | cl_int clEnqueueNDRangeKernel (cl_command_queue command_queue,
842 | const size_t *global_work_offset,
843 | const size_t *global_work_size,
844 | const size_t *local_work_size,
845 | cl_uint num_events_in_wait_list,
846 | const cl_event *event_wait_list,
847 | cl_event *event) */
850 = build_function_type_list (integer_type_node
,
862 enqueue_nd_range_kernel_decl
863 = build_fn_decl (opencl_function_names
[ENQUEUE_ND_RANGE_KERNEL
],
866 /* | cl_int clEnqueueReadBuffer (cl_command_queue command_queue,
868 | cl_bool blocking_read,
872 | cl_uint num_events_in_wait_list,
873 | const cl_event *event_wait_list,
874 | cl_event *event) */
877 = build_function_type_list (integer_type_node
,
889 enqueue_read_buffer_decl
890 = build_fn_decl (opencl_function_names
[ENQUEUE_READ_BUFFER
],
893 /* | cl_int clEnqueueWriteBuffer (cl_command_queue command_queue,
895 | cl_bool blocking_write,
899 | cl_uint num_events_in_wait_list,
900 | const cl_event *event_wait_list,
901 | cl_event *event) */
904 = build_function_type_list (integer_type_node
,
916 enqueue_write_buffer_decl
917 = build_fn_decl (opencl_function_names
[ENQUEUE_WRITE_BUFFER
],
921 /* cl_int clReleaseMemObject (cl_mem memobj) */
924 = build_function_type_list (integer_type_node
, cl_mem
, NULL_TREE
);
926 release_memory_obj_decl
927 = build_fn_decl (opencl_function_names
[RELEASE_MEMORY_OBJ
],
931 /* cl_int clReleaseContext (cl_context context) */
933 = build_function_type_list (integer_type_node
, cl_context
,
937 = build_fn_decl (opencl_function_names
[RELEASE_CONTEXT
],
940 /* cl_int clReleaseCommandQueue (cl_command_queue command_queue) */
942 = build_function_type_list (integer_type_node
, cl_command_queue
,
945 release_command_queue_decl
946 = build_fn_decl (opencl_function_names
[RELEASE_COMMAND_QUEUE
],
949 /* cl_int clReleaseProgram (cl_program program) */
951 = build_function_type_list (integer_type_node
, cl_program
,
955 = build_fn_decl (opencl_function_names
[RELEASE_PROGRAM
],
958 /* cl_int clReleaseKernel (cl_kernel kernel) */
960 = build_function_type_list (integer_type_node
, cl_kernel
, NULL_TREE
);
963 = build_fn_decl (opencl_function_names
[RELEASE_KERNEL
],
966 /* | cl_int clGetPlatformIDs (cl_uint num_entries,
967 | cl_platform_id *platforms,
968 | cl_uint *num_platforms) */
972 = build_function_type_list (integer_type_node
,
975 build_pointer_type (unsigned_type_node
),
977 get_platform_ids_decl
978 = build_fn_decl (opencl_function_names
[GET_PLATFORM_IDS
],
982 /* | cl_int clWaitForEvents (cl_uint num_events,
983 | const cl_event *event_list) */
986 = build_function_type_list (integer_type_node
,
991 get_wait_for_events_decl
992 = build_fn_decl (opencl_function_names
[WAIT_FOR_EVENTS
],
998 case CREATE_CONTEXT_FROM_TYPE
:
999 return create_context_from_type_decl
;
1001 case GET_CONTEXT_INFO
:
1002 return get_context_info_decl
;
1004 case CREATE_COMMAND_QUEUE
:
1005 return create_command_queue_decl
;
1007 case CREATE_PROGRAM_WITH_SOURCE
:
1008 return create_program_with_source_decl
;
1011 return build_program_decl
;
1014 return create_kernel_decl
;
1017 return create_buffer_decl
;
1019 case SET_KERNEL_ARG
:
1020 return set_kernel_arg_decl
;
1022 case ENQUEUE_ND_RANGE_KERNEL
:
1023 return enqueue_nd_range_kernel_decl
;
1025 case ENQUEUE_READ_BUFFER
:
1026 return enqueue_read_buffer_decl
;
1028 case ENQUEUE_WRITE_BUFFER
:
1029 return enqueue_write_buffer_decl
;
1031 case RELEASE_MEMORY_OBJ
:
1032 return release_memory_obj_decl
;
1034 case RELEASE_CONTEXT
:
1035 return release_context_decl
;
1037 case RELEASE_COMMAND_QUEUE
:
1038 return release_command_queue_decl
;
1040 case RELEASE_PROGRAM
:
1041 return release_program_decl
;
1043 case RELEASE_KERNEL
:
1044 return release_kernel_decl
;
1046 case GET_PLATFORM_IDS
:
1047 return get_platform_ids_decl
;
1049 case WAIT_FOR_EVENTS
:
1050 return get_wait_for_events_decl
;
1052 default: gcc_unreachable ();
1056 /* Add clWaitForEvent (1, EVENT_VAR); call to CODE_GEN->main_edge. */
1059 opencl_wait_for_event (opencl_main code_gen
, tree event_var
)
1061 tree function
= opencl_create_function_decl (WAIT_FOR_EVENTS
);
1062 tree call
= build_call_expr (function
, 2,
1066 opencl_add_safe_call (code_gen
, call
, true);
1069 /* Add host to device memory transfer. DATA - data, which must be
1070 transfered to device. CODE_GEN holds information related to code
1074 opencl_pass_to_device (opencl_main code_gen
, opencl_data data
)
1080 tree index_type
= build_index_type (build_int_cst (NULL_TREE
, 2));
1081 tree array_type
= build_array_type (ptr_type_node
, index_type
);
1082 tree var
= opencl_create_tmp_var (array_type
, "wait_event");
1085 TREE_STATIC (var
) = 1;
1086 assemble_variable (var
, 1, 0, 1);
1088 /* If data is wrutten in device, mark it is not up to date on host. */
1089 if (data
->written_in_current_body
)
1090 data
->up_to_date_on_host
= false;
1092 /* If data is up to date on device, but it was initialized befor
1093 current loop, then mark it as initialized in current loop and
1096 Consider an example: D - device, H - host, W - write, R - read.
1104 While analyzing statement (2), data will be up to date on host
1105 because of statement (1), but while executing after (3) in loop,
1106 (2) will read incorrect data.
1108 So, we have to add device to host memory transfer after statement (3).
1110 if (flag_graphite_opencl_cpu
)
1111 return data
->device_object
;
1113 if (data
->up_to_date_on_device
)
1115 if (!data
->inited_in_current_loop_on_device
1116 && code_gen
&& code_gen
->curr_meta
1117 && code_gen
->curr_meta
->parent
)
1118 VEC_safe_push (opencl_data
, heap
,
1119 code_gen
->curr_meta
->parent
->post_pass_to_device
,
1122 data
->inited_in_current_loop_on_device
= true;
1123 return data
->device_object
;
1126 data
->inited_in_current_loop_on_device
= true;
1127 init_edge
= opencl_get_edge_for_init (code_gen
, data
->id
, true);
1130 function
= opencl_create_function_decl (ENQUEUE_WRITE_BUFFER
);
1132 event_call
= build4 (ARRAY_REF
, ptr_type_node
, var
,
1133 integer_zero_node
, NULL_TREE
, NULL_TREE
);
1134 event_call
= build_addr (event_call
, current_function_decl
);
1135 call
= build_call_expr (function
, 9,
1137 data
->device_object
,
1138 build_int_cst (NULL_TREE
, CL_TRUE
),
1140 data
->size_variable
,
1147 opencl_add_safe_call_on_edge (call
, true, init_edge
);
1149 opencl_add_safe_call (code_gen
, call
, true);
1151 data
->up_to_date_on_device
= true;
1152 opencl_wait_for_event (code_gen
, event_call
);
1153 return data
->device_object
;
1156 /* Add device to host memory transfer. DATA - data, which must be
1157 transfered to host. CODE_GEN holds information related to code
1161 opencl_pass_to_host (opencl_main code_gen
, opencl_data data
)
1168 tree index_type
= build_index_type (build_int_cst (NULL_TREE
, 2));
1169 tree array_type
= build_array_type (ptr_type_node
, index_type
);
1170 tree var
= opencl_create_tmp_var (array_type
, "wait_event");
1173 TREE_STATIC (var
) = 1;
1174 assemble_variable (var
, 1, 0, 1);
1176 if (data
->written_in_current_body
)
1177 data
->up_to_date_on_device
= false;
1179 if (data
->up_to_date_on_host
)
1181 if (!data
->inited_in_current_loop_on_host
1182 && code_gen
&& code_gen
->curr_meta
&&
1183 code_gen
->curr_meta
->parent
)
1184 VEC_safe_push (opencl_data
, heap
,
1185 code_gen
->curr_meta
->parent
->post_pass_to_host
, data
);
1187 data
->inited_in_current_loop_on_host
= true;
1191 data
->inited_in_current_loop_on_host
= true;
1193 if (flag_graphite_opencl_cpu
1194 || data
->privatized
)
1197 init_edge
= opencl_get_edge_for_init (code_gen
, data
->id
, false);
1199 function
= opencl_create_function_decl (ENQUEUE_READ_BUFFER
);
1200 curr_type
= TREE_TYPE (data
->object
);
1201 curr
= data
->object
;
1203 if (TREE_CODE (curr_type
) == ARRAY_TYPE
)
1204 curr
= build_addr (curr
, current_function_decl
);
1206 event_call
= build4 (ARRAY_REF
, ptr_type_node
, var
,
1207 integer_zero_node
, NULL_TREE
, NULL_TREE
);
1208 event_call
= build_addr (event_call
, current_function_decl
);
1210 call
= build_call_expr (function
, 9,
1212 data
->device_object
,
1213 build_int_cst (NULL_TREE
, CL_TRUE
),
1215 data
->size_variable
,
1216 curr
, integer_zero_node
,
1221 opencl_add_safe_call_on_edge (call
, true, init_edge
);
1223 opencl_add_safe_call (code_gen
, call
, true);
1225 opencl_wait_for_event (code_gen
, event_call
);
1226 data
->up_to_date_on_host
= true;
1229 /* Pass all data from device to host. This function must be called when
1230 we need all data to be up to date on host. CODE_GEN holds information
1231 related to code generation. */
1234 opencl_fflush_all_device_buffers_to_host (opencl_main code_gen
)
1236 VEC (opencl_data
, heap
) *datas
= code_gen
->opencl_function_data
;
1239 tree function
= opencl_create_function_decl (RELEASE_MEMORY_OBJ
);
1241 for (i
= 0; VEC_iterate (opencl_data
, datas
, i
, curr
); i
++)
1243 curr
->written_in_current_body
= true;
1244 opencl_pass_to_host (code_gen
, curr
);
1247 for (i
= 0; VEC_iterate (opencl_data
, datas
, i
, curr
); i
++)
1248 if (curr
->used_on_device
&& !curr
->is_static
)
1250 tree var
= curr
->device_object
;
1251 tree call
= build_call_expr (function
, 1, var
);
1253 opencl_add_safe_call (code_gen
, call
, true);
1257 /* Calculate correct flags for clCreateBuffer. READ means, that
1258 buffer must be readable on device, WRITE - that buffer must be
1259 writable on device. */
1262 opencl_get_mem_flags (bool read
, bool write
)
1267 gcc_assert (read
|| write
);
1270 rw_flags
= CL_MEM_READ_WRITE
;
1274 rw_flags
= CL_MEM_READ_ONLY
;
1276 rw_flags
= CL_MEM_WRITE_ONLY
;
1279 if (flag_graphite_opencl_cpu
)
1280 location_flags
= CL_MEM_USE_HOST_PTR
;
1282 location_flags
= CL_MEM_COPY_HOST_PTR
;
1284 return location_flags
| rw_flags
;
1287 /* Create memory on device for DATA and init it by data from host.
1288 ptr is pointer to host memory location. Function returns tree,
1289 corresponding to memory location on device. */
1292 opencl_create_memory_for_pointer (opencl_data data
)
1294 tree ptr
= data
->object
;
1295 tree arr_size
= data
->size_variable
;
1296 tree function
= opencl_create_function_decl (CREATE_BUFFER
);
1297 bool ever_read
= data
->ever_read_on_device
;
1298 bool ever_written
= data
->ever_written_on_device
;
1299 tree mem_flags
= build_int_cst (NULL_TREE
,
1300 opencl_get_mem_flags (ever_read
,
1302 if (TREE_CODE (TREE_TYPE (ptr
)) == ARRAY_TYPE
)
1303 ptr
= build_addr (ptr
, current_function_decl
);
1305 if (flag_graphite_opencl_debug
)
1307 tree result
= opencl_create_tmp_var (integer_type_node
,
1308 "__opencl_create_buffer_result");
1310 return build_call_expr (function
, 5,
1311 h_context
, mem_flags
,
1314 integer_ptr_type_node
,
1318 return build_call_expr (function
, 5,
1319 h_context
, mem_flags
,
1320 arr_size
, ptr
, null_pointer_node
);
1323 /* Create memory buffers on host for all required host memory objects.
1324 CODE_GEN holds information related to code generation. */
1327 opencl_init_all_device_buffers (opencl_main code_gen
)
1329 VEC (opencl_data
, heap
) *datas
= code_gen
->opencl_function_data
;
1332 edge data_init_edge
= single_succ_edge (code_gen
->data_init_bb
);
1334 for (i
= 0; VEC_iterate (opencl_data
, datas
, i
, curr
); i
++)
1338 if (!curr
->used_on_device
|| curr
->is_static
)
1341 tmp
= opencl_create_memory_for_pointer (curr
);
1342 tmp
= build2 (MODIFY_EXPR
, ptr_type_node
, curr
->device_object
, tmp
);
1343 data_init_edge
= opencl_add_safe_call_on_edge (tmp
, false,
1348 /* Create new static void * variable with name __ocl_ + NAME. */
1351 opencl_create_static_ptr_variable (const char *name
)
1353 const char *id_name
= concat ("__ocl_",name
, NULL
);
1354 tree var
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
1355 create_tmp_var_name (id_name
), ptr_type_node
);
1356 TREE_STATIC (var
) = 1;
1357 TREE_PUBLIC (var
) = 0;
1358 DECL_ARTIFICIAL (var
) = 1;
1359 TREE_USED (var
) = 1;
1360 TREE_ADDRESSABLE (var
) = 1;
1361 DECL_INITIAL (var
) = null_pointer_node
;
1362 assemble_variable (var
, 1, 0, 1);
1367 /* Insert several opencl calls to output program. */
1369 /* | cl_program h_program;
1370 | h_program = clCreateProgramWithSource (h_context, 1,
1371 | sProgramSource, 0, 0);
1373 SRC is the program source code and DEST is the edge where
1374 call must be inserted. */
1377 opencl_insert_create_program_with_source_call (const char *src
, edge base
)
1379 /* Get tree with function definition. */
1380 tree function
= opencl_create_function_decl (CREATE_PROGRAM_WITH_SOURCE
);
1381 tree code_tree
= build_string_literal (strlen (src
) + 1, src
);
1383 basic_block bb
= split_edge (base
);
1384 tree tmp_var
= opencl_tree_to_var (bb
, code_tree
);
1386 call
= build_call_expr (function
, 5, h_context
,
1388 build_addr (tmp_var
, current_function_decl
),
1392 call
= build2 (MODIFY_EXPR
, ptr_type_node
,
1394 return opencl_add_safe_call_on_edge (call
, false, single_succ_edge (bb
));
1397 /* clBuildProgram (h_program, 0, 0, 0, 0, 0);
1398 BASE is the edge where call must be inserted. */
1401 opencl_insert_build_program_call (edge base
)
1403 tree function
= opencl_create_function_decl (BUILD_PROGRAM
);
1404 tree call
= build_call_expr (function
, 6,
1406 integer_zero_node
, null_pointer_node
,
1407 null_pointer_node
, null_pointer_node
,
1409 return opencl_add_safe_call_on_edge (call
, true, base
);
1412 /* cl_kernel tmm_kernel;
1413 tmp_kernel = clCreateKernel (h_program, func_name, 0);
1414 FUNCTION_NAME is the name of the kernel function,
1415 CODE_GEN holds information related to code generation. */
1418 opencl_insert_create_kernel_call (opencl_main code_gen
,
1419 const char *function_name
)
1421 tree new_kernel_var
;
1423 gimple_stmt_iterator g_iter
;
1429 new_kernel_var
= opencl_create_tmp_var (ptr_type_node
, function_name
);
1430 bb
= split_edge (code_gen
->kernel_edge
);
1431 g_iter
= gsi_last_bb (bb
);
1432 function
= opencl_create_function_decl (CREATE_KERNEL
);
1433 kernel_name
= build_string_literal (strlen (function_name
) + 1,
1435 call
= build_call_expr (function
, 3, h_program
, kernel_name
,
1437 tmp_tree
= build2 (MODIFY_EXPR
, ptr_type_node
,
1438 new_kernel_var
, call
);
1440 code_gen
->kernel_edge
= single_succ_edge (bb
);
1441 force_gimple_operand_gsi (&g_iter
, tmp_tree
, true, NULL
, false,
1442 GSI_CONTINUE_LINKING
);
1443 VEC_safe_push (tree
, heap
, opencl_function_kernels
, new_kernel_var
);
1444 code_gen
->kernel_edge
1445 = opencl_add_safe_call_on_edge (new_kernel_var
, false,
1446 code_gen
->kernel_edge
);
1447 return new_kernel_var
;
1450 /* Init memory on device. Only one levell of pointers are suppoted.
1451 So in case of char ** only array of char * will be created.
1452 Function return tree, corresponding to new pointer (pointer
1455 | cl_mem clCreateBuffer (cl_context context,
1456 | cl_mem_flags flags,
1459 | cl_int *errcode_ret) */
1461 /* Calculate size of data reference, represented by REF. PTR is a
1462 base object of data reference. */
1465 opencl_get_indirect_size (tree ptr
, poly_dr_p ref
)
1467 ptr
= TREE_TYPE (ptr
);
1469 switch (TREE_CODE (ptr
))
1472 return TYPE_SIZE_UNIT (ptr
);
1476 tree inner_type
= TREE_TYPE (ptr
);
1477 tree t
= graphite_outer_subscript_bound (ref
, false);
1478 tree inner_type_size
= TYPE_SIZE_UNIT (inner_type
);
1480 if (inner_type_size
== NULL
)
1483 if (DECL_P (inner_type_size
))
1484 add_referenced_var (inner_type_size
);
1487 t
= fold_build2 (TRUNC_DIV_EXPR
, sizetype
, t
, inner_type_size
);
1488 t
= fold_build2 (PLUS_EXPR
, sizetype
, t
, size_one_node
);
1489 t
= fold_build2 (MULT_EXPR
, sizetype
, t
, inner_type_size
);
1500 /* Create variables for kernel KERNEL arguments. Each argument is
1501 represented by new variable with it's value and it's size. If arg
1502 is a pointer or array, it's represented by device buffer with data
1503 from host memory. CODE_GEN holds information related to code
1507 opencl_init_local_device_memory (opencl_main code_gen
, opencl_body kernel
)
1509 VEC (tree
, heap
) **args
= &kernel
->function_args
;
1510 VEC (tree
, heap
) **args_to_pass
= &kernel
->function_args_to_pass
;
1511 VEC (opencl_data
, heap
) **refs
= &kernel
->data_refs
;
1513 opencl_data curr_data
;
1515 basic_block bb
= opencl_create_bb (code_gen
);
1516 basic_block kernel_bb
= split_edge (code_gen
->kernel_edge
);
1518 code_gen
->kernel_edge
= single_succ_edge (kernel_bb
);
1520 for (i
= 0; VEC_iterate (tree
, *args
, i
, curr
); i
++)
1522 gimple_stmt_iterator g_iter
= gsi_last_bb (bb
);
1523 gimple_stmt_iterator kernel_g_iter
= gsi_last_bb (kernel_bb
);
1524 tree curr_type
= TREE_TYPE (curr
);
1528 tree curr_var
= opencl_create_tmp_var (curr_type
, "__ocl_iv");
1530 if (TREE_CODE (curr
) != PARM_DECL
1531 && TREE_CODE (curr
) != VAR_DECL
)
1533 mov
= build2 (MODIFY_EXPR
, curr_type
, curr_var
, curr
);
1535 force_gimple_operand_gsi (&g_iter
, mov
, false, NULL
, false,
1536 GSI_CONTINUE_LINKING
);
1539 force_gimple_operand_gsi (&g_iter
, curr
, false, curr_var
, false,
1540 GSI_CONTINUE_LINKING
);
1543 new_type
= build_pointer_type (curr_type
);
1544 tmp_var
= opencl_create_tmp_var (new_type
, "__opencl_scalar_arg");
1545 mov
= build1 (ADDR_EXPR
, new_type
, curr
);
1547 mov
= build2 (MODIFY_EXPR
, new_type
, tmp_var
, mov
);
1549 force_gimple_operand_gsi (&kernel_g_iter
, mov
, false, NULL
, false,
1550 GSI_CONTINUE_LINKING
);
1551 VEC_safe_push (tree
, heap
, *args_to_pass
, tmp_var
);
1554 for (i
= 0; VEC_iterate (opencl_data
, *refs
, i
, curr_data
); i
++)
1556 gimple_stmt_iterator kernel_g_iter
= gsi_last_bb (kernel_bb
);
1560 tree curr
= opencl_pass_to_device (code_gen
, curr_data
);
1561 tree curr_type
= ptr_type_node
;
1563 new_type
= build_pointer_type (curr_type
);
1564 tmp_var
= opencl_create_tmp_var (new_type
, "__opencl_non_scalar_arg");
1565 mov
= build1 (ADDR_EXPR
, new_type
, curr
);
1567 mov
= build2 (MODIFY_EXPR
, new_type
, tmp_var
, mov
);
1569 force_gimple_operand_gsi (&kernel_g_iter
, mov
, false, NULL
, false,
1570 GSI_CONTINUE_LINKING
);
1571 VEC_safe_push (tree
, heap
, *args_to_pass
, tmp_var
);
1576 /* cl_int clSetKernelArg (cl_kernel kernel,
1579 const void *arg_value)
1581 Set all kernel args for OpenCL kernel, represented by KERNEL_VAR.
1582 KERNEL holds all data, related to given kernel.
1583 CODE_GEN holds information related to code generation.
1584 All arguments are passed by pointer. */
1587 opencl_pass_kernel_arguments (opencl_main code_gen
, opencl_body kernel
,
1590 VEC (tree
, heap
) *args_to_pass
= kernel
->function_args_to_pass
;
1593 tree function
= opencl_create_function_decl (SET_KERNEL_ARG
);
1595 for (i
= 0; VEC_iterate (tree
, args_to_pass
, i
, arg
); i
++)
1598 = build_call_expr (function
, 4, kernel_var
,
1599 build_int_cst (NULL_TREE
, i
),
1600 TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (arg
))),
1602 opencl_add_safe_call (code_gen
, call
, true);
1606 /* clEnqueueNDRangeKernel (h_cmd_queue, hKernel, 1, 0,
1607 &cnDimension, 0, 0, 0, 0);
1609 Execute kernel, represented by KERNEL_VAR in NUM_OF_EXEC threads.
1610 Use EVENT_VAR as event variable for asynchronous call.
1611 CODE_GEN holds information related to code generation. */
1614 opencl_execute_kernel (opencl_main code_gen
, tree num_of_exec
,
1615 tree kernel_var
, tree event_var
)
1617 tree function
= opencl_create_function_decl (ENQUEUE_ND_RANGE_KERNEL
);
1618 tree num_of_threads
= opencl_create_tmp_var (integer_type_node
,
1619 "__opencl_num_of_exec");
1620 gimple_stmt_iterator g_iter
= gsi_last_bb (opencl_create_bb (code_gen
));
1623 TREE_STATIC (num_of_threads
) = 1;
1624 assemble_variable (num_of_threads
, 1, 0, 1);
1626 call
= build2 (MODIFY_EXPR
, integer_type_node
, num_of_threads
, num_of_exec
);
1628 force_gimple_operand_gsi (&g_iter
, call
, true, NULL
, false,
1629 GSI_CONTINUE_LINKING
);
1631 call
= build1 (ADDR_EXPR
, integer_ptr_type_node
, num_of_threads
);
1633 call
= build_call_expr (function
, 9,
1644 opencl_add_safe_call (code_gen
, call
, true);
1647 /* Place building program from single source string to edge BASE.
1648 Current implementation performs single build per function.
1649 String contains kernels from all scops of current function.
1650 Functions returns true if any kernel has been created. */
1653 opencl_create_function_call (edge base
)
1658 /* Required for addressing types with size less then 4 bytes. */
1659 dyn_string_prepend_cstr
1661 "#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable\n");
1662 /* Required for double type. */
1663 dyn_string_prepend_cstr
1664 (main_program_src
, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n ");
1665 src
= dyn_string_buf (main_program_src
);
1667 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1669 fprintf (dump_file
, "\nGenerated OpenCL code: \n");
1670 fprintf (dump_file
, "%s", src
);
1673 new_edge
= opencl_insert_create_program_with_source_call (src
, base
);
1675 return opencl_insert_build_program_call (new_edge
);
1678 /* Mark privatizable data for current loop nest. Information where
1679 given data can be privatized is taken from meta information of
1680 current loop nest, which is stored in CODE_GEN. */
1683 opencl_mark_privatized_data (opencl_main code_gen
)
1685 VEC (opencl_data
, heap
) *datas
= code_gen
->opencl_function_data
;
1688 bitmap can_be_private
= code_gen
->curr_meta
->can_be_private
;
1690 for (i
= 0; VEC_iterate (opencl_data
, datas
, i
, curr
); i
++)
1691 curr
->privatized
= bitmap_bit_p (can_be_private
, curr
->id
);
1695 /* Store calculated sizes of all pointers or arrays to variables.
1696 CODE_GEN holds information related to code generation. */
1699 opencl_set_data_size (opencl_main code_gen
)
1701 VEC (opencl_data
, heap
) *datas
= code_gen
->opencl_function_data
;
1704 gimple_stmt_iterator g_iter
= gsi_last_bb (code_gen
->data_init_bb
);
1706 for (i
= 0; VEC_iterate (opencl_data
, datas
, i
, curr
); i
++)
1711 || !curr
->used_on_device
1712 || curr
->size_value
== NULL
)
1715 call
= build2 (MODIFY_EXPR
, size_type_node
,
1716 curr
->size_variable
, curr
->size_value
);
1718 force_gimple_operand_gsi (&g_iter
, call
, true, NULL
, false,
1719 GSI_CONTINUE_LINKING
);
1723 /* Find opencl_data which represents array VAR. */
1726 opencl_get_static_data_by_tree (tree var
)
1728 map_tree_to_data tmp
= map_tree_to_data_create (var
, NULL
);
1729 map_tree_to_data
*slot
1730 = (map_tree_to_data
*) htab_find_slot (array_data_to_tree
,
1735 return (*slot
)->value
;
1739 /* Create required OpenCL variable for given DATA. */
1742 opencl_data_init_object (opencl_data data
)
1744 if (TREE_CODE (TREE_TYPE (data
->exact_object
)) == POINTER_TYPE
)
1747 = opencl_create_tmp_var (ptr_type_node
, "__opencl_data");
1748 data
->is_static
= false;
1752 /* (TREE_CODE (TREE_TYPE (data->exact_object)) == ARRAY_TYPE) */
1753 map_tree_to_data tree_ptr
1754 = map_tree_to_data_create (data
->exact_object
, data
);
1755 map_tree_to_data
*tree_slot
1756 = (map_tree_to_data
*) htab_find_slot (array_data_to_tree
,
1759 gcc_assert (*tree_slot
== NULL
);
1760 *tree_slot
= tree_ptr
;
1763 = opencl_create_static_ptr_variable ("__opencl_data");
1764 data
->is_static
= true;
1765 data
->size_variable
= data
->size_value
;
1766 VEC_safe_push (opencl_data
, heap
, opencl_array_data
, data
);
1770 /* Register reference to DATA via data reference REF_KEY and
1771 variable TREE_KEY in CODE_GEN structures. */
1774 opencl_register_data (opencl_main code_gen
, opencl_data data
,
1775 tree tree_key
, data_reference_p ref_key
)
1777 htab_t ref_to_data
= code_gen
->ref_to_data
;
1778 htab_t tree_to_data
= code_gen
->tree_to_data
;
1779 map_ref_to_data ref_ptr
= map_ref_to_data_create (ref_key
, data
);
1780 map_tree_to_data tree_ptr
= map_tree_to_data_create (tree_key
, data
);
1781 map_ref_to_data
*ref_slot
;
1782 map_tree_to_data
*tree_slot
;
1785 = (map_ref_to_data
*) htab_find_slot (ref_to_data
, ref_ptr
, INSERT
);
1786 gcc_assert (*ref_slot
== NULL
);
1787 *ref_slot
= ref_ptr
;
1790 = (map_tree_to_data
*) htab_find_slot (tree_to_data
, tree_ptr
, INSERT
);
1791 gcc_assert (*tree_slot
== NULL
|| (*tree_slot
)->value
== data
);
1792 *tree_slot
= tree_ptr
;
1795 /* Analyze single data reference REF and update CODE_GEN structures.
1796 If it access data, which has been accessed in data references
1797 before, update it's size. Otherwise add data to array. */
1800 opencl_parse_single_data_ref (poly_dr_p ref
, opencl_main code_gen
)
1802 data_reference_p d_ref
= (data_reference_p
) PDR_CDR (ref
);
1803 tree data_ref_tree
= dr_outermost_base_object (d_ref
);
1804 opencl_data curr
= opencl_get_data_by_tree (code_gen
, data_ref_tree
);
1805 tree size
= opencl_get_indirect_size (data_ref_tree
, ref
);
1809 if (!curr
->is_static
)
1811 if (!size
|| !curr
->size_value
)
1812 curr
->size_value
= NULL
;
1814 curr
->size_value
= fold_build2 (MAX_EXPR
, sizetype
,
1815 size
, curr
->size_value
);
1820 curr
= opencl_get_static_data_by_tree (data_ref_tree
);
1824 curr
= opencl_data_create (data_ref_tree
, size
);
1825 opencl_data_init_object (curr
);
1828 curr
->id
= VEC_length (opencl_data
, code_gen
->opencl_function_data
);
1829 VEC_safe_push (opencl_data
, heap
, code_gen
->opencl_function_data
, curr
);
1832 opencl_register_data (code_gen
, curr
, data_ref_tree
, d_ref
);
1835 /* Analyse all data reference for poly basic block PBB and update CODE_GEN
1839 opencl_parse_data_refs (poly_bb_p pbb
, opencl_main code_gen
)
1841 VEC (poly_dr_p
, heap
) *drs
= PBB_DRS (pbb
);
1845 for (i
= 0; VEC_iterate (poly_dr_p
, drs
, i
, curr
); i
++)
1846 opencl_parse_single_data_ref (curr
, code_gen
);
1849 /* Analyse all data reference for scop M_SCOP and update
1850 CODE_GEN structures. */
1853 opencl_init_data (scop_p m_scop
, opencl_main code_gen
)
1855 VEC (poly_bb_p
, heap
) *bbs
= SCOP_BBS (m_scop
);
1859 for (i
= 0; VEC_iterate (poly_bb_p
, bbs
, i
, curr
); i
++)
1860 opencl_parse_data_refs (curr
, code_gen
);
1863 /* Init basic block in CODE_GEN structures. */
1866 opencl_init_basic_blocks (opencl_main code_gen
)
1868 code_gen
->data_init_bb
= opencl_create_bb (code_gen
);
1869 code_gen
->kernel_edge
= code_gen
->main_edge
;
1872 /* Add function calls to create and launch kernel KERNEL to
1873 CODE_GEN->main_edge. */
1876 opencl_create_gimple_for_body (opencl_body kernel
, opencl_main code_gen
)
1878 tree num_of_exec
= kernel
->num_of_exec
;
1882 = opencl_insert_create_kernel_call (code_gen
, (const char *) kernel
->name
);
1884 tree index_type
= build_index_type (build_int_cst (NULL_TREE
, 2));
1885 tree array_type
= build_array_type (ptr_type_node
, index_type
);
1886 tree var
= opencl_create_tmp_var (array_type
, "wait_event");
1888 TREE_STATIC (var
) = 1;
1889 assemble_variable (var
, 1, 0, 1);
1891 call
= build4 (ARRAY_REF
, ptr_type_node
, var
,
1892 integer_zero_node
, NULL_TREE
, NULL_TREE
);
1893 call
= build_addr (call
, current_function_decl
);
1895 opencl_init_local_device_memory (code_gen
, kernel
);
1896 opencl_pass_kernel_arguments (code_gen
, kernel
, kernel_var
);
1898 opencl_execute_kernel (code_gen
, num_of_exec
, kernel_var
, call
);
1899 opencl_wait_for_event (code_gen
, call
);
1902 /* Prepare memory for gimple (host) statement, represented by PBB.
1903 Copy memory from device to host if it's nessesary.
1904 CODE_GEN holds information related to code generation. */
1907 opencl_prepare_memory_for_gimple_stmt (poly_bb_p pbb
, opencl_main code_gen
)
1909 VEC (poly_dr_p
, heap
) *drs
= PBB_DRS (pbb
);
1913 for (i
= 0; VEC_iterate (poly_dr_p
, drs
, i
, curr
); i
++)
1915 data_reference_p d_ref
= (data_reference_p
) PDR_CDR (curr
);
1919 /* Scalar variables can be passed directly. */
1920 data
= opencl_get_data_by_data_ref (code_gen
, d_ref
);
1922 /* Private variables should not be passed from device to host. */
1923 if (data
->privatized
)
1926 is_read
= DR_IS_READ (d_ref
);
1929 data
->read_in_current_body
= is_read
;
1930 data
->written_in_current_body
= !is_read
;
1931 opencl_pass_to_host (code_gen
, data
);
1934 bitmap_set_bit (code_gen
->curr_meta
->modified_on_host
, data
->id
);
1938 /* Add basic block from clast_user_stmt STMT to gimple.
1939 CODE_GEN holds information related to code generation. */
1942 opencl_add_gimple_for_user_stmt (struct clast_user_stmt
*stmt
,
1943 opencl_main code_gen
)
1946 CloogStatement
*cs
= stmt
->statement
;
1947 poly_bb_p pbb
= (poly_bb_p
) cloog_statement_usr (cs
);
1948 sese region
= code_gen
->region
;
1949 int nb_loops
= number_of_loops ();
1951 VEC (tree
, heap
) *iv_map
= VEC_alloc (tree
, heap
, nb_loops
);
1952 htab_t newivs_index
= code_gen
->newivs_index
;
1953 VEC (tree
, heap
) *newivs
= code_gen
->newivs
;
1955 /* Get basic block to add. */
1956 gbb
= PBB_BLACK_BOX (pbb
);
1958 if (GBB_BB (gbb
) == ENTRY_BLOCK_PTR
)
1962 opencl_fflush_rw_flags (code_gen
);
1964 /* Pass all required memory to host. */
1965 opencl_prepare_memory_for_gimple_stmt (pbb
, code_gen
);
1967 for (i
= 0; i
< nb_loops
; i
++)
1968 VEC_quick_push (tree
, iv_map
, NULL_TREE
);
1970 build_iv_mapping (iv_map
, region
, newivs
, newivs_index
,
1971 stmt
, code_gen
->params_index
);
1973 = copy_bb_and_scalar_dependences (GBB_BB (gbb
), region
,
1974 code_gen
->main_edge
, iv_map
);
1975 VEC_free (tree
, heap
, iv_map
);
1976 recompute_all_dominators ();
1977 update_ssa (TODO_update_ssa
);
1982 /* Delete opencl_body DATA. */
1985 opencl_body_delete (opencl_body data
)
1987 dyn_string_delete (data
->body
);
1988 dyn_string_delete (data
->header
);
1989 dyn_string_delete (data
->pre_header
);
1990 dyn_string_delete (data
->non_scalar_args
);
1991 VEC_free (tree
, heap
, data
->function_args
);
1992 VEC_free (tree
, heap
, data
->function_args_to_pass
);
1993 VEC_free (opencl_data
, heap
, data
->data_refs
);
1997 /* Reset data structures before processing loop, represented by META.
1998 CODE_GEN holds information related to code generation. */
2001 opencl_init_new_loop (opencl_clast_meta meta
, opencl_main code_gen
)
2006 meta
->post_pass_to_host
2007 = VEC_alloc (opencl_data
, heap
, OPENCL_INIT_BUFF_SIZE
);
2008 meta
->post_pass_to_device
2009 = VEC_alloc (opencl_data
, heap
, OPENCL_INIT_BUFF_SIZE
);
2012 VEC_iterate (opencl_data
, code_gen
->opencl_function_data
, i
, curr
);
2015 curr
->inited_in_current_loop_on_host
= false;
2016 curr
->inited_in_current_loop_on_device
= false;
2020 /* Post loop init. Loop through all data stored in POST_PASS_TO_HOST
2021 and POST_PASS_TO_DEVICE vectors in META. This data must be up to
2022 date on host or device respectively at the end of current loop.
2023 CODE_GEN holds information related to code generation. */
2026 opencl_postpass_data (opencl_main code_gen
, opencl_clast_meta meta
)
2032 VEC_iterate (opencl_data
, meta
->post_pass_to_host
, i
, curr
); i
++)
2034 curr
->written_in_current_body
= false;
2035 opencl_pass_to_host (code_gen
, curr
);
2038 if (!flag_graphite_opencl_cpu
)
2040 VEC_iterate (opencl_data
, meta
->post_pass_to_device
, i
, curr
); i
++)
2042 curr
->written_in_current_body
= false;
2043 opencl_pass_to_device (code_gen
, curr
);
2048 VEC (opencl_data
, heap
) *parent_vec_host
2049 = meta
->parent
->post_pass_to_host
;
2050 VEC (opencl_data
, heap
) *parent_vec_device
2051 = meta
->parent
->post_pass_to_device
;
2054 VEC_iterate (opencl_data
, meta
->post_pass_to_host
, i
, curr
); i
++)
2055 VEC_safe_push (opencl_data
, heap
, parent_vec_host
, curr
);
2058 VEC_iterate (opencl_data
, meta
->post_pass_to_device
, i
, curr
); i
++)
2059 VEC_safe_push (opencl_data
, heap
, parent_vec_device
, curr
);
2062 VEC_free (opencl_data
, heap
, meta
->post_pass_to_host
);
2063 VEC_free (opencl_data
, heap
, meta
->post_pass_to_device
);
2066 static void opencl_transform_stmt_list (struct clast_stmt
*, opencl_main
, int);
2068 /* Add loop body, of the loop, represented by S, on host.
2069 Loop body can contain device code.
2070 DEPTH contains depth of given loop in current loop nest.
2071 DEPENDENCY indicates where given loop has any dependencies.
2072 CODE_GEN holds information related to code generation. */
2075 opencl_add_gimple_for_loop (struct clast_for
*s
, opencl_main code_gen
,
2076 int depth
, bool dependency
)
2078 loop_p old_parent
= code_gen
->context_loop
;
2079 loop_p new_loop
= graphite_create_new_loop
2080 (code_gen
->region
, code_gen
->main_edge
, s
, code_gen
->context_loop
,
2081 &code_gen
->newivs
, code_gen
->newivs_index
, code_gen
->params_index
, depth
);
2082 edge last_e
= single_exit (new_loop
);
2083 edge to_body
= single_succ_edge (new_loop
->header
);
2084 basic_block after
= to_body
->dest
;
2085 opencl_clast_meta parent
= code_gen
->curr_meta
->parent
;
2087 last_e
= single_succ_edge (split_edge (last_e
));
2089 code_gen
->context_loop
= new_loop
;
2090 code_gen
->main_edge
= single_succ_edge (new_loop
->header
);
2092 opencl_init_new_loop (parent
, code_gen
);
2093 opencl_transform_stmt_list (s
->body
, code_gen
, depth
+ 1);
2095 code_gen
->context_loop
= old_parent
;
2097 redirect_edge_succ_nodup (code_gen
->main_edge
, after
);
2098 set_immediate_dominator (CDI_DOMINATORS
, code_gen
->main_edge
->dest
,
2099 code_gen
->main_edge
->src
);
2101 opencl_postpass_data (code_gen
, parent
);
2102 code_gen
->main_edge
= last_e
;
2104 if (flag_loop_parallelize_all
&& !dependency
)
2105 new_loop
->can_be_parallel
= true;
2110 /* Add loop, represented by S, on host. Loop body can contain device code.
2111 DEPTH contains depth of given loop in current loop nest.
2112 DEPENDENCY indicates where given loop has any dependencies.
2113 CODE_GEN holds information related to code generation. */
2116 opencl_add_gimple_for_stmt_for (struct clast_for
*s
, opencl_main code_gen
,
2117 int depth
, bool dependency
)
2119 edge last_e
= graphite_create_new_loop_guard (code_gen
->region
,
2120 code_gen
->main_edge
,
2121 s
, code_gen
->newivs
,
2122 code_gen
->newivs_index
,
2123 code_gen
->params_index
);
2124 edge true_e
= get_true_edge_from_guard_bb (code_gen
->main_edge
->dest
);
2126 code_gen
->main_edge
= true_e
;
2127 opencl_add_gimple_for_loop (s
, code_gen
, depth
, dependency
);
2128 code_gen
->main_edge
= last_e
;
2131 /* Calculate parent data access flags in META based on children.
2132 parent->modified_on_host = OR_{forall children} child->modified_on_host.
2133 parent->modified_on_device = OR_{forall children} child->modified_on_device.
2137 opencl_fix_meta_flags (opencl_clast_meta meta
)
2139 opencl_clast_meta curr
= meta
->body
;
2143 bitmap_ior_into (meta
->modified_on_host
, curr
->modified_on_host
);
2144 bitmap_ior_into (meta
->modified_on_device
, curr
->modified_on_device
);
2149 /* Add if statement, represented by S to current gimple.
2150 CODE_GEN holds information related to code generation. */
2153 opencl_add_gimple_for_stmt_guard (struct clast_guard
*s
,
2154 opencl_main code_gen
, int depth
)
2156 edge last_e
= graphite_create_new_guard (code_gen
->region
,
2157 code_gen
->main_edge
, s
,
2159 code_gen
->newivs_index
,
2160 code_gen
->params_index
);
2161 edge true_e
= get_true_edge_from_guard_bb (code_gen
->main_edge
->dest
);
2163 code_gen
->main_edge
= true_e
;
2164 opencl_transform_stmt_list (s
->then
, code_gen
, depth
);
2165 code_gen
->main_edge
= last_e
;
2167 recompute_all_dominators ();
2171 /* Parse clast statement list S, located on depth DEPTH in current loop nest.
2172 This function generates gimple from clast statements, but in case of
2173 stmt_for either host or device code can be generated.
2174 CODE_GEN holds information related to code generation. */
2177 opencl_transform_stmt_list (struct clast_stmt
*s
, opencl_main code_gen
,
2180 bool dump_p
= dump_file
&& (dump_flags
& TDF_DETAILS
);
2182 for ( ; s
; s
= s
->next
)
2184 opencl_clast_meta tmp
= code_gen
->curr_meta
;
2186 if (CLAST_STMT_IS_A (s
, stmt_root
))
2189 else if (CLAST_STMT_IS_A (s
, stmt_user
))
2191 code_gen
->curr_meta
->init_edge
= code_gen
->main_edge
;
2192 opencl_add_gimple_for_user_stmt ((struct clast_user_stmt
*) s
,
2194 code_gen
->curr_meta
= code_gen
->curr_meta
->next
;
2196 else if (CLAST_STMT_IS_A (s
, stmt_for
))
2198 opencl_clast_meta current_clast
= code_gen
->curr_meta
;
2199 struct clast_for
*for_stmt
= (struct clast_for
*) s
;
2200 bool dependency
= false;
2201 bool parallel
= false;
2203 /* If there are dependencies in loop, it can't be parallelized. */
2204 if (!flag_graphite_opencl_no_dep_check
&&
2205 dependency_in_clast_loop_p (code_gen
, current_clast
,
2209 fprintf (dump_file
, "dependency in loop\n");
2215 parallel
= opencl_should_be_parallel_p (code_gen
, current_clast
,
2218 /* Create init block for memory transfer befor loop. */
2219 current_clast
->init_edge
= code_gen
->main_edge
;
2221 if (parallel
&& !dependency
)
2223 opencl_body current_body
;
2225 opencl_fflush_rw_flags (code_gen
);
2226 opencl_mark_privatized_data (code_gen
);
2227 current_clast
->on_device
= true;
2229 = opencl_clast_to_kernel (for_stmt
, code_gen
, depth
);
2231 if (current_body
->num_of_data_writes
)
2233 dyn_string_t header
= current_body
->header
;
2234 dyn_string_t pre_header
= current_body
->pre_header
;
2235 dyn_string_t body
= current_body
->body
;
2237 dyn_string_append (code_gen
->main_program
, header
);
2238 dyn_string_append (code_gen
->main_program
, pre_header
);
2239 dyn_string_append (code_gen
->main_program
, body
);
2241 opencl_create_gimple_for_body (current_body
, code_gen
);
2243 htab_delete (code_gen
->global_defined_vars
);
2244 update_ssa (TODO_update_ssa
);
2246 opencl_body_delete (current_body
);
2247 code_gen
->current_body
= NULL
;
2251 code_gen
->curr_meta
= code_gen
->curr_meta
->body
;
2252 opencl_add_gimple_for_stmt_for (for_stmt
, code_gen
,
2256 opencl_fix_meta_flags (current_clast
);
2257 code_gen
->curr_meta
= current_clast
->next
;
2259 else if (CLAST_STMT_IS_A (s
, stmt_guard
))
2260 opencl_add_gimple_for_stmt_guard ((struct clast_guard
*) s
,
2262 else if (CLAST_STMT_IS_A (s
, stmt_block
))
2263 opencl_transform_stmt_list (((struct clast_block
*) s
)->body
,
2269 opencl_fix_meta_flags (tmp
->parent
);
2273 /* Transform clast statement DATA from scop SCOP to OpenCL calls
2274 in region REGION. Place all calls to edge MAIN. PARAM_INDEX
2275 holds external scop params. */
2278 opencl_transform_clast (struct clast_stmt
*data
, sese region
,
2279 edge main
, scop_p scop
, htab_t params_index
)
2281 opencl_main code_gen
;
2282 /* Create main data struture for code generation. */
2284 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2286 fprintf (dump_file
, "\nGenerating OpenCL code for SCoP: \n");
2287 print_scop (dump_file
, scop
, 0);
2290 code_gen
= opencl_main_create (((struct clast_root
*) data
)->names
,
2291 region
, main
, params_index
);
2293 opencl_init_basic_blocks (code_gen
);
2294 opencl_init_data (scop
, code_gen
);
2296 code_gen
->clast_meta
= opencl_create_meta_from_clast (code_gen
, data
, 1,
2298 code_gen
->curr_meta
= code_gen
->clast_meta
;
2300 opencl_transform_stmt_list (data
, code_gen
, 1);
2302 if (dyn_string_length (code_gen
->main_program
) != 0)
2304 dyn_string_append (main_program_src
, code_gen
->main_program
);
2305 opencl_set_data_size (code_gen
);
2306 opencl_init_all_device_buffers (code_gen
);
2307 opencl_fflush_all_device_buffers_to_host (code_gen
);
2310 recompute_all_dominators ();
2311 update_ssa (TODO_update_ssa
);
2312 opencl_main_delete (code_gen
);
2315 /* Find opencl_data object by host object OBJ in CODE_GEN hash maps. */
2318 opencl_get_data_by_tree (opencl_main code_gen
, tree obj
)
2320 map_tree_to_data tmp
= map_tree_to_data_create (obj
, NULL
);
2321 map_tree_to_data
*slot
2322 = (map_tree_to_data
*) htab_find_slot (code_gen
->tree_to_data
,
2327 return (*slot
)->value
;
2330 /* Find opencl_data object by data reference REF in CODE_GEN hash maps. */
2333 opencl_get_data_by_data_ref (opencl_main code_gen
, data_reference_p ref
)
2335 map_ref_to_data tmp
= map_ref_to_data_create (ref
, NULL
);
2336 map_ref_to_data
*slot
2337 = (map_ref_to_data
*) htab_find_slot (code_gen
->ref_to_data
,
2342 return (*slot
)->value
;
2345 /* Create global variables for opencl code. */
2348 opencl_create_gimple_variables (void)
2350 static bool opencl_var_created
= false;
2352 if (opencl_var_created
)
2355 opencl_var_created
= true;
2357 /* cl_context h_context */
2358 h_context
= opencl_create_static_ptr_variable ("__ocl_h_context");
2360 /* cl_command_queue h_cmd_queue */
2361 h_cmd_queue
= opencl_create_static_ptr_variable ("__ocl_h_cmd_queue");
2365 | clGetContextInfo (h_context, CL_CONTEXT_DEVICES, 0, 0,
2366 | &n_context_descriptor_size);
2368 POINTER_TO_SIZE if &n_context_descriptor_size. */
2371 opencl_create_clGetContextInfo_1 (tree pointer_to_size
)
2373 tree function
= opencl_create_function_decl (GET_CONTEXT_INFO
);
2374 tree zero_pointer
= null_pointer_node
;
2375 tree cl_contex_devices
= build_int_cst (NULL_TREE
, CL_CONTEXT_DEVICES
);
2376 tree context_var
= h_context
;
2378 return build_call_expr (function
, 5,
2387 | clGetContextInfo (h_context, CL_CONTEXT_DEVICES,
2388 | n_context_descriptor_size, A_DEVICES, 0);
2390 POINTER_TO_SIZE if &n_context_descriptor_size. */
2393 opencl_create_clGetContextInfo_2 (tree size
, tree a_devices
)
2395 tree function
= opencl_create_function_decl (GET_CONTEXT_INFO
);
2396 tree zero_pointer
= null_pointer_node
;
2397 tree cl_contex_devices
= build_int_cst (NULL_TREE
, CL_CONTEXT_DEVICES
);
2398 tree context_var
= h_context
;
2400 return build_call_expr (function
, 5,
2408 /* Create context_properties array variable. */
2411 opencl_create_context_properties (void)
2413 tree cl_context_properties_type
= long_integer_type_node
;
2415 tree index_type
= build_index_type (build_int_cst (NULL_TREE
, 3));
2416 tree array_type
= build_array_type (cl_context_properties_type
,
2419 return opencl_create_tmp_var (array_type
, "context_properties");
2422 /* Place calls to obtain current platform id to INIT_EDGE.
2423 Place obtained id to VAR. */
2426 opencl_set_context_properties (edge init_edge
, tree var
)
2428 tree function
= opencl_create_function_decl (GET_PLATFORM_IDS
);
2429 tree cl_context_properties_type
= long_integer_type_node
;
2432 gimple_stmt_iterator g_iter
;
2434 basic_block bb
= split_edge (init_edge
);
2436 init_edge
= single_succ_edge (bb
);
2438 g_iter
= gsi_last_bb (bb
);
2439 call
= build4 (ARRAY_REF
, cl_context_properties_type
,
2440 var
, integer_zero_node
, NULL_TREE
, NULL_TREE
);
2441 call2
= build_int_cst (NULL_TREE
, CL_CONTEXT_PLATFORM
);
2442 call2
= build1 (CONVERT_EXPR
, cl_context_properties_type
, call2
);
2444 call
= build2 (MODIFY_EXPR
, cl_context_properties_type
,
2447 force_gimple_operand_gsi (&g_iter
, call
, true, NULL
, false,
2448 GSI_CONTINUE_LINKING
);
2449 g_iter
= gsi_last_bb (bb
);
2451 call
= build4 (ARRAY_REF
, cl_context_properties_type
,
2452 var
, integer_one_node
, NULL_TREE
, NULL_TREE
);
2453 call2
= build_call_expr (function
, 3,
2455 build_addr (call
, current_function_decl
),
2457 force_gimple_operand_gsi (&g_iter
, call2
, true, NULL
, false,
2458 GSI_CONTINUE_LINKING
);
2460 call
= build4 (ARRAY_REF
, cl_context_properties_type
,
2461 var
, build_int_cst (NULL_TREE
, 2), NULL_TREE
, NULL_TREE
);
2462 call
= build2 (MODIFY_EXPR
, cl_context_properties_type
,
2463 call
, fold_convert (cl_context_properties_type
,
2464 integer_zero_node
));
2465 force_gimple_operand_gsi (&g_iter
, call
, true, NULL
, false,
2466 GSI_CONTINUE_LINKING
);
2471 clCreateContextFromType (PROPERTIES, CL_DEVICE_TYPE_GPU, 0, 0, 0); */
2474 opencl_create_clCreateContextFromType (tree properties
)
2476 tree function
= opencl_create_function_decl (CREATE_CONTEXT_FROM_TYPE
);
2477 tree zero_pointer
= null_pointer_node
;
2479 = build_int_cst (NULL_TREE
, flag_graphite_opencl_cpu
2480 ? CL_DEVICE_TYPE_CPU
: CL_DEVICE_TYPE_GPU
);
2482 return build_call_expr (function
, 5,
2483 build_addr (properties
, current_function_decl
),
2491 clCreateCommandQueue (h_context, DEV_ID, 0, 0); */
2494 opencl_create_clCreateCommandQueue (tree dev_id
)
2497 tree function
= opencl_create_function_decl (CREATE_COMMAND_QUEUE
);
2498 tree zero_pointer
= null_pointer_node
;
2499 tree context
= h_context
;
2501 return build_call_expr (function
, 4,
2508 /* Create call malloc (ARG). */
2511 opencl_create_malloc_call (tree arg
)
2513 tree function_type
= build_function_type_list (ptr_type_node
,
2516 tree function
= build_fn_decl ("malloc", function_type
);
2518 return build_call_expr (function
, 1, arg
);
2521 /* Generate calls for opencl init functions and place them to INIT_EDGE.
2522 Must be called only once in each function. */
2525 opencl_create_init_context (edge init_edge
)
2529 tree n_context_descriptor_size
2530 = opencl_create_tmp_var (size_type_node
, "__ocl_nContextDescriptorSize");
2531 tree a_devices
= opencl_create_tmp_var (build_pointer_type (ptr_type_node
),
2533 tree properties
= opencl_create_context_properties ();
2535 init_edge
= opencl_set_context_properties (init_edge
, properties
);
2536 call
= opencl_create_clCreateContextFromType (properties
);
2537 call
= build2 (MODIFY_EXPR
, TREE_TYPE (h_context
),
2539 init_edge
= opencl_add_safe_call_on_edge (call
, false, init_edge
);
2540 tmp_type
= build_pointer_type
2541 (TREE_TYPE (n_context_descriptor_size
));
2542 call
= build1 (ADDR_EXPR
, tmp_type
,
2543 n_context_descriptor_size
);
2544 call
= opencl_create_clGetContextInfo_1 (call
);
2545 init_edge
= opencl_add_safe_call_on_edge (call
, true, init_edge
);
2546 call
= opencl_create_malloc_call (n_context_descriptor_size
);
2547 call
= fold_convert (TREE_TYPE (a_devices
), call
);
2548 call
= build2 (MODIFY_EXPR
, TREE_TYPE (a_devices
), a_devices
, call
);
2549 init_edge
= opencl_add_safe_call_on_edge (call
, false, init_edge
);
2550 call
= opencl_create_clGetContextInfo_2 (n_context_descriptor_size
,
2552 init_edge
= opencl_add_safe_call_on_edge (call
, true, init_edge
);
2553 tmp_type
= TREE_TYPE (TREE_TYPE (a_devices
));
2554 call
= build1 (INDIRECT_REF
, tmp_type
, a_devices
);
2555 call
= opencl_create_clCreateCommandQueue (call
);
2556 call
= build2 (MODIFY_EXPR
, TREE_TYPE (h_cmd_queue
),
2558 init_edge
= opencl_add_safe_call_on_edge (call
, false, init_edge
);
2562 /* Fill array VEC with all poly basic blocks in clast statement ROOT. */
2565 build_poly_bb_vec (struct clast_stmt
*root
,
2566 VEC (poly_bb_p
, heap
) **vec
)
2570 if (CLAST_STMT_IS_A (root
, stmt_user
))
2573 = (poly_bb_p
) cloog_statement_usr
2574 (((struct clast_user_stmt
*) root
)->statement
);
2576 VEC_safe_push (poly_bb_p
, heap
, *vec
, tmp
);
2579 else if (CLAST_STMT_IS_A (root
, stmt_for
))
2580 build_poly_bb_vec (((struct clast_for
*) root
)->body
, vec
);
2582 else if (CLAST_STMT_IS_A (root
, stmt_guard
))
2583 build_poly_bb_vec (((struct clast_guard
*) root
)->then
, vec
);
2585 else if (CLAST_STMT_IS_A (root
, stmt_block
))
2586 build_poly_bb_vec (((struct clast_block
*) root
)->body
, vec
);
2592 /* Check whether there is a dependency between PBB1 and PBB2 on level LEVEL.
2593 CAN_BE_PRIVATE indicates which variables can be privatizated.
2594 CODE_GEN holds information related to code generation. */
2597 opencl_dependency_between_pbbs_p (opencl_main code_gen
, poly_bb_p pbb1
,
2598 poly_bb_p pbb2
, int level
,
2599 bitmap can_be_private
)
2602 poly_dr_p pdr1
, pdr2
;
2604 timevar_push (TV_GRAPHITE_DATA_DEPS
);
2606 for (i
= 0; VEC_iterate (poly_dr_p
, PBB_DRS (pbb1
), i
, pdr1
); i
++)
2608 data_reference_p ref1
= (data_reference_p
)PDR_CDR (pdr1
);
2609 opencl_data data_1
= opencl_get_data_by_data_ref (code_gen
, ref1
);
2611 if (bitmap_bit_p (can_be_private
, data_1
->id
))
2614 for (j
= 0; VEC_iterate (poly_dr_p
, PBB_DRS (pbb2
), j
, pdr2
); j
++)
2616 data_reference_p ref2
= (data_reference_p
)PDR_CDR (pdr2
);
2618 opencl_data data_2
= opencl_get_data_by_data_ref (code_gen
, ref2
);
2620 if (bitmap_bit_p (can_be_private
, data_2
->id
))
2623 if (graphite_carried_dependence_level_k (pdr1
, pdr2
, level
))
2625 timevar_pop (TV_GRAPHITE_DATA_DEPS
);
2631 timevar_pop (TV_GRAPHITE_DATA_DEPS
);
2635 /* Returns true, if there is dependency in clast loop STMT on depth DEPTH.
2636 CODE_GEN holds information related to code generation. */
2639 dependency_in_clast_loop_p (opencl_main code_gen
, opencl_clast_meta meta
,
2640 struct clast_for
*stmt
, int depth
)
2642 VEC (poly_bb_p
, heap
) *pbbs
= VEC_alloc (poly_bb_p
, heap
, 10);
2643 int level
= get_scattering_level (depth
);
2646 bitmap can_be_private
;
2648 build_poly_bb_vec (stmt
->body
, &pbbs
);
2649 can_be_private
= meta
->can_be_private
;
2651 for (i
= 0; VEC_iterate (poly_bb_p
, pbbs
, i
, pbb1
); i
++)
2656 for (j
= 0; VEC_iterate (poly_bb_p
, pbbs
, j
, pbb2
); j
++)
2657 if (opencl_dependency_between_pbbs_p (code_gen
, pbb1
, pbb1
,
2658 level
, can_be_private
))
2660 VEC_free (poly_bb_p
, heap
, pbbs
);
2665 VEC_free (poly_bb_p
, heap
, pbbs
);
2669 /* Init graphite-opencl pass. Must be called in each function before
2670 any scop processing. */
2673 graphite_opencl_init (void)
2675 opencl_create_gimple_variables ();
2677 /* cl_program h_program */
2679 = opencl_create_static_ptr_variable ("__ocl_h_program");
2681 opencl_function_kernels
= VEC_alloc (tree
, heap
, OPENCL_INIT_BUFF_SIZE
);
2682 main_program_src
= dyn_string_new (100);
2684 opencl_array_data
= VEC_alloc (opencl_data
, heap
, OPENCL_INIT_BUFF_SIZE
);
2685 array_data_to_tree
= htab_create (10, map_tree_to_data_to_hash
,
2686 map_tree_to_data_cmp
, free
);
2688 opencl_create_function_decl (STATIC_INIT
);
2691 /* Create calls to initialize static data for current function and
2692 place them to INIT_EDGE. */
2695 opencl_init_static_data (edge init_edge
)
2700 for (i
= 0; VEC_iterate (opencl_data
, opencl_array_data
, i
, curr
); i
++)
2704 if (!curr
->used_on_device
)
2707 tmp
= opencl_create_memory_for_pointer (curr
);
2708 tmp
= build2 (MODIFY_EXPR
, ptr_type_node
, curr
->device_object
, tmp
);
2709 init_edge
= opencl_add_safe_call_on_edge (tmp
, false, init_edge
);
2715 /* Finalize graphite-opencl pass for current function. Place all required
2716 calls to STATIC_INIT_EDGE. Must be called after all scop processing
2717 in current function. */
2720 graphite_opencl_finalize (edge static_init_edge
)
2725 if (dyn_string_length (main_program_src
) != 0)
2727 tree call
= build2 (EQ_EXPR
, boolean_type_node
,
2728 h_program
, null_pointer_node
);
2729 basic_block buff_init_block
= split_edge (static_init_edge
);
2733 static_init_edge
= single_succ_edge (buff_init_block
);
2735 create_empty_if_region_on_edge (static_init_edge
, call
);
2737 static_init_edge
= opencl_create_function_call
2738 (get_true_edge_from_guard_bb (static_init_edge
->dest
));
2739 static_init_edge
= opencl_init_static_data (static_init_edge
);
2740 before_init
= single_pred_edge (buff_init_block
);
2741 call
= build2 (EQ_EXPR
, boolean_type_node
,
2742 h_context
, null_pointer_node
);
2744 create_empty_if_region_on_edge (before_init
, call
);
2745 init_edge
= get_true_edge_from_guard_bb (before_init
->dest
);
2746 init_edge
= opencl_create_init_context (init_edge
);
2749 dyn_string_delete (main_program_src
);
2751 for (i
= 0; VEC_iterate (opencl_data
, opencl_array_data
, i
, curr
); i
++)
2752 opencl_data_delete (curr
);
2754 VEC_free (tree
, heap
, opencl_function_kernels
);
2755 VEC_free (opencl_data
, heap
, opencl_array_data
);
2756 recompute_all_dominators ();
2757 update_ssa (TODO_update_ssa
);
2760 /* Debug functions for deined data structures. */
2763 dump_flag_to_file (const char *name
, bool cond
,
2764 FILE *file
, int indent
)
2766 indent_to (file
, indent
);
2767 fprintf (file
, "%s = %s", name
, cond
? "true" : "false");
2771 dump_opencl_data (opencl_data data
, FILE *file
, bool verbose
)
2773 fprintf (file
, "Data id = %d\n", data
->id
);
2774 fprintf (file
, "Data dimension = %d\n", data
->data_dim
);
2775 fprintf (file
, "Data depth = %d\n", data
->depth
);
2776 fprintf (file
, "Flags");
2777 indent_to (file
, 2);
2778 fprintf (file
, "Global");
2779 dump_flag_to_file ("Static", data
->is_static
, file
, 4);
2780 dump_flag_to_file ("Can be private", data
->can_be_private
, file
, 4);
2781 dump_flag_to_file ("Used on device", data
->used_on_device
, file
, 4);
2782 dump_flag_to_file ("Ever read on device",
2783 data
->ever_read_on_device
, file
, 4);
2785 dump_flag_to_file ("Ever written on device",
2786 data
->ever_written_on_device
, file
, 4);
2788 dump_flag_to_file ("Supported", data
->supported
, file
, 4);
2789 indent_to (file
, 2);
2790 fprintf (file
, "Local");
2792 dump_flag_to_file ("Up to date on device",
2793 data
->up_to_date_on_device
, file
, 4);
2794 dump_flag_to_file ("Up to date on host",
2795 data
->up_to_date_on_host
, file
, 4);
2797 dump_flag_to_file ("Inited in current loop on host",
2798 data
->inited_in_current_loop_on_host
, file
, 4);
2800 dump_flag_to_file ("Inited in current loop on device",
2801 data
->inited_in_current_loop_on_device
, file
, 4);
2803 dump_flag_to_file ("Written in current body",
2804 data
->written_in_current_body
, file
, 4);
2806 dump_flag_to_file ("Read in current body",
2807 data
->read_in_current_body
, file
, 4);
2808 dump_flag_to_file ("Privatized", data
->privatized
, file
, 4);
2810 fprintf (file
, "\n");
2814 fprintf (file
, "\nObject\n");
2815 print_node_brief (file
, "", data
->object
, 2);
2817 fprintf (file
, "\nDevice object\n");
2818 print_node_brief (file
, "", data
->device_object
, 2);
2820 fprintf (file
, "\nSize value\n");
2821 print_node_brief (file
, "", data
->size_value
, 2);
2823 fprintf (file
, "\nSize variable\n");
2824 print_node_brief (file
, "", data
->size_variable
, 2);
2826 fprintf (file
, "\nExact object\n");
2827 print_node_brief (file
, "", data
->exact_object
, 2);
2832 debug_opencl_data (opencl_data data
, bool verbose
)
2834 dump_opencl_data (data
, stderr
, verbose
);
2838 dump_opencl_body (opencl_body body
, FILE *file
, bool verbose
)
2840 fprintf (file
, "\n%s\n\n", body
->name
);
2841 fprintf (file
, "First iterator: %s\n", body
->first_iter
);
2842 fprintf (file
, "Last iterator: %s\n", body
->last_iter
);
2843 fprintf (file
, "Number of data writes = %d\n\n", body
->num_of_data_writes
);
2844 fprintf (file
, "Function header::\n");
2845 fprintf (file
, "%s\n\n", dyn_string_buf (body
->header
));
2846 fprintf (file
, "Non scalar args::\n");
2847 fprintf (file
, "%s\n\n", dyn_string_buf (body
->non_scalar_args
));
2848 fprintf (file
, "Pre header::\n");
2849 fprintf (file
, "%s\n\n", dyn_string_buf (body
->pre_header
));
2850 fprintf (file
, "Body::\n");
2851 fprintf (file
, "%s\n\n", dyn_string_buf (body
->body
));
2853 fprintf (file
, "Number of executions::\n");
2854 print_node_brief (file
, "", body
->num_of_exec
, 2);
2857 print_clast_stmt (file
, body
->clast_body
);
2861 debug_opencl_body (opencl_body body
, bool verbose
)
2863 dump_opencl_body (body
, stderr
, verbose
);
2867 dump_opencl_clast_meta (opencl_clast_meta meta
, FILE *file
,
2868 bool verbose
, int indent
)
2871 /* Just print structure of meta. */
2875 indent_to (file
, indent
);
2876 fprintf (file
, "<in = %d, out = %d, dev = %s, ok = %s>",
2877 meta
->in_depth
, meta
->out_depth
,
2878 meta
->on_device
?"true":"false",
2879 meta
->access_unsupported
?"false":"true");
2880 dump_opencl_clast_meta (meta
->body
, file
, false, indent
+ 4);
2886 fprintf (file
, "<in = %d, out = %d, dev = %s, ok = %s>",
2887 meta
->in_depth
, meta
->out_depth
,
2888 meta
->on_device
?"true":"false",
2889 meta
->access_unsupported
?"false":"true");
2891 fprintf (file
, "\nModified on host::\n");
2892 debug_bitmap_file (file
, meta
->modified_on_host
);
2894 fprintf (file
, "\nModified on device::\n");
2895 debug_bitmap_file (file
, meta
->modified_on_device
);
2897 fprintf (file
, "\nAccess::\n");
2898 debug_bitmap_file (file
, meta
->access
);
2900 fprintf (file
, "\nCan be private::\n");
2901 debug_bitmap_file (file
, meta
->can_be_private
);
2906 debug_opencl_clast_meta (opencl_clast_meta meta
, bool verbose
)
2908 dump_opencl_clast_meta (meta
, stderr
, verbose
, 0);
2912 print_char_p_htab (void **h
, void *v
)
2914 char **ptr
= (char **) h
;
2915 FILE *file
= (FILE *) v
;
2917 fprintf (file
, " %s\n", *ptr
);
2922 print_tree_to_data_htab (void **h
, void *v
)
2924 map_tree_to_data
*map
= (map_tree_to_data
*) h
;
2925 FILE *file
= (FILE *) v
;
2926 tree key
= (*map
)->key
;
2927 opencl_data data
= (*map
)->value
;
2929 print_node_brief (file
, "key = ", key
, 2);
2930 fprintf (file
, " data_id = %d\n", data
->id
);
2935 print_ref_to_data_htab (void **h
, void *v
)
2937 map_ref_to_data
*map
= (map_ref_to_data
*) h
;
2938 FILE *file
= (FILE *) v
;
2939 data_reference_p key
= (*map
)->key
;
2940 opencl_data data
= (*map
)->value
;
2942 fprintf (file
, "key::\n");
2943 dump_data_reference (file
, key
);
2944 fprintf (file
, "data_id = %d\n\n", data
->id
);
2949 dump_opencl_main (opencl_main code_gen
, FILE *file
, bool verbose
)
2951 fprintf (file
, "Current meta::\n");
2952 dump_opencl_clast_meta (code_gen
->curr_meta
, file
, false, 2);
2953 fprintf (file
, "\n");
2955 if (code_gen
->current_body
)
2957 fprintf (file
, "Current body::\n");
2958 dump_opencl_body (code_gen
->current_body
, file
, verbose
);
2961 fprintf (file
, "\n\nData init basic block::\n");
2962 dump_bb (code_gen
->data_init_bb
, stderr
, 0);
2964 if (code_gen
->defined_vars
)
2966 fprintf (file
, "Defined variables::\n");
2967 htab_traverse_noresize (code_gen
->defined_vars
, print_char_p_htab
,
2971 if (code_gen
->global_defined_vars
)
2973 fprintf (file
, "Global defined variables::\n");
2974 htab_traverse_noresize (code_gen
->global_defined_vars
,
2975 print_char_p_htab
, file
);
2978 fprintf (file
, "Refs to data::\n");
2979 htab_traverse_noresize (code_gen
->ref_to_data
,
2980 print_ref_to_data_htab
, file
);
2982 fprintf (file
, "Trees to data::\n");
2983 htab_traverse_noresize (code_gen
->tree_to_data
,
2984 print_tree_to_data_htab
, file
);
2987 fprintf (file
, "%s\n", dyn_string_buf (code_gen
->main_program
));
2991 debug_opencl_main (opencl_main code_gen
, bool verbose
)
2993 dump_opencl_main (code_gen
, stderr
, verbose
);
2997 debug_opencl_program (void)
2999 fprintf (stderr
, "%s", dyn_string_buf (main_program_src
));
3003 #include "gt-graphite-opencl.h"