Move code out of ifdef HAVE_cloog section.
[official-gcc/graphite-test-results.git] / gcc / graphite-opencl.c
blob9b4743e6fe94ff4d7f93b58cd098bded01c85e3b
1 /* GRAPHITE-OpenCL pass.
2 Copyright (C) 2009, 2010 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass implements transformation of perfect loop nests without
21 dependencies to OpenCL kernels.
23 For example, loop nest like this
25 | for (s_i = lb_i; s_i <= ub_i; s_i += stride_i)
26 | for (s_{i_1} = lb_{i+1}; s_{i+1} <= ub_{i+1}; s_{i+1} += stride_{i+1})
27 | ...
28 | for (s_j = lb_j; s_j <= ub_j; s_j += stride_j)
29 | {
30 | STMT(s_i, s_{i+1}, ..., s_j);
31 | }
33 will be transformed to this OpenCL kernel and all required OpenCL
34 calls will be created.
36 | __kernel void opencl_auto_function_N
37 | (base_i, mod_i, step_i, first_i,
38 | ...,
39 | base_j, mod_j, step_i, first_j,
40 | ...)
41 | {
42 | unsigned int global_id = get_global_id (0);
43 | unsigned int s_i = ((global_id / base_i) % mod_i) * step_i + first_i;
44 | unsigned int s_j = ((global_id / base_j) % mod_j) * step_j + first_j;
45 | STMT(s_i, s_{i+1}, ..., s_j);
46 | }
49 #include "config.h"
50 #include "system.h"
51 #include "coretypes.h"
52 #include "tm.h"
53 #include "ggc.h"
54 #include "tree.h"
55 #include "rtl.h"
56 #include "basic-block.h"
57 #include "diagnostic.h"
58 #include "tree-flow.h"
59 #include "toplev.h"
60 #include "tree-dump.h"
61 #include "timevar.h"
62 #include "cfgloop.h"
63 #include "tree-chrec.h"
64 #include "tree-data-ref.h"
65 #include "tree-scalar-evolution.h"
66 #include "tree-pass.h"
67 #include "domwalk.h"
68 #include "value-prof.h"
69 #include "pointer-set.h"
70 #include "gimple.h"
71 #include "sese.h"
72 #include "output.h"
73 #include <sys/time.h>
74 #include "hashtab.h"
75 #include "tree.h"
77 /* Variable, which holds OpenCL context. */
78 static GTY(()) tree h_context;
80 /* Variable, which holds OpenCL command queue. */
81 static GTY(()) tree h_cmd_queue;
83 /* Variable, which holds OpenCL program for current function. */
84 static GTY(()) tree h_program;
86 #ifdef HAVE_cloog
87 #include "cloog/cloog.h"
88 #include "ppl_c.h"
89 #include "graphite-ppl.h"
90 #include "graphite.h"
91 #include "graphite-poly.h"
92 #include "graphite-scop-detection.h"
93 #include "graphite-clast-to-gimple.h"
94 #include "graphite-dependences.h"
95 #include "dyn-string.h"
96 #include "graphite-opencl.h"
98 /* Data structure to be used in data_reference_p to opencl_data hash
99 table. */
100 struct map_ref_to_data_def
102 data_reference_p key;
103 opencl_data value;
106 typedef struct map_ref_to_data_def *map_ref_to_data;
108 /* Calculate hash value from map_ref_to_data. */
110 static hashval_t
111 map_ref_to_data_to_hash (const void *data)
113 const struct map_ref_to_data_def *obj
114 = (const struct map_ref_to_data_def *) data;
116 return htab_hash_pointer (obj->key);
119 /* Compare to map_ref_to_data pointers. */
121 static int
122 map_ref_to_data_cmp (const void *v1, const void *v2)
124 const struct map_ref_to_data_def *obj_1
125 = (const struct map_ref_to_data_def *) v1;
126 const struct map_ref_to_data_def *obj_2
127 = (const struct map_ref_to_data_def *) v2;
129 return (obj_1->key == obj_2->key);
132 /* Create new map_ref_to_data with NEW_KEY as key and NEW_VALUE as value. */
134 static map_ref_to_data
135 map_ref_to_data_create (data_reference_p new_key,
136 opencl_data new_value)
138 map_ref_to_data tmp = XNEW (struct map_ref_to_data_def);
140 tmp->key = new_key;
141 tmp->value = new_value;
142 return tmp;
145 /* Data structure to be used in tree to opencl_data hash table. */
147 struct map_tree_to_data_def
149 tree key;
150 opencl_data value;
153 typedef struct map_tree_to_data_def *map_tree_to_data;
155 /* Calculate hash value from map_tree_to_data. */
157 static hashval_t
158 map_tree_to_data_to_hash (const void *data)
160 const struct map_tree_to_data_def *obj
161 = (const struct map_tree_to_data_def *) data;
163 return htab_hash_pointer (obj->key);
166 /* Compare to map_tree_to_data pointers. */
168 static int
169 map_tree_to_data_cmp (const void *v1, const void *v2)
171 const struct map_tree_to_data_def *obj_1
172 = (const struct map_tree_to_data_def *) v1;
173 const struct map_tree_to_data_def *obj_2
174 = (const struct map_tree_to_data_def *) v2;
176 return (obj_1->key == obj_2->key);
179 /* Create new map_tree_to_data with NEW_KEY as key and NEW_VALUE as value. */
181 static map_tree_to_data
182 map_tree_to_data_create (tree new_key,
183 opencl_data new_value)
185 map_tree_to_data tmp = XNEW (struct map_tree_to_data_def);
187 tmp->key = new_key;
188 tmp->value = new_value;
189 return tmp;
192 /* Create and init new temporary variable with name NAME and
193 type TYPE. */
195 static tree
196 opencl_create_tmp_var (tree type, const char *name)
198 tree tmp = create_tmp_var (type, name);
200 TREE_ADDRESSABLE (tmp) = 1;
201 return tmp;
204 /* Create new var in basic block DEST to store EXPR and return it. */
206 tree
207 opencl_tree_to_var (basic_block dest, tree expr)
209 tree type = TREE_TYPE (expr);
210 tree var = opencl_create_tmp_var (type, "__ocl_general_tmp_var");
211 gimple_stmt_iterator g_iter = gsi_last_bb (dest);
213 tree call = build2 (MODIFY_EXPR, type, var, expr);
215 force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
216 GSI_CONTINUE_LINKING);
218 return var;
221 /* Set rw flags to false for all datas, referenced in CODE_GEN. */
223 static void
224 opencl_fflush_rw_flags (opencl_main code_gen)
226 VEC (opencl_data, heap) *datas = code_gen->opencl_function_data;
227 int i;
228 opencl_data curr;
230 for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
232 curr->written_in_current_body = false;
233 curr->read_in_current_body = false;
234 curr->privatized = false;
238 /* Create new basic block on CODE_GEN->main edge and update it. */
240 basic_block
241 opencl_create_bb (opencl_main code_gen)
243 basic_block tmp = split_edge (code_gen->main_edge);
245 code_gen->main_edge = single_succ_edge (tmp);
246 return tmp;
249 /* All kernels of current function. */
250 static VEC(tree, heap) *opencl_function_kernels;
252 /* OpenCL code for all kernels of current function. */
253 static dyn_string_t main_program_src;
255 /* Delete clast meta DATA. */
257 static void
258 opencl_clast_meta_delete (opencl_clast_meta data)
260 if (!data)
261 return;
263 opencl_clast_meta_delete (data->body);
264 opencl_clast_meta_delete (data->next);
265 BITMAP_FREE (data->modified_on_device);
266 BITMAP_FREE (data->modified_on_host);
268 if (data->access != NULL)
269 BITMAP_FREE (data->access);
271 if (data->can_be_private)
272 BITMAP_FREE (data->can_be_private);
274 free (data);
277 static inline void
278 opencl_verify (void)
280 #ifdef ENABLE_CHECKING
281 verify_loop_structure ();
282 verify_dominators (CDI_DOMINATORS);
283 verify_loop_closed_ssa (true);
284 #endif
287 /* OpenCL definitions. */
288 #define CL_CONTEXT_PLATFORM 0x1084
289 #define CL_CONTEXT_DEVICES 0x1081
290 #define CL_DEVICE_TYPE_CPU (1 << 1)
291 #define CL_DEVICE_TYPE_GPU (1 << 2)
292 #define CL_MEM_COPY_HOST_PTR (1 << 5)
293 #define CL_MEM_USE_HOST_PTR (1 << 3)
294 #define CL_MEM_READ_WRITE (1 << 0)
295 #define CL_MEM_WRITE_ONLY (1 << 1)
296 #define CL_MEM_READ_ONLY (1 << 2)
297 #define CL_TRUE 1
299 #define DEFOPENCLCODE(CODE, FN_NAME) CODE,
301 /* Enum for all OpenCL functions used in GRAPHITE-OpenCL. */
302 enum OPENCL_FUNCTIONS
304 #include "graphite-opencl-functions.def"
305 STATIC_INIT
308 #undef DEFOPENCLCODE
310 #define DEFOPENCLCODE(CODE, FN_NAME) FN_NAME,
312 /* Names of all OpenCL functions, used in GRAPHITE-OpenCL. */
313 static const char *opencl_function_names[] =
315 #include "graphite-opencl-functions.def"
318 #undef DEFOPENCLCODE
320 /* This vector holds opencl_data, which represents arrays.
321 Arrays have constant sizes, so buffers for each of them can
322 be created only once. */
323 static VEC (opencl_data, heap) *opencl_array_data;
325 /* Hash table, which maps opencl_data, related to arrays, to
326 trees, which represents corresponding array. */
327 static htab_t array_data_to_tree;
329 /* Check whether VAR is a zero dimension array. */
331 static bool
332 zero_dim_array_p (tree var)
334 tree type = TREE_TYPE (var);
335 tree domain;
336 tree up_bound;
338 if (TREE_CODE (type) != ARRAY_TYPE
339 || TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE
340 || (domain = TYPE_DOMAIN (type)) == NULL)
341 return false;
343 up_bound = TYPE_MAX_VALUE (domain);
345 if (TREE_CODE (up_bound) != INTEGER_CST)
346 return false;
348 return TREE_INT_CST_LOW (up_bound) == 0;
351 /* Check whether NAME is the name of the artificial array, which can be
352 privatized. */
354 static bool
355 opencl_private_var_name_p (const char *name)
357 static const char *general_reduction = "General_Reduction";
358 static const char *close_phi = "Close_Phi";
359 static const char *cross_bb = "Cross_BB_scalar_dependence";
360 static const char *commutative = "Commutative_Associative_Reduction";
362 if (!name)
363 return false;
365 return
366 ((strstr (name, general_reduction) == name)
367 || (strstr (name, close_phi) == name)
368 || (strstr (name, commutative) == name)
369 || (strstr (name, cross_bb) == name));
372 /* Check whether VAR is an artificial array, which can be privatized. */
374 static bool
375 graphite_artificial_array_p (tree var)
377 tree name;
379 if (TREE_CODE (var) != VAR_DECL
380 || !zero_dim_array_p (var)
381 || !(name = DECL_NAME (var)))
382 return false;
384 return opencl_private_var_name_p (IDENTIFIER_POINTER (name));
387 /* Get depth of type TYPE scalar (base) part. */
389 static int
390 opencl_get_non_scalar_type_depth (tree type)
392 int count = 0;
394 while (TREE_CODE (type) == ARRAY_TYPE
395 || TREE_CODE (type) == POINTER_TYPE)
397 count++;
398 type = TREE_TYPE (type);
401 return count;
404 /* Constructors & destructors.
405 <name>_create - creates a new object of such type and returns it.
406 <name>_delete - delete object (like destructor). */
408 static opencl_data
409 opencl_data_create (tree var, tree size)
411 opencl_data tmp = XNEW (struct opencl_data_def);
412 tree type = TREE_TYPE (var);
414 tmp->can_be_private = graphite_artificial_array_p (var);
415 tmp->exact_object = var;
417 tmp->supported = TREE_CODE (var) == VAR_DECL || TREE_CODE (var) == SSA_NAME;
419 if (TREE_CODE (type) == ARRAY_TYPE)
420 var = build_addr (var, current_function_decl);
422 tmp->data_dim = opencl_get_non_scalar_type_depth (type);
423 tmp->object = var;
425 tmp->size_value = size;
426 tmp->size_variable
427 = opencl_create_tmp_var (size_type_node, "__opencl_data_size");
429 tmp->up_to_date_on_host = true;
430 tmp->up_to_date_on_device = true;
431 tmp->used_on_device = false;
432 tmp->ever_read_on_device = false;
433 tmp->ever_written_on_device = false;
434 return tmp;
437 static void
438 opencl_data_delete (opencl_data data)
440 free (data);
443 static opencl_main
444 opencl_main_create (CloogNames *names, sese region, edge main_edge,
445 htab_t params_index)
447 opencl_main tmp = XNEW (struct graphite_opencl_creator);
449 tmp->root_names = names;
450 tmp->defined_vars = NULL;
451 tmp->global_defined_vars = NULL;
452 tmp->region = region;
453 tmp->main_edge = main_edge;
454 tmp->main_program = dyn_string_new (OPENCL_INIT_BUFF_SIZE);
455 tmp->current_body = NULL;
456 tmp->clast_meta = NULL;
457 tmp->curr_meta = NULL;
458 tmp->params_index = params_index;
459 tmp->newivs_index = htab_create (10, clast_name_index_elt_info,
460 eq_clast_name_indexes, free);
461 tmp->ref_to_data = htab_create (10, map_ref_to_data_to_hash,
462 map_ref_to_data_cmp, free);
463 tmp->tree_to_data = htab_create (10, map_tree_to_data_to_hash,
464 map_tree_to_data_cmp, free);
465 tmp->newivs = VEC_alloc (tree, heap, 10);
466 tmp->context_loop = SESE_ENTRY (region)->src->loop_father;
467 tmp->opencl_function_data = VEC_alloc (opencl_data, heap,
468 OPENCL_INIT_BUFF_SIZE);
469 return tmp;
472 static void
473 opencl_main_delete (opencl_main data)
475 int i;
476 opencl_data curr;
478 dyn_string_delete (data->main_program);
479 htab_delete (data->newivs_index);
480 htab_delete (data->ref_to_data);
481 htab_delete (data->tree_to_data);
482 opencl_clast_meta_delete (data->clast_meta);
484 for (i = 0; VEC_iterate (opencl_data, data->opencl_function_data, i, curr);
485 i++)
486 if (!curr->is_static)
487 opencl_data_delete (curr);
489 VEC_free (tree, heap, data->newivs);
490 VEC_free (opencl_data, heap, data->opencl_function_data);
491 free (data);
494 /* Add function call CALL to edge SRC. If FLAG_GRAPHITE_OPENCL_DEBUG is
495 enabled, then add the following:
497 | int result = call ();
498 | if (call == 0 != ZERO_RETURN)
499 | abort ();
501 Otherwise just add CALL as function call. */
503 static edge
504 opencl_add_safe_call_on_edge (tree call, bool zero_return, edge src)
506 if (!flag_graphite_opencl_debug)
508 basic_block bb = split_edge (src);
509 gimple_stmt_iterator g_iter = gsi_last_bb (bb);
511 force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
512 GSI_CONTINUE_LINKING);
513 return single_succ_edge (bb);
515 else
517 tree cmp;
518 edge result;
519 basic_block abort_bb;
520 tree abort_funtion;
521 tree abort_call;
522 gimple_stmt_iterator g_iter;
524 if (zero_return)
526 tree correct_result = build1 (CONVERT_EXPR, TREE_TYPE (call),
527 integer_zero_node);
528 cmp = build2 (EQ_EXPR, boolean_type_node,
529 call, correct_result);
531 else
533 tree incorrect_result = build1 (CONVERT_EXPR, TREE_TYPE (call),
534 null_pointer_node);
536 cmp = build2 (NE_EXPR, boolean_type_node,
537 call, incorrect_result);
540 result = create_empty_if_region_on_edge (src, cmp);
541 abort_bb = get_false_edge_from_guard_bb (src->dest)->dest;
542 abort_funtion = build_fn_decl ("abort", build_function_type_list
543 (void_type_node, NULL_TREE));
545 abort_call = build_call_expr (abort_funtion, 0);
547 g_iter = gsi_last_bb (abort_bb);
548 force_gimple_operand_gsi (&g_iter, abort_call, true, NULL, false,
549 GSI_CONTINUE_LINKING);
550 return result;
554 /* Add call CALL to the current edge from CODE_GEN. ZERO_RETURN indicates
555 whether NULL or 0 is the success return value for CALL. */
557 static void
558 opencl_add_safe_call (opencl_main code_gen, tree call, bool zero_return)
560 code_gen->main_edge
561 = opencl_add_safe_call_on_edge (call, zero_return, code_gen->main_edge);
564 /* Get base object for OBJ. */
566 tree
567 opencl_get_base_object_by_tree (tree obj)
569 while (TREE_CODE (obj) == INDIRECT_REF
570 || TREE_CODE (obj) == ARRAY_REF)
571 obj = TREE_OPERAND (obj, 0);
573 return obj;
576 /* Get base object from data reference DR. */
578 tree
579 dr_outermost_base_object (data_reference_p dr)
581 tree addr = DR_BASE_ADDRESS (dr);
583 if (!addr)
585 /* In case, we don't know base object. For example:
587 | void foo (int *a)
589 | int *b = a + 1;
590 | *b = 0;
593 Just return the innermost object when the base address is unknown. */
594 tree ref = DR_REF (dr);
596 return opencl_get_base_object_by_tree (ref);
599 if (TREE_CODE (addr) == ADDR_EXPR)
600 addr = TREE_OPERAND (addr, 0);
602 return addr;
605 /* Get correct basic block for data with DATA_ID transfer. If DEVICE
606 is true, then it's host to device transfer, otherwise it's device
607 to host transfer. CODE_GEN holds information related to code
608 generation. */
610 static edge
611 opencl_get_edge_for_init (opencl_main code_gen, int data_id, bool device)
613 opencl_clast_meta curr = code_gen->curr_meta;
615 if (!curr)
616 return NULL;
618 while (curr->parent)
620 opencl_clast_meta parent = curr->parent;
621 bitmap curr_bitmap
622 = device ? parent->modified_on_host : parent->modified_on_device;
624 if (bitmap_bit_p (curr_bitmap, data_id))
625 break;
627 curr = curr->parent;
630 return curr->init_edge;
633 /* Return tree, which represents function selected by ID.
634 If ID is STATIC_INIT, init all required data. */
636 static tree
637 opencl_create_function_decl (enum OPENCL_FUNCTIONS id)
639 static tree create_context_from_type_decl = NULL;
640 static tree get_context_info_decl = NULL;
641 static tree create_command_queue_decl = NULL;
642 static tree create_program_with_source_decl = NULL;
643 static tree build_program_decl = NULL;
644 static tree create_kernel_decl = NULL;
645 static tree create_buffer_decl = NULL;
646 static tree set_kernel_arg_decl = NULL;
647 static tree enqueue_nd_range_kernel_decl = NULL;
648 static tree enqueue_read_buffer_decl = NULL;
649 static tree enqueue_write_buffer_decl = NULL;
650 static tree release_memory_obj_decl = NULL;
651 static tree release_context_decl = NULL;
652 static tree release_command_queue_decl = NULL;
653 static tree release_program_decl = NULL;
654 static tree release_kernel_decl = NULL;
655 static tree get_platform_ids_decl = NULL;
656 static tree get_wait_for_events_decl = NULL;
658 switch (id)
660 case STATIC_INIT:
662 tree const_char_type = build_qualified_type (char_type_node,
663 TYPE_QUAL_CONST);
664 tree const_char_ptr = build_pointer_type (const_char_type);
665 tree const_char_ptr_ptr = build_pointer_type (const_char_ptr);
667 tree const_size_t = build_qualified_type (size_type_node,
668 TYPE_QUAL_CONST);
669 tree const_size_t_ptr = build_pointer_type (const_size_t);
671 tree size_t_ptr = build_pointer_type (size_type_node);
673 tree cl_device_type = integer_type_node;
674 tree cl_context_info = unsigned_type_node;
675 tree cl_command_queue_properties = long_unsigned_type_node;
676 tree cl_mem_flags = long_unsigned_type_node;
678 tree cl_context = ptr_type_node;
679 tree cl_context_properties = ptr_type_node;
680 tree cl_command_queue = ptr_type_node;
681 tree cl_device_id = ptr_type_node;
682 tree cl_program = ptr_type_node;
683 tree cl_kernel = ptr_type_node;
684 tree cl_event = ptr_type_node;
685 tree cl_mem = ptr_type_node;
687 tree const_cl_event = build_qualified_type (cl_event,
688 TYPE_QUAL_CONST);
689 tree cl_event_ptr = build_pointer_type (cl_event);
690 tree const_cl_event_ptr = build_pointer_type (const_cl_event);
692 tree const_cl_device_id = build_qualified_type (cl_device_id,
693 TYPE_QUAL_CONST);
694 tree const_cl_device_id_ptr = build_pointer_type (const_cl_device_id);
696 tree cl_platford_id = long_integer_type_node;
697 tree cl_platford_id_ptr = build_pointer_type (cl_platford_id);
699 tree function_type;
700 /* | cl_context
701 | clCreateContextFromType (cl_context_properties *properties,
702 | cl_device_type device_type,
703 | void (*pfn_notify) (const char *errinfo,
704 | const void *private_info, size_t cb,
705 | void *user_data),
706 | void *user_data,
707 | cl_int *errcode_ret) */
708 function_type
709 = build_function_type_list (cl_context,
710 cl_context_properties,
711 cl_device_type,
712 ptr_type_node,
713 ptr_type_node,
714 integer_ptr_type_node,
715 NULL_TREE);
716 create_context_from_type_decl
717 = build_fn_decl (opencl_function_names[CREATE_CONTEXT_FROM_TYPE],
718 function_type);
720 /* | cl_int clGetContextInfo (cl_context context,
721 | cl_context_info param_name,
722 | size_t param_value_size,
723 | void *param_value,
724 | size_t *param_value_size_ret) */
725 function_type
726 = build_function_type_list (integer_type_node,
727 cl_context,
728 cl_context_info,
729 size_type_node,
730 ptr_type_node,
731 size_t_ptr,
732 NULL_TREE);
733 get_context_info_decl
734 = build_fn_decl (opencl_function_names[GET_CONTEXT_INFO],
735 function_type);
737 /* | cl_command_queue
738 | clCreateCommandQueue (cl_context context,
739 | cl_device_id device,
740 | cl_command_queue_properties properties,
741 | cl_int *errcode_ret) */
742 function_type
743 = build_function_type_list (cl_command_queue,
744 cl_context,
745 cl_device_id,
746 cl_command_queue_properties,
747 integer_ptr_type_node,
748 NULL_TREE);
749 create_command_queue_decl
750 = build_fn_decl (opencl_function_names[CREATE_COMMAND_QUEUE],
751 function_type);
753 /* | cl_program clCreateProgramWithSource (cl_context context,
754 | cl_uint count,
755 | const char **strings,
756 | const size_t *lengths,
757 | cl_int *errcode_ret) */
758 function_type
759 = build_function_type_list (cl_program,
760 cl_context,
761 unsigned_type_node,
762 const_char_ptr_ptr,
763 const_size_t_ptr,
764 integer_ptr_type_node,
765 NULL_TREE);
766 create_program_with_source_decl
767 = build_fn_decl (opencl_function_names[CREATE_PROGRAM_WITH_SOURCE],
768 function_type);
770 /* | cl_int
771 | clBuildProgram (cl_program program,
772 | cl_uint num_devices,
773 | const cl_device_id *device_list,
774 | const char *options,
775 | void (*pfn_notify) (cl_program, void *user_data),
776 | void *user_data) */
777 function_type
778 = build_function_type_list (integer_type_node,
779 cl_program,
780 unsigned_type_node,
781 const_cl_device_id_ptr,
782 const_char_ptr,
783 ptr_type_node,
784 ptr_type_node,
785 NULL_TREE);
786 build_program_decl
787 = build_fn_decl (opencl_function_names[BUILD_PROGRAM],
788 function_type);
790 /* | cl_kernel clCreateKernel (cl_program program,
791 | const char *kernel_name,
792 | cl_int *errcode_ret) */
793 function_type
794 = build_function_type_list (cl_kernel,
795 cl_program,
796 const_char_ptr,
797 integer_ptr_type_node,
798 NULL_TREE);
800 create_kernel_decl
801 = build_fn_decl (opencl_function_names[CREATE_KERNEL],
802 function_type);
804 /* | cl_mem clCreateBuffer (cl_context context,
805 | cl_mem_flags flags,
806 | size_t size,
807 | void *host_ptr,
808 | cl_int *errcode_ret) */
810 function_type
811 = build_function_type_list (cl_mem,
812 cl_context,
813 cl_mem_flags,
814 size_type_node,
815 ptr_type_node,
816 integer_ptr_type_node,
817 NULL_TREE);
818 create_buffer_decl
819 = build_fn_decl (opencl_function_names[CREATE_BUFFER],
820 function_type);
823 /* | cl_int clSetKernelArg (cl_kernel kernel,
824 | cl_uint arg_index,
825 | size_t arg_size,
826 | const void *arg_value) */
828 function_type
829 = build_function_type_list (integer_type_node,
830 cl_kernel,
831 unsigned_type_node,
832 size_type_node,
833 const_ptr_type_node,
834 NULL_TREE);
835 set_kernel_arg_decl
836 = build_fn_decl (opencl_function_names[SET_KERNEL_ARG],
837 function_type);
839 /* | cl_int clEnqueueNDRangeKernel (cl_command_queue command_queue,
840 | cl_kernel kernel,
841 | cl_uint work_dim,
842 | const size_t *global_work_offset,
843 | const size_t *global_work_size,
844 | const size_t *local_work_size,
845 | cl_uint num_events_in_wait_list,
846 | const cl_event *event_wait_list,
847 | cl_event *event) */
849 function_type
850 = build_function_type_list (integer_type_node,
851 cl_command_queue,
852 cl_kernel,
853 unsigned_type_node,
854 const_size_t_ptr,
855 const_size_t_ptr,
856 const_size_t_ptr,
857 unsigned_type_node,
858 const_cl_event_ptr,
859 cl_event_ptr,
860 NULL_TREE);
862 enqueue_nd_range_kernel_decl
863 = build_fn_decl (opencl_function_names[ENQUEUE_ND_RANGE_KERNEL],
864 function_type);
866 /* | cl_int clEnqueueReadBuffer (cl_command_queue command_queue,
867 | cl_mem buffer,
868 | cl_bool blocking_read,
869 | size_t offset,
870 | size_t cb,
871 | void *ptr,
872 | cl_uint num_events_in_wait_list,
873 | const cl_event *event_wait_list,
874 | cl_event *event) */
876 function_type
877 = build_function_type_list (integer_type_node,
878 cl_command_queue,
879 cl_mem,
880 unsigned_type_node,
881 size_type_node,
882 size_type_node,
883 ptr_type_node,
884 unsigned_type_node,
885 const_cl_event_ptr,
886 cl_event_ptr,
887 NULL_TREE);
889 enqueue_read_buffer_decl
890 = build_fn_decl (opencl_function_names[ENQUEUE_READ_BUFFER],
891 function_type);
893 /* | cl_int clEnqueueWriteBuffer (cl_command_queue command_queue,
894 | cl_mem buffer,
895 | cl_bool blocking_write,
896 | size_t offset,
897 | size_t cb,
898 | const void *ptr,
899 | cl_uint num_events_in_wait_list,
900 | const cl_event *event_wait_list,
901 | cl_event *event) */
903 function_type
904 = build_function_type_list (integer_type_node,
905 cl_command_queue,
906 cl_mem,
907 unsigned_type_node,
908 size_type_node,
909 size_type_node,
910 const_ptr_type_node,
911 unsigned_type_node,
912 const_cl_event_ptr,
913 cl_event_ptr,
914 NULL_TREE);
916 enqueue_write_buffer_decl
917 = build_fn_decl (opencl_function_names[ENQUEUE_WRITE_BUFFER],
918 function_type);
921 /* cl_int clReleaseMemObject (cl_mem memobj) */
923 function_type
924 = build_function_type_list (integer_type_node, cl_mem, NULL_TREE);
926 release_memory_obj_decl
927 = build_fn_decl (opencl_function_names[RELEASE_MEMORY_OBJ],
928 function_type);
931 /* cl_int clReleaseContext (cl_context context) */
932 function_type
933 = build_function_type_list (integer_type_node, cl_context,
934 NULL_TREE);
936 release_context_decl
937 = build_fn_decl (opencl_function_names[RELEASE_CONTEXT],
938 function_type);
940 /* cl_int clReleaseCommandQueue (cl_command_queue command_queue) */
941 function_type
942 = build_function_type_list (integer_type_node, cl_command_queue,
943 NULL_TREE);
945 release_command_queue_decl
946 = build_fn_decl (opencl_function_names[RELEASE_COMMAND_QUEUE],
947 function_type);
949 /* cl_int clReleaseProgram (cl_program program) */
950 function_type
951 = build_function_type_list (integer_type_node, cl_program,
952 NULL_TREE);
954 release_program_decl
955 = build_fn_decl (opencl_function_names[RELEASE_PROGRAM],
956 function_type);
958 /* cl_int clReleaseKernel (cl_kernel kernel) */
959 function_type
960 = build_function_type_list (integer_type_node, cl_kernel, NULL_TREE);
962 release_kernel_decl
963 = build_fn_decl (opencl_function_names[RELEASE_KERNEL],
964 function_type);
966 /* | cl_int clGetPlatformIDs (cl_uint num_entries,
967 | cl_platform_id *platforms,
968 | cl_uint *num_platforms) */
971 function_type
972 = build_function_type_list (integer_type_node,
973 unsigned_type_node,
974 cl_platford_id_ptr,
975 build_pointer_type (unsigned_type_node),
976 NULL_TREE);
977 get_platform_ids_decl
978 = build_fn_decl (opencl_function_names [GET_PLATFORM_IDS],
979 function_type);
982 /* | cl_int clWaitForEvents (cl_uint num_events,
983 | const cl_event *event_list) */
985 function_type
986 = build_function_type_list (integer_type_node,
987 unsigned_type_node,
988 const_cl_event_ptr,
989 NULL_TREE);
991 get_wait_for_events_decl
992 = build_fn_decl (opencl_function_names [WAIT_FOR_EVENTS],
993 function_type);
995 return NULL_TREE;
998 case CREATE_CONTEXT_FROM_TYPE:
999 return create_context_from_type_decl;
1001 case GET_CONTEXT_INFO:
1002 return get_context_info_decl;
1004 case CREATE_COMMAND_QUEUE:
1005 return create_command_queue_decl;
1007 case CREATE_PROGRAM_WITH_SOURCE:
1008 return create_program_with_source_decl;
1010 case BUILD_PROGRAM:
1011 return build_program_decl;
1013 case CREATE_KERNEL:
1014 return create_kernel_decl;
1016 case CREATE_BUFFER:
1017 return create_buffer_decl;
1019 case SET_KERNEL_ARG:
1020 return set_kernel_arg_decl;
1022 case ENQUEUE_ND_RANGE_KERNEL:
1023 return enqueue_nd_range_kernel_decl;
1025 case ENQUEUE_READ_BUFFER:
1026 return enqueue_read_buffer_decl;
1028 case ENQUEUE_WRITE_BUFFER:
1029 return enqueue_write_buffer_decl;
1031 case RELEASE_MEMORY_OBJ:
1032 return release_memory_obj_decl;
1034 case RELEASE_CONTEXT:
1035 return release_context_decl;
1037 case RELEASE_COMMAND_QUEUE:
1038 return release_command_queue_decl;
1040 case RELEASE_PROGRAM:
1041 return release_program_decl;
1043 case RELEASE_KERNEL:
1044 return release_kernel_decl;
1046 case GET_PLATFORM_IDS:
1047 return get_platform_ids_decl;
1049 case WAIT_FOR_EVENTS:
1050 return get_wait_for_events_decl;
1052 default: gcc_unreachable ();
1056 /* Add clWaitForEvent (1, EVENT_VAR); call to CODE_GEN->main_edge. */
1058 static void
1059 opencl_wait_for_event (opencl_main code_gen, tree event_var)
1061 tree function = opencl_create_function_decl (WAIT_FOR_EVENTS);
1062 tree call = build_call_expr (function, 2,
1063 integer_one_node,
1064 event_var);
1066 opencl_add_safe_call (code_gen, call, true);
1069 /* Add host to device memory transfer. DATA - data, which must be
1070 transfered to device. CODE_GEN holds information related to code
1071 generation. */
1073 static tree
1074 opencl_pass_to_device (opencl_main code_gen, opencl_data data)
1076 edge init_edge;
1077 tree function;
1078 tree call;
1080 tree index_type = build_index_type (build_int_cst (NULL_TREE, 2));
1081 tree array_type = build_array_type (ptr_type_node, index_type);
1082 tree var = opencl_create_tmp_var (array_type, "wait_event");
1083 tree event_call;
1085 TREE_STATIC (var) = 1;
1086 assemble_variable (var, 1, 0, 1);
1088 /* If data is wrutten in device, mark it is not up to date on host. */
1089 if (data->written_in_current_body)
1090 data->up_to_date_on_host = false;
1092 /* If data is up to date on device, but it was initialized befor
1093 current loop, then mark it as initialized in current loop and
1094 store it.
1096 Consider an example: D - device, H - host, W - write, R - read.
1098 | HW(1) -- LOOP
1099 | /\
1100 | / \
1101 | / \
1102 | HR(2) DW(3)
1104 While analyzing statement (2), data will be up to date on host
1105 because of statement (1), but while executing after (3) in loop,
1106 (2) will read incorrect data.
1108 So, we have to add device to host memory transfer after statement (3).
1110 if (flag_graphite_opencl_cpu)
1111 return data->device_object;
1113 if (data->up_to_date_on_device)
1115 if (!data->inited_in_current_loop_on_device
1116 && code_gen && code_gen->curr_meta
1117 && code_gen->curr_meta->parent)
1118 VEC_safe_push (opencl_data, heap,
1119 code_gen->curr_meta->parent->post_pass_to_device,
1120 data);
1122 data->inited_in_current_loop_on_device = true;
1123 return data->device_object;
1126 data->inited_in_current_loop_on_device = true;
1127 init_edge = opencl_get_edge_for_init (code_gen, data->id, true);
1129 /* Add gimple. */
1130 function = opencl_create_function_decl (ENQUEUE_WRITE_BUFFER);
1132 event_call = build4 (ARRAY_REF, ptr_type_node, var,
1133 integer_zero_node, NULL_TREE, NULL_TREE);
1134 event_call = build_addr (event_call, current_function_decl);
1135 call = build_call_expr (function, 9,
1136 h_cmd_queue,
1137 data->device_object,
1138 build_int_cst (NULL_TREE, CL_TRUE),
1139 integer_zero_node,
1140 data->size_variable,
1141 data->object,
1142 integer_zero_node,
1143 null_pointer_node,
1144 event_call);
1146 if (init_edge)
1147 opencl_add_safe_call_on_edge (call, true, init_edge);
1148 else
1149 opencl_add_safe_call (code_gen, call, true);
1151 data->up_to_date_on_device = true;
1152 opencl_wait_for_event (code_gen, event_call);
1153 return data->device_object;
1156 /* Add device to host memory transfer. DATA - data, which must be
1157 transfered to host. CODE_GEN holds information related to code
1158 generation. */
1160 static void
1161 opencl_pass_to_host (opencl_main code_gen, opencl_data data)
1163 edge init_edge;
1164 tree function;
1165 tree curr_type;
1166 tree curr;
1167 tree call;
1168 tree index_type = build_index_type (build_int_cst (NULL_TREE, 2));
1169 tree array_type = build_array_type (ptr_type_node, index_type);
1170 tree var = opencl_create_tmp_var (array_type, "wait_event");
1171 tree event_call;
1173 TREE_STATIC (var) = 1;
1174 assemble_variable (var, 1, 0, 1);
1176 if (data->written_in_current_body)
1177 data->up_to_date_on_device = false;
1179 if (data->up_to_date_on_host)
1181 if (!data->inited_in_current_loop_on_host
1182 && code_gen && code_gen->curr_meta &&
1183 code_gen->curr_meta->parent)
1184 VEC_safe_push (opencl_data, heap,
1185 code_gen->curr_meta->parent->post_pass_to_host, data);
1187 data->inited_in_current_loop_on_host = true;
1188 return;
1191 data->inited_in_current_loop_on_host = true;
1193 if (flag_graphite_opencl_cpu
1194 || data->privatized)
1195 return;
1197 init_edge = opencl_get_edge_for_init (code_gen, data->id, false);
1199 function = opencl_create_function_decl (ENQUEUE_READ_BUFFER);
1200 curr_type = TREE_TYPE (data->object);
1201 curr = data->object;
1203 if (TREE_CODE (curr_type) == ARRAY_TYPE)
1204 curr = build_addr (curr, current_function_decl);
1206 event_call = build4 (ARRAY_REF, ptr_type_node, var,
1207 integer_zero_node, NULL_TREE, NULL_TREE);
1208 event_call = build_addr (event_call, current_function_decl);
1210 call = build_call_expr (function, 9,
1211 h_cmd_queue,
1212 data->device_object,
1213 build_int_cst (NULL_TREE, CL_TRUE),
1214 integer_zero_node,
1215 data->size_variable,
1216 curr, integer_zero_node,
1217 null_pointer_node,
1218 event_call);
1220 if (init_edge)
1221 opencl_add_safe_call_on_edge (call, true, init_edge);
1222 else
1223 opencl_add_safe_call (code_gen, call, true);
1225 opencl_wait_for_event (code_gen, event_call);
1226 data->up_to_date_on_host = true;
1229 /* Pass all data from device to host. This function must be called when
1230 we need all data to be up to date on host. CODE_GEN holds information
1231 related to code generation. */
1233 static void
1234 opencl_fflush_all_device_buffers_to_host (opencl_main code_gen)
1236 VEC (opencl_data, heap) *datas = code_gen->opencl_function_data;
1237 int i;
1238 opencl_data curr;
1239 tree function = opencl_create_function_decl (RELEASE_MEMORY_OBJ);
1241 for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
1243 curr->written_in_current_body = true;
1244 opencl_pass_to_host (code_gen, curr);
1247 for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
1248 if (curr->used_on_device && !curr->is_static)
1250 tree var = curr->device_object;
1251 tree call = build_call_expr (function, 1, var);
1253 opencl_add_safe_call (code_gen, call, true);
1257 /* Calculate correct flags for clCreateBuffer. READ means, that
1258 buffer must be readable on device, WRITE - that buffer must be
1259 writable on device. */
1261 static int
1262 opencl_get_mem_flags (bool read, bool write)
1264 int rw_flags;
1265 int location_flags;
1267 gcc_assert (read || write);
1269 if (write && read)
1270 rw_flags = CL_MEM_READ_WRITE;
1271 else
1273 if (read)
1274 rw_flags = CL_MEM_READ_ONLY;
1275 else
1276 rw_flags = CL_MEM_WRITE_ONLY;
1279 if (flag_graphite_opencl_cpu)
1280 location_flags = CL_MEM_USE_HOST_PTR;
1281 else
1282 location_flags = CL_MEM_COPY_HOST_PTR;
1284 return location_flags | rw_flags;
1287 /* Create memory on device for DATA and init it by data from host.
1288 ptr is pointer to host memory location. Function returns tree,
1289 corresponding to memory location on device. */
1291 static tree
1292 opencl_create_memory_for_pointer (opencl_data data)
1294 tree ptr = data->object;
1295 tree arr_size = data->size_variable;
1296 tree function = opencl_create_function_decl (CREATE_BUFFER);
1297 bool ever_read = data->ever_read_on_device;
1298 bool ever_written = data->ever_written_on_device;
1299 tree mem_flags = build_int_cst (NULL_TREE,
1300 opencl_get_mem_flags (ever_read,
1301 ever_written));
1302 if (TREE_CODE (TREE_TYPE (ptr)) == ARRAY_TYPE)
1303 ptr = build_addr (ptr, current_function_decl);
1305 if (flag_graphite_opencl_debug)
1307 tree result = opencl_create_tmp_var (integer_type_node,
1308 "__opencl_create_buffer_result");
1310 return build_call_expr (function, 5,
1311 h_context, mem_flags,
1312 arr_size, ptr,
1313 build1 (ADDR_EXPR,
1314 integer_ptr_type_node,
1315 result));
1317 else
1318 return build_call_expr (function, 5,
1319 h_context, mem_flags,
1320 arr_size, ptr, null_pointer_node);
1323 /* Create memory buffers on host for all required host memory objects.
1324 CODE_GEN holds information related to code generation. */
1326 static void
1327 opencl_init_all_device_buffers (opencl_main code_gen)
1329 VEC (opencl_data, heap) *datas = code_gen->opencl_function_data;
1330 int i;
1331 opencl_data curr;
1332 edge data_init_edge = single_succ_edge (code_gen->data_init_bb);
1334 for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
1336 tree tmp;
1338 if (!curr->used_on_device || curr->is_static)
1339 continue;
1341 tmp = opencl_create_memory_for_pointer (curr);
1342 tmp = build2 (MODIFY_EXPR, ptr_type_node, curr->device_object, tmp);
1343 data_init_edge = opencl_add_safe_call_on_edge (tmp, false,
1344 data_init_edge);
1348 /* Create new static void * variable with name __ocl_ + NAME. */
1350 static tree
1351 opencl_create_static_ptr_variable (const char *name)
1353 const char *id_name = concat ("__ocl_",name, NULL);
1354 tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL,
1355 create_tmp_var_name (id_name), ptr_type_node);
1356 TREE_STATIC (var) = 1;
1357 TREE_PUBLIC (var) = 0;
1358 DECL_ARTIFICIAL (var) = 1;
1359 TREE_USED (var) = 1;
1360 TREE_ADDRESSABLE (var) = 1;
1361 DECL_INITIAL (var) = null_pointer_node;
1362 assemble_variable (var, 1, 0, 1);
1364 return var;
1367 /* Insert several opencl calls to output program. */
1369 /* | cl_program h_program;
1370 | h_program = clCreateProgramWithSource (h_context, 1,
1371 | sProgramSource, 0, 0);
1373 SRC is the program source code and DEST is the edge where
1374 call must be inserted. */
1376 static edge
1377 opencl_insert_create_program_with_source_call (const char *src, edge base)
1379 /* Get tree with function definition. */
1380 tree function = opencl_create_function_decl (CREATE_PROGRAM_WITH_SOURCE);
1381 tree code_tree = build_string_literal (strlen (src) + 1, src);
1382 tree call;
1383 basic_block bb = split_edge (base);
1384 tree tmp_var = opencl_tree_to_var (bb, code_tree);
1386 call = build_call_expr (function, 5, h_context,
1387 integer_one_node,
1388 build_addr (tmp_var, current_function_decl),
1389 null_pointer_node,
1390 null_pointer_node);
1392 call = build2 (MODIFY_EXPR, ptr_type_node,
1393 h_program, call);
1394 return opencl_add_safe_call_on_edge (call, false, single_succ_edge (bb));
1397 /* clBuildProgram (h_program, 0, 0, 0, 0, 0);
1398 BASE is the edge where call must be inserted. */
1400 static edge
1401 opencl_insert_build_program_call (edge base)
1403 tree function = opencl_create_function_decl (BUILD_PROGRAM);
1404 tree call = build_call_expr (function, 6,
1405 h_program,
1406 integer_zero_node, null_pointer_node,
1407 null_pointer_node, null_pointer_node,
1408 null_pointer_node);
1409 return opencl_add_safe_call_on_edge (call, true, base);
1412 /* cl_kernel tmm_kernel;
1413 tmp_kernel = clCreateKernel (h_program, func_name, 0);
1414 FUNCTION_NAME is the name of the kernel function,
1415 CODE_GEN holds information related to code generation. */
1417 static tree
1418 opencl_insert_create_kernel_call (opencl_main code_gen,
1419 const char *function_name)
1421 tree new_kernel_var;
1422 basic_block bb;
1423 gimple_stmt_iterator g_iter;
1424 tree function;
1425 tree kernel_name;
1426 tree call;
1427 tree tmp_tree;
1429 new_kernel_var = opencl_create_tmp_var (ptr_type_node, function_name);
1430 bb = split_edge (code_gen->kernel_edge);
1431 g_iter = gsi_last_bb (bb);
1432 function = opencl_create_function_decl (CREATE_KERNEL);
1433 kernel_name = build_string_literal (strlen (function_name) + 1,
1434 function_name);
1435 call = build_call_expr (function, 3, h_program, kernel_name,
1436 null_pointer_node);
1437 tmp_tree = build2 (MODIFY_EXPR, ptr_type_node,
1438 new_kernel_var, call);
1440 code_gen->kernel_edge = single_succ_edge (bb);
1441 force_gimple_operand_gsi (&g_iter, tmp_tree, true, NULL, false,
1442 GSI_CONTINUE_LINKING);
1443 VEC_safe_push (tree, heap, opencl_function_kernels, new_kernel_var);
1444 code_gen->kernel_edge
1445 = opencl_add_safe_call_on_edge (new_kernel_var, false,
1446 code_gen->kernel_edge);
1447 return new_kernel_var;
1450 /* Init memory on device. Only one levell of pointers are suppoted.
1451 So in case of char ** only array of char * will be created.
1452 Function return tree, corresponding to new pointer (pointer
1453 on device).
1455 | cl_mem clCreateBuffer (cl_context context,
1456 | cl_mem_flags flags,
1457 | size_t size,
1458 | void *host_ptr,
1459 | cl_int *errcode_ret) */
1461 /* Calculate size of data reference, represented by REF. PTR is a
1462 base object of data reference. */
1464 static tree
1465 opencl_get_indirect_size (tree ptr, poly_dr_p ref)
1467 ptr = TREE_TYPE (ptr);
1469 switch (TREE_CODE (ptr))
1471 case ARRAY_TYPE:
1472 return TYPE_SIZE_UNIT (ptr);
1474 case POINTER_TYPE:
1476 tree inner_type = TREE_TYPE (ptr);
1477 tree t = graphite_outer_subscript_bound (ref, false);
1478 tree inner_type_size = TYPE_SIZE_UNIT (inner_type);
1480 if (inner_type_size == NULL)
1481 return NULL;
1483 if (DECL_P (inner_type_size))
1484 add_referenced_var (inner_type_size);
1486 gcc_assert (t);
1487 t = fold_build2 (TRUNC_DIV_EXPR, sizetype, t, inner_type_size);
1488 t = fold_build2 (PLUS_EXPR, sizetype, t, size_one_node);
1489 t = fold_build2 (MULT_EXPR, sizetype, t, inner_type_size);
1490 return t;
1493 default:
1494 return NULL_TREE;
1497 gcc_unreachable ();
1500 /* Create variables for kernel KERNEL arguments. Each argument is
1501 represented by new variable with it's value and it's size. If arg
1502 is a pointer or array, it's represented by device buffer with data
1503 from host memory. CODE_GEN holds information related to code
1504 generation. */
1506 static void
1507 opencl_init_local_device_memory (opencl_main code_gen, opencl_body kernel)
1509 VEC (tree, heap) **args = &kernel->function_args;
1510 VEC (tree, heap) **args_to_pass = &kernel->function_args_to_pass;
1511 VEC (opencl_data, heap) **refs = &kernel->data_refs;
1512 tree curr;
1513 opencl_data curr_data;
1514 int i;
1515 basic_block bb = opencl_create_bb (code_gen);
1516 basic_block kernel_bb = split_edge (code_gen->kernel_edge);
1518 code_gen->kernel_edge = single_succ_edge (kernel_bb);
1520 for (i = 0; VEC_iterate (tree, *args, i, curr); i ++)
1522 gimple_stmt_iterator g_iter = gsi_last_bb (bb);
1523 gimple_stmt_iterator kernel_g_iter = gsi_last_bb (kernel_bb);
1524 tree curr_type = TREE_TYPE (curr);
1525 tree new_type;
1526 tree tmp_var;
1527 tree mov;
1528 tree curr_var = opencl_create_tmp_var (curr_type, "__ocl_iv");
1530 if (TREE_CODE (curr) != PARM_DECL
1531 && TREE_CODE (curr) != VAR_DECL)
1533 mov = build2 (MODIFY_EXPR, curr_type, curr_var, curr);
1535 force_gimple_operand_gsi (&g_iter, mov, false, NULL, false,
1536 GSI_CONTINUE_LINKING);
1538 else
1539 force_gimple_operand_gsi (&g_iter, curr, false, curr_var, false,
1540 GSI_CONTINUE_LINKING);
1541 curr = curr_var;
1543 new_type = build_pointer_type (curr_type);
1544 tmp_var = opencl_create_tmp_var (new_type, "__opencl_scalar_arg");
1545 mov = build1 (ADDR_EXPR, new_type, curr);
1547 mov = build2 (MODIFY_EXPR, new_type, tmp_var, mov);
1549 force_gimple_operand_gsi (&kernel_g_iter, mov, false, NULL, false,
1550 GSI_CONTINUE_LINKING);
1551 VEC_safe_push (tree, heap, *args_to_pass, tmp_var);
1554 for (i = 0; VEC_iterate (opencl_data, *refs, i, curr_data); i++)
1556 gimple_stmt_iterator kernel_g_iter = gsi_last_bb (kernel_bb);
1557 tree new_type;
1558 tree tmp_var;
1559 tree mov;
1560 tree curr = opencl_pass_to_device (code_gen, curr_data);
1561 tree curr_type = ptr_type_node;
1563 new_type = build_pointer_type (curr_type);
1564 tmp_var = opencl_create_tmp_var (new_type, "__opencl_non_scalar_arg");
1565 mov = build1 (ADDR_EXPR, new_type, curr);
1567 mov = build2 (MODIFY_EXPR, new_type, tmp_var, mov);
1569 force_gimple_operand_gsi (&kernel_g_iter, mov, false, NULL, false,
1570 GSI_CONTINUE_LINKING);
1571 VEC_safe_push (tree, heap, *args_to_pass, tmp_var);
1576 /* cl_int clSetKernelArg (cl_kernel kernel,
1577 cl_uint arg_index,
1578 size_t arg_size,
1579 const void *arg_value)
1581 Set all kernel args for OpenCL kernel, represented by KERNEL_VAR.
1582 KERNEL holds all data, related to given kernel.
1583 CODE_GEN holds information related to code generation.
1584 All arguments are passed by pointer. */
1586 static void
1587 opencl_pass_kernel_arguments (opencl_main code_gen, opencl_body kernel,
1588 tree kernel_var)
1590 VEC (tree, heap) *args_to_pass = kernel->function_args_to_pass;
1591 tree arg;
1592 int i;
1593 tree function = opencl_create_function_decl (SET_KERNEL_ARG);
1595 for (i = 0; VEC_iterate (tree, args_to_pass, i, arg); i++)
1597 tree call
1598 = build_call_expr (function, 4, kernel_var,
1599 build_int_cst (NULL_TREE, i),
1600 TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (arg))),
1601 arg);
1602 opencl_add_safe_call (code_gen, call, true);
1606 /* clEnqueueNDRangeKernel (h_cmd_queue, hKernel, 1, 0,
1607 &cnDimension, 0, 0, 0, 0);
1609 Execute kernel, represented by KERNEL_VAR in NUM_OF_EXEC threads.
1610 Use EVENT_VAR as event variable for asynchronous call.
1611 CODE_GEN holds information related to code generation. */
1613 static void
1614 opencl_execute_kernel (opencl_main code_gen, tree num_of_exec,
1615 tree kernel_var, tree event_var)
1617 tree function = opencl_create_function_decl (ENQUEUE_ND_RANGE_KERNEL);
1618 tree num_of_threads = opencl_create_tmp_var (integer_type_node,
1619 "__opencl_num_of_exec");
1620 gimple_stmt_iterator g_iter = gsi_last_bb (opencl_create_bb (code_gen));
1621 tree call;
1623 TREE_STATIC (num_of_threads) = 1;
1624 assemble_variable (num_of_threads, 1, 0, 1);
1626 call = build2 (MODIFY_EXPR, integer_type_node, num_of_threads, num_of_exec);
1628 force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
1629 GSI_CONTINUE_LINKING);
1631 call = build1 (ADDR_EXPR, integer_ptr_type_node, num_of_threads);
1633 call = build_call_expr (function, 9,
1634 h_cmd_queue,
1635 kernel_var,
1636 integer_one_node,
1637 null_pointer_node,
1638 call,
1639 null_pointer_node,
1640 integer_zero_node,
1641 null_pointer_node,
1642 event_var);
1644 opencl_add_safe_call (code_gen, call, true);
1647 /* Place building program from single source string to edge BASE.
1648 Current implementation performs single build per function.
1649 String contains kernels from all scops of current function.
1650 Functions returns true if any kernel has been created. */
1652 static edge
1653 opencl_create_function_call (edge base)
1655 edge new_edge;
1656 const char *src;
1658 /* Required for addressing types with size less then 4 bytes. */
1659 dyn_string_prepend_cstr
1660 (main_program_src,
1661 "#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable\n");
1662 /* Required for double type. */
1663 dyn_string_prepend_cstr
1664 (main_program_src, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n ");
1665 src = dyn_string_buf (main_program_src);
1667 if (dump_file && (dump_flags & TDF_DETAILS))
1669 fprintf (dump_file, "\nGenerated OpenCL code: \n");
1670 fprintf (dump_file, "%s", src);
1673 new_edge = opencl_insert_create_program_with_source_call (src, base);
1675 return opencl_insert_build_program_call (new_edge);
1678 /* Mark privatizable data for current loop nest. Information where
1679 given data can be privatized is taken from meta information of
1680 current loop nest, which is stored in CODE_GEN. */
1682 static void
1683 opencl_mark_privatized_data (opencl_main code_gen)
1685 VEC (opencl_data, heap) *datas = code_gen->opencl_function_data;
1686 int i;
1687 opencl_data curr;
1688 bitmap can_be_private = code_gen->curr_meta->can_be_private;
1690 for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
1691 curr->privatized = bitmap_bit_p (can_be_private, curr->id);
1695 /* Store calculated sizes of all pointers or arrays to variables.
1696 CODE_GEN holds information related to code generation. */
1698 static void
1699 opencl_set_data_size (opencl_main code_gen)
1701 VEC (opencl_data, heap) *datas = code_gen->opencl_function_data;
1702 int i;
1703 opencl_data curr;
1704 gimple_stmt_iterator g_iter = gsi_last_bb (code_gen->data_init_bb);
1706 for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
1708 tree call;
1710 if (curr->is_static
1711 || !curr->used_on_device
1712 || curr->size_value == NULL)
1713 continue;
1715 call = build2 (MODIFY_EXPR, size_type_node,
1716 curr->size_variable, curr->size_value);
1718 force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
1719 GSI_CONTINUE_LINKING);
1723 /* Find opencl_data which represents array VAR. */
1725 static opencl_data
1726 opencl_get_static_data_by_tree (tree var)
1728 map_tree_to_data tmp = map_tree_to_data_create (var, NULL);
1729 map_tree_to_data *slot
1730 = (map_tree_to_data *) htab_find_slot (array_data_to_tree,
1731 tmp, INSERT);
1732 if (*slot == NULL)
1733 return NULL;
1735 return (*slot)->value;
1739 /* Create required OpenCL variable for given DATA. */
1741 static void
1742 opencl_data_init_object (opencl_data data)
1744 if (TREE_CODE (TREE_TYPE (data->exact_object)) == POINTER_TYPE)
1746 data->device_object
1747 = opencl_create_tmp_var (ptr_type_node, "__opencl_data");
1748 data->is_static = false;
1750 else
1752 /* (TREE_CODE (TREE_TYPE (data->exact_object)) == ARRAY_TYPE) */
1753 map_tree_to_data tree_ptr
1754 = map_tree_to_data_create (data->exact_object, data);
1755 map_tree_to_data *tree_slot
1756 = (map_tree_to_data *) htab_find_slot (array_data_to_tree,
1757 tree_ptr, INSERT);
1759 gcc_assert (*tree_slot == NULL);
1760 *tree_slot = tree_ptr;
1762 data->device_object
1763 = opencl_create_static_ptr_variable ("__opencl_data");
1764 data->is_static = true;
1765 data->size_variable = data->size_value;
1766 VEC_safe_push (opencl_data, heap, opencl_array_data, data);
1770 /* Register reference to DATA via data reference REF_KEY and
1771 variable TREE_KEY in CODE_GEN structures. */
1773 static void
1774 opencl_register_data (opencl_main code_gen, opencl_data data,
1775 tree tree_key, data_reference_p ref_key)
1777 htab_t ref_to_data = code_gen->ref_to_data;
1778 htab_t tree_to_data = code_gen->tree_to_data;
1779 map_ref_to_data ref_ptr = map_ref_to_data_create (ref_key, data);
1780 map_tree_to_data tree_ptr = map_tree_to_data_create (tree_key, data);
1781 map_ref_to_data *ref_slot;
1782 map_tree_to_data *tree_slot;
1784 ref_slot
1785 = (map_ref_to_data *) htab_find_slot (ref_to_data, ref_ptr, INSERT);
1786 gcc_assert (*ref_slot == NULL);
1787 *ref_slot = ref_ptr;
1789 tree_slot
1790 = (map_tree_to_data *) htab_find_slot (tree_to_data, tree_ptr, INSERT);
1791 gcc_assert (*tree_slot == NULL || (*tree_slot)->value == data);
1792 *tree_slot = tree_ptr;
1795 /* Analyze single data reference REF and update CODE_GEN structures.
1796 If it access data, which has been accessed in data references
1797 before, update it's size. Otherwise add data to array. */
1799 static void
1800 opencl_parse_single_data_ref (poly_dr_p ref, opencl_main code_gen)
1802 data_reference_p d_ref = (data_reference_p) PDR_CDR (ref);
1803 tree data_ref_tree = dr_outermost_base_object (d_ref);
1804 opencl_data curr = opencl_get_data_by_tree (code_gen, data_ref_tree);
1805 tree size = opencl_get_indirect_size (data_ref_tree, ref);
1807 if (curr)
1809 if (!curr->is_static)
1811 if (!size || !curr->size_value)
1812 curr->size_value = NULL;
1813 else
1814 curr->size_value = fold_build2 (MAX_EXPR, sizetype,
1815 size, curr->size_value);
1818 else
1820 curr = opencl_get_static_data_by_tree (data_ref_tree);
1822 if (!curr)
1824 curr = opencl_data_create (data_ref_tree, size);
1825 opencl_data_init_object (curr);
1828 curr->id = VEC_length (opencl_data, code_gen->opencl_function_data);
1829 VEC_safe_push (opencl_data, heap, code_gen->opencl_function_data, curr);
1832 opencl_register_data (code_gen, curr, data_ref_tree, d_ref);
1835 /* Analyse all data reference for poly basic block PBB and update CODE_GEN
1836 structures. */
1838 static void
1839 opencl_parse_data_refs (poly_bb_p pbb, opencl_main code_gen)
1841 VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb);
1842 int i;
1843 poly_dr_p curr;
1845 for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++)
1846 opencl_parse_single_data_ref (curr, code_gen);
1849 /* Analyse all data reference for scop M_SCOP and update
1850 CODE_GEN structures. */
1852 static void
1853 opencl_init_data (scop_p m_scop, opencl_main code_gen)
1855 VEC (poly_bb_p, heap) *bbs = SCOP_BBS (m_scop);
1856 int i;
1857 poly_bb_p curr;
1859 for (i = 0; VEC_iterate (poly_bb_p, bbs, i, curr); i++)
1860 opencl_parse_data_refs (curr, code_gen);
1863 /* Init basic block in CODE_GEN structures. */
1865 static void
1866 opencl_init_basic_blocks (opencl_main code_gen)
1868 code_gen->data_init_bb = opencl_create_bb (code_gen);
1869 code_gen->kernel_edge = code_gen->main_edge;
1872 /* Add function calls to create and launch kernel KERNEL to
1873 CODE_GEN->main_edge. */
1875 static void
1876 opencl_create_gimple_for_body (opencl_body kernel, opencl_main code_gen)
1878 tree num_of_exec = kernel->num_of_exec;
1879 tree call;
1881 tree kernel_var
1882 = opencl_insert_create_kernel_call (code_gen, (const char *) kernel->name);
1884 tree index_type = build_index_type (build_int_cst (NULL_TREE, 2));
1885 tree array_type = build_array_type (ptr_type_node, index_type);
1886 tree var = opencl_create_tmp_var (array_type, "wait_event");
1888 TREE_STATIC (var) = 1;
1889 assemble_variable (var, 1, 0, 1);
1891 call = build4 (ARRAY_REF, ptr_type_node, var,
1892 integer_zero_node, NULL_TREE, NULL_TREE);
1893 call = build_addr (call, current_function_decl);
1895 opencl_init_local_device_memory (code_gen, kernel);
1896 opencl_pass_kernel_arguments (code_gen, kernel, kernel_var);
1898 opencl_execute_kernel (code_gen, num_of_exec, kernel_var, call);
1899 opencl_wait_for_event (code_gen, call);
1902 /* Prepare memory for gimple (host) statement, represented by PBB.
1903 Copy memory from device to host if it's nessesary.
1904 CODE_GEN holds information related to code generation. */
1906 static void
1907 opencl_prepare_memory_for_gimple_stmt (poly_bb_p pbb, opencl_main code_gen)
1909 VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb);
1910 int i;
1911 poly_dr_p curr;
1913 for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++)
1915 data_reference_p d_ref = (data_reference_p) PDR_CDR (curr);
1916 opencl_data data;
1917 bool is_read;
1919 /* Scalar variables can be passed directly. */
1920 data = opencl_get_data_by_data_ref (code_gen, d_ref);
1922 /* Private variables should not be passed from device to host. */
1923 if (data->privatized)
1924 continue;
1926 is_read = DR_IS_READ (d_ref);
1927 gcc_assert (data);
1929 data->read_in_current_body = is_read;
1930 data->written_in_current_body = !is_read;
1931 opencl_pass_to_host (code_gen, data);
1933 if (!is_read)
1934 bitmap_set_bit (code_gen->curr_meta->modified_on_host, data->id);
1938 /* Add basic block from clast_user_stmt STMT to gimple.
1939 CODE_GEN holds information related to code generation. */
1941 static void
1942 opencl_add_gimple_for_user_stmt (struct clast_user_stmt *stmt,
1943 opencl_main code_gen)
1945 gimple_bb_p gbb;
1946 CloogStatement *cs = stmt->statement;
1947 poly_bb_p pbb = (poly_bb_p) cloog_statement_usr (cs);
1948 sese region = code_gen->region;
1949 int nb_loops = number_of_loops ();
1950 int i;
1951 VEC (tree, heap) *iv_map = VEC_alloc (tree, heap, nb_loops);
1952 htab_t newivs_index = code_gen->newivs_index;
1953 VEC (tree, heap) *newivs = code_gen->newivs;
1955 /* Get basic block to add. */
1956 gbb = PBB_BLACK_BOX (pbb);
1958 if (GBB_BB (gbb) == ENTRY_BLOCK_PTR)
1959 return;
1961 /*Reset flags. */
1962 opencl_fflush_rw_flags (code_gen);
1964 /* Pass all required memory to host. */
1965 opencl_prepare_memory_for_gimple_stmt (pbb, code_gen);
1967 for (i = 0; i < nb_loops; i++)
1968 VEC_quick_push (tree, iv_map, NULL_TREE);
1970 build_iv_mapping (iv_map, region, newivs, newivs_index,
1971 stmt, code_gen->params_index);
1972 code_gen->main_edge
1973 = copy_bb_and_scalar_dependences (GBB_BB (gbb), region,
1974 code_gen->main_edge, iv_map);
1975 VEC_free (tree, heap, iv_map);
1976 recompute_all_dominators ();
1977 update_ssa (TODO_update_ssa);
1979 opencl_verify ();
1982 /* Delete opencl_body DATA. */
1984 static void
1985 opencl_body_delete (opencl_body data)
1987 dyn_string_delete (data->body);
1988 dyn_string_delete (data->header);
1989 dyn_string_delete (data->pre_header);
1990 dyn_string_delete (data->non_scalar_args);
1991 VEC_free (tree, heap, data->function_args);
1992 VEC_free (tree, heap, data->function_args_to_pass);
1993 VEC_free (opencl_data, heap, data->data_refs);
1994 free (data);
1997 /* Reset data structures before processing loop, represented by META.
1998 CODE_GEN holds information related to code generation. */
2000 static void
2001 opencl_init_new_loop (opencl_clast_meta meta, opencl_main code_gen)
2003 opencl_data curr;
2004 unsigned i;
2006 meta->post_pass_to_host
2007 = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE);
2008 meta->post_pass_to_device
2009 = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE);
2011 for (i = 0;
2012 VEC_iterate (opencl_data, code_gen->opencl_function_data, i, curr);
2013 i++)
2015 curr->inited_in_current_loop_on_host = false;
2016 curr->inited_in_current_loop_on_device = false;
2020 /* Post loop init. Loop through all data stored in POST_PASS_TO_HOST
2021 and POST_PASS_TO_DEVICE vectors in META. This data must be up to
2022 date on host or device respectively at the end of current loop.
2023 CODE_GEN holds information related to code generation. */
2025 static void
2026 opencl_postpass_data (opencl_main code_gen, opencl_clast_meta meta)
2028 opencl_data curr;
2029 unsigned i;
2031 for (i = 0;
2032 VEC_iterate (opencl_data, meta->post_pass_to_host, i, curr); i++)
2034 curr->written_in_current_body = false;
2035 opencl_pass_to_host (code_gen, curr);
2038 if (!flag_graphite_opencl_cpu)
2039 for (i = 0;
2040 VEC_iterate (opencl_data, meta->post_pass_to_device, i, curr); i++)
2042 curr->written_in_current_body = false;
2043 opencl_pass_to_device (code_gen, curr);
2046 if (meta->parent)
2048 VEC (opencl_data, heap) *parent_vec_host
2049 = meta->parent->post_pass_to_host;
2050 VEC (opencl_data, heap) *parent_vec_device
2051 = meta->parent->post_pass_to_device;
2053 for (i = 0;
2054 VEC_iterate (opencl_data, meta->post_pass_to_host, i, curr); i++)
2055 VEC_safe_push (opencl_data, heap, parent_vec_host, curr);
2057 for (i = 0;
2058 VEC_iterate (opencl_data, meta->post_pass_to_device, i, curr); i++)
2059 VEC_safe_push (opencl_data, heap, parent_vec_device, curr);
2062 VEC_free (opencl_data, heap, meta->post_pass_to_host);
2063 VEC_free (opencl_data, heap, meta->post_pass_to_device);
2066 static void opencl_transform_stmt_list (struct clast_stmt *, opencl_main, int);
2068 /* Add loop body, of the loop, represented by S, on host.
2069 Loop body can contain device code.
2070 DEPTH contains depth of given loop in current loop nest.
2071 DEPENDENCY indicates where given loop has any dependencies.
2072 CODE_GEN holds information related to code generation. */
2074 static void
2075 opencl_add_gimple_for_loop (struct clast_for *s, opencl_main code_gen,
2076 int depth, bool dependency)
2078 loop_p old_parent = code_gen->context_loop;
2079 loop_p new_loop = graphite_create_new_loop
2080 (code_gen->region, code_gen->main_edge, s, code_gen->context_loop,
2081 &code_gen->newivs, code_gen->newivs_index, code_gen->params_index, depth);
2082 edge last_e = single_exit (new_loop);
2083 edge to_body = single_succ_edge (new_loop->header);
2084 basic_block after = to_body->dest;
2085 opencl_clast_meta parent = code_gen->curr_meta->parent;
2087 last_e = single_succ_edge (split_edge (last_e));
2089 code_gen->context_loop = new_loop;
2090 code_gen->main_edge = single_succ_edge (new_loop->header);
2092 opencl_init_new_loop (parent, code_gen);
2093 opencl_transform_stmt_list (s->body, code_gen, depth + 1);
2095 code_gen->context_loop = old_parent;
2097 redirect_edge_succ_nodup (code_gen->main_edge, after);
2098 set_immediate_dominator (CDI_DOMINATORS, code_gen->main_edge->dest,
2099 code_gen->main_edge->src);
2101 opencl_postpass_data (code_gen, parent);
2102 code_gen->main_edge = last_e;
2104 if (flag_loop_parallelize_all && !dependency)
2105 new_loop->can_be_parallel = true;
2107 opencl_verify ();
2110 /* Add loop, represented by S, on host. Loop body can contain device code.
2111 DEPTH contains depth of given loop in current loop nest.
2112 DEPENDENCY indicates where given loop has any dependencies.
2113 CODE_GEN holds information related to code generation. */
2115 static void
2116 opencl_add_gimple_for_stmt_for (struct clast_for *s, opencl_main code_gen,
2117 int depth, bool dependency)
2119 edge last_e = graphite_create_new_loop_guard (code_gen->region,
2120 code_gen->main_edge,
2121 s, code_gen->newivs,
2122 code_gen->newivs_index,
2123 code_gen->params_index);
2124 edge true_e = get_true_edge_from_guard_bb (code_gen->main_edge->dest);
2126 code_gen->main_edge = true_e;
2127 opencl_add_gimple_for_loop (s, code_gen, depth, dependency);
2128 code_gen->main_edge = last_e;
2131 /* Calculate parent data access flags in META based on children.
2132 parent->modified_on_host = OR_{forall children} child->modified_on_host.
2133 parent->modified_on_device = OR_{forall children} child->modified_on_device.
2136 static void
2137 opencl_fix_meta_flags (opencl_clast_meta meta)
2139 opencl_clast_meta curr = meta->body;
2141 while (curr)
2143 bitmap_ior_into (meta->modified_on_host, curr->modified_on_host);
2144 bitmap_ior_into (meta->modified_on_device, curr->modified_on_device);
2145 curr = curr->next;
2149 /* Add if statement, represented by S to current gimple.
2150 CODE_GEN holds information related to code generation. */
2152 static void
2153 opencl_add_gimple_for_stmt_guard (struct clast_guard *s,
2154 opencl_main code_gen, int depth)
2156 edge last_e = graphite_create_new_guard (code_gen->region,
2157 code_gen->main_edge, s,
2158 code_gen->newivs,
2159 code_gen->newivs_index,
2160 code_gen->params_index);
2161 edge true_e = get_true_edge_from_guard_bb (code_gen->main_edge->dest);
2163 code_gen->main_edge = true_e;
2164 opencl_transform_stmt_list (s->then, code_gen, depth);
2165 code_gen->main_edge = last_e;
2167 recompute_all_dominators ();
2168 opencl_verify ();
2171 /* Parse clast statement list S, located on depth DEPTH in current loop nest.
2172 This function generates gimple from clast statements, but in case of
2173 stmt_for either host or device code can be generated.
2174 CODE_GEN holds information related to code generation. */
2176 static void
2177 opencl_transform_stmt_list (struct clast_stmt *s, opencl_main code_gen,
2178 int depth)
2180 bool dump_p = dump_file && (dump_flags & TDF_DETAILS);
2182 for ( ; s; s = s->next)
2184 opencl_clast_meta tmp = code_gen->curr_meta;
2186 if (CLAST_STMT_IS_A (s, stmt_root))
2187 continue;
2189 else if (CLAST_STMT_IS_A (s, stmt_user))
2191 code_gen->curr_meta->init_edge = code_gen->main_edge;
2192 opencl_add_gimple_for_user_stmt ((struct clast_user_stmt *) s,
2193 code_gen);
2194 code_gen->curr_meta = code_gen->curr_meta->next;
2196 else if (CLAST_STMT_IS_A (s, stmt_for))
2198 opencl_clast_meta current_clast = code_gen->curr_meta;
2199 struct clast_for *for_stmt = (struct clast_for *) s;
2200 bool dependency = false;
2201 bool parallel = false;
2203 /* If there are dependencies in loop, it can't be parallelized. */
2204 if (!flag_graphite_opencl_no_dep_check &&
2205 dependency_in_clast_loop_p (code_gen, current_clast,
2206 for_stmt, depth))
2208 if (dump_p)
2209 fprintf (dump_file, "dependency in loop\n");
2211 dependency = true;
2214 if (!dependency)
2215 parallel = opencl_should_be_parallel_p (code_gen, current_clast,
2216 depth);
2218 /* Create init block for memory transfer befor loop. */
2219 current_clast->init_edge = code_gen->main_edge;
2221 if (parallel && !dependency)
2223 opencl_body current_body;
2225 opencl_fflush_rw_flags (code_gen);
2226 opencl_mark_privatized_data (code_gen);
2227 current_clast->on_device = true;
2228 current_body
2229 = opencl_clast_to_kernel (for_stmt, code_gen, depth);
2231 if (current_body->num_of_data_writes)
2233 dyn_string_t header = current_body->header;
2234 dyn_string_t pre_header = current_body->pre_header;
2235 dyn_string_t body = current_body->body;
2237 dyn_string_append (code_gen->main_program, header);
2238 dyn_string_append (code_gen->main_program, pre_header);
2239 dyn_string_append (code_gen->main_program, body);
2241 opencl_create_gimple_for_body (current_body, code_gen);
2243 htab_delete (code_gen->global_defined_vars);
2244 update_ssa (TODO_update_ssa);
2245 opencl_verify ();
2246 opencl_body_delete (current_body);
2247 code_gen->current_body = NULL;
2249 else
2251 code_gen->curr_meta = code_gen->curr_meta->body;
2252 opencl_add_gimple_for_stmt_for (for_stmt, code_gen,
2253 depth, dependency);
2256 opencl_fix_meta_flags (current_clast);
2257 code_gen->curr_meta = current_clast->next;
2259 else if (CLAST_STMT_IS_A (s, stmt_guard))
2260 opencl_add_gimple_for_stmt_guard ((struct clast_guard *) s,
2261 code_gen, depth);
2262 else if (CLAST_STMT_IS_A (s, stmt_block))
2263 opencl_transform_stmt_list (((struct clast_block *) s)->body,
2264 code_gen, depth);
2265 else
2266 gcc_unreachable ();
2268 if (tmp->parent)
2269 opencl_fix_meta_flags (tmp->parent);
2273 /* Transform clast statement DATA from scop SCOP to OpenCL calls
2274 in region REGION. Place all calls to edge MAIN. PARAM_INDEX
2275 holds external scop params. */
2277 void
2278 opencl_transform_clast (struct clast_stmt *data, sese region,
2279 edge main, scop_p scop, htab_t params_index)
2281 opencl_main code_gen;
2282 /* Create main data struture for code generation. */
2284 if (dump_file && (dump_flags & TDF_DETAILS))
2286 fprintf (dump_file, "\nGenerating OpenCL code for SCoP: \n");
2287 print_scop (dump_file, scop, 0);
2290 code_gen = opencl_main_create (((struct clast_root *) data)->names,
2291 region, main, params_index);
2293 opencl_init_basic_blocks (code_gen);
2294 opencl_init_data (scop, code_gen);
2296 code_gen->clast_meta = opencl_create_meta_from_clast (code_gen, data, 1,
2297 NULL);
2298 code_gen->curr_meta = code_gen->clast_meta;
2300 opencl_transform_stmt_list (data, code_gen, 1);
2302 if (dyn_string_length (code_gen->main_program) != 0)
2304 dyn_string_append (main_program_src, code_gen->main_program);
2305 opencl_set_data_size (code_gen);
2306 opencl_init_all_device_buffers (code_gen);
2307 opencl_fflush_all_device_buffers_to_host (code_gen);
2310 recompute_all_dominators ();
2311 update_ssa (TODO_update_ssa);
2312 opencl_main_delete (code_gen);
2315 /* Find opencl_data object by host object OBJ in CODE_GEN hash maps. */
2317 opencl_data
2318 opencl_get_data_by_tree (opencl_main code_gen, tree obj)
2320 map_tree_to_data tmp = map_tree_to_data_create (obj, NULL);
2321 map_tree_to_data *slot
2322 = (map_tree_to_data *) htab_find_slot (code_gen->tree_to_data,
2323 tmp, INSERT);
2324 if (*slot == NULL)
2325 return NULL;
2327 return (*slot)->value;
2330 /* Find opencl_data object by data reference REF in CODE_GEN hash maps. */
2332 opencl_data
2333 opencl_get_data_by_data_ref (opencl_main code_gen, data_reference_p ref)
2335 map_ref_to_data tmp = map_ref_to_data_create (ref, NULL);
2336 map_ref_to_data *slot
2337 = (map_ref_to_data *) htab_find_slot (code_gen->ref_to_data,
2338 tmp, INSERT);
2339 if (*slot == NULL)
2340 return NULL;
2342 return (*slot)->value;
2345 /* Create global variables for opencl code. */
2347 static void
2348 opencl_create_gimple_variables (void)
2350 static bool opencl_var_created = false;
2352 if (opencl_var_created)
2353 return;
2355 opencl_var_created = true;
2357 /* cl_context h_context */
2358 h_context = opencl_create_static_ptr_variable ("__ocl_h_context");
2360 /* cl_command_queue h_cmd_queue */
2361 h_cmd_queue = opencl_create_static_ptr_variable ("__ocl_h_cmd_queue");
2364 /* Create call
2365 | clGetContextInfo (h_context, CL_CONTEXT_DEVICES, 0, 0,
2366 | &n_context_descriptor_size);
2368 POINTER_TO_SIZE if &n_context_descriptor_size. */
2370 static tree
2371 opencl_create_clGetContextInfo_1 (tree pointer_to_size)
2373 tree function = opencl_create_function_decl (GET_CONTEXT_INFO);
2374 tree zero_pointer = null_pointer_node;
2375 tree cl_contex_devices = build_int_cst (NULL_TREE, CL_CONTEXT_DEVICES);
2376 tree context_var = h_context;
2378 return build_call_expr (function, 5,
2379 context_var,
2380 cl_contex_devices,
2381 integer_zero_node,
2382 zero_pointer,
2383 pointer_to_size);
2386 /* Create call
2387 | clGetContextInfo (h_context, CL_CONTEXT_DEVICES,
2388 | n_context_descriptor_size, A_DEVICES, 0);
2390 POINTER_TO_SIZE if &n_context_descriptor_size. */
2392 static tree
2393 opencl_create_clGetContextInfo_2 (tree size, tree a_devices)
2395 tree function = opencl_create_function_decl (GET_CONTEXT_INFO);
2396 tree zero_pointer = null_pointer_node;
2397 tree cl_contex_devices = build_int_cst (NULL_TREE, CL_CONTEXT_DEVICES);
2398 tree context_var = h_context;
2400 return build_call_expr (function, 5,
2401 context_var,
2402 cl_contex_devices,
2403 size,
2404 a_devices,
2405 zero_pointer);
2408 /* Create context_properties array variable. */
2410 static tree
2411 opencl_create_context_properties (void)
2413 tree cl_context_properties_type = long_integer_type_node;
2415 tree index_type = build_index_type (build_int_cst (NULL_TREE, 3));
2416 tree array_type = build_array_type (cl_context_properties_type,
2417 index_type);
2419 return opencl_create_tmp_var (array_type, "context_properties");
2422 /* Place calls to obtain current platform id to INIT_EDGE.
2423 Place obtained id to VAR. */
2425 static edge
2426 opencl_set_context_properties (edge init_edge, tree var)
2428 tree function = opencl_create_function_decl (GET_PLATFORM_IDS);
2429 tree cl_context_properties_type = long_integer_type_node;
2430 tree call;
2431 tree call2;
2432 gimple_stmt_iterator g_iter;
2434 basic_block bb = split_edge (init_edge);
2436 init_edge = single_succ_edge (bb);
2438 g_iter = gsi_last_bb (bb);
2439 call = build4 (ARRAY_REF, cl_context_properties_type,
2440 var, integer_zero_node, NULL_TREE, NULL_TREE);
2441 call2 = build_int_cst (NULL_TREE, CL_CONTEXT_PLATFORM);
2442 call2 = build1 (CONVERT_EXPR, cl_context_properties_type, call2);
2444 call = build2 (MODIFY_EXPR, cl_context_properties_type,
2445 call, call2);
2447 force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
2448 GSI_CONTINUE_LINKING);
2449 g_iter = gsi_last_bb (bb);
2451 call = build4 (ARRAY_REF, cl_context_properties_type,
2452 var, integer_one_node, NULL_TREE, NULL_TREE);
2453 call2 = build_call_expr (function, 3,
2454 integer_one_node,
2455 build_addr (call, current_function_decl),
2456 null_pointer_node);
2457 force_gimple_operand_gsi (&g_iter, call2, true, NULL, false,
2458 GSI_CONTINUE_LINKING);
2460 call = build4 (ARRAY_REF, cl_context_properties_type,
2461 var, build_int_cst (NULL_TREE, 2), NULL_TREE, NULL_TREE);
2462 call = build2 (MODIFY_EXPR, cl_context_properties_type,
2463 call, fold_convert (cl_context_properties_type,
2464 integer_zero_node));
2465 force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
2466 GSI_CONTINUE_LINKING);
2467 return init_edge;
2470 /* Create call
2471 clCreateContextFromType (PROPERTIES, CL_DEVICE_TYPE_GPU, 0, 0, 0); */
2473 static tree
2474 opencl_create_clCreateContextFromType (tree properties)
2476 tree function = opencl_create_function_decl (CREATE_CONTEXT_FROM_TYPE);
2477 tree zero_pointer = null_pointer_node;
2478 tree device
2479 = build_int_cst (NULL_TREE, flag_graphite_opencl_cpu
2480 ? CL_DEVICE_TYPE_CPU : CL_DEVICE_TYPE_GPU);
2482 return build_call_expr (function, 5,
2483 build_addr (properties, current_function_decl),
2484 device,
2485 zero_pointer,
2486 zero_pointer,
2487 zero_pointer);
2490 /* Create call
2491 clCreateCommandQueue (h_context, DEV_ID, 0, 0); */
2493 static tree
2494 opencl_create_clCreateCommandQueue (tree dev_id)
2497 tree function = opencl_create_function_decl (CREATE_COMMAND_QUEUE);
2498 tree zero_pointer = null_pointer_node;
2499 tree context = h_context;
2501 return build_call_expr (function, 4,
2502 context,
2503 dev_id,
2504 zero_pointer,
2505 zero_pointer);
2508 /* Create call malloc (ARG). */
2510 static tree
2511 opencl_create_malloc_call (tree arg)
2513 tree function_type = build_function_type_list (ptr_type_node,
2514 integer_type_node,
2515 NULL_TREE);
2516 tree function = build_fn_decl ("malloc", function_type);
2518 return build_call_expr (function, 1, arg);
2521 /* Generate calls for opencl init functions and place them to INIT_EDGE.
2522 Must be called only once in each function. */
2524 static edge
2525 opencl_create_init_context (edge init_edge)
2527 tree tmp_type;
2528 tree call;
2529 tree n_context_descriptor_size
2530 = opencl_create_tmp_var (size_type_node, "__ocl_nContextDescriptorSize");
2531 tree a_devices = opencl_create_tmp_var (build_pointer_type (ptr_type_node),
2532 "__ocl_a_devices");
2533 tree properties = opencl_create_context_properties ();
2535 init_edge = opencl_set_context_properties (init_edge, properties);
2536 call = opencl_create_clCreateContextFromType (properties);
2537 call = build2 (MODIFY_EXPR, TREE_TYPE (h_context),
2538 h_context, call);
2539 init_edge = opencl_add_safe_call_on_edge (call, false, init_edge);
2540 tmp_type = build_pointer_type
2541 (TREE_TYPE (n_context_descriptor_size));
2542 call = build1 (ADDR_EXPR, tmp_type,
2543 n_context_descriptor_size);
2544 call = opencl_create_clGetContextInfo_1 (call);
2545 init_edge = opencl_add_safe_call_on_edge (call, true, init_edge);
2546 call = opencl_create_malloc_call (n_context_descriptor_size);
2547 call = fold_convert (TREE_TYPE (a_devices), call);
2548 call = build2 (MODIFY_EXPR, TREE_TYPE (a_devices), a_devices, call);
2549 init_edge = opencl_add_safe_call_on_edge (call, false, init_edge);
2550 call = opencl_create_clGetContextInfo_2 (n_context_descriptor_size,
2551 a_devices);
2552 init_edge = opencl_add_safe_call_on_edge (call, true, init_edge);
2553 tmp_type = TREE_TYPE (TREE_TYPE (a_devices));
2554 call = build1 (INDIRECT_REF, tmp_type, a_devices);
2555 call = opencl_create_clCreateCommandQueue (call);
2556 call = build2 (MODIFY_EXPR, TREE_TYPE (h_cmd_queue),
2557 h_cmd_queue, call);
2558 init_edge = opencl_add_safe_call_on_edge (call, false, init_edge);
2559 return init_edge;
2562 /* Fill array VEC with all poly basic blocks in clast statement ROOT. */
2564 static void
2565 build_poly_bb_vec (struct clast_stmt *root,
2566 VEC (poly_bb_p, heap) **vec)
2568 while (root)
2570 if (CLAST_STMT_IS_A (root, stmt_user))
2572 poly_bb_p tmp
2573 = (poly_bb_p) cloog_statement_usr
2574 (((struct clast_user_stmt *) root)->statement);
2576 VEC_safe_push (poly_bb_p, heap, *vec, tmp);
2579 else if (CLAST_STMT_IS_A (root, stmt_for))
2580 build_poly_bb_vec (((struct clast_for *) root)->body, vec);
2582 else if (CLAST_STMT_IS_A (root, stmt_guard))
2583 build_poly_bb_vec (((struct clast_guard *) root)->then, vec);
2585 else if (CLAST_STMT_IS_A (root, stmt_block))
2586 build_poly_bb_vec (((struct clast_block *) root)->body, vec);
2588 root = root->next;
2592 /* Check whether there is a dependency between PBB1 and PBB2 on level LEVEL.
2593 CAN_BE_PRIVATE indicates which variables can be privatizated.
2594 CODE_GEN holds information related to code generation. */
2596 static bool
2597 opencl_dependency_between_pbbs_p (opencl_main code_gen, poly_bb_p pbb1,
2598 poly_bb_p pbb2, int level,
2599 bitmap can_be_private)
2601 int i, j;
2602 poly_dr_p pdr1, pdr2;
2604 timevar_push (TV_GRAPHITE_DATA_DEPS);
2606 for (i = 0; VEC_iterate (poly_dr_p, PBB_DRS (pbb1), i, pdr1); i++)
2608 data_reference_p ref1 = (data_reference_p)PDR_CDR (pdr1);
2609 opencl_data data_1 = opencl_get_data_by_data_ref (code_gen, ref1);
2611 if (bitmap_bit_p (can_be_private, data_1->id))
2612 continue;
2614 for (j = 0; VEC_iterate (poly_dr_p, PBB_DRS (pbb2), j, pdr2); j++)
2616 data_reference_p ref2 = (data_reference_p)PDR_CDR (pdr2);
2618 opencl_data data_2 = opencl_get_data_by_data_ref (code_gen, ref2);
2620 if (bitmap_bit_p (can_be_private, data_2->id))
2621 continue;
2623 if (graphite_carried_dependence_level_k (pdr1, pdr2, level))
2625 timevar_pop (TV_GRAPHITE_DATA_DEPS);
2626 return true;
2631 timevar_pop (TV_GRAPHITE_DATA_DEPS);
2632 return false;
2635 /* Returns true, if there is dependency in clast loop STMT on depth DEPTH.
2636 CODE_GEN holds information related to code generation. */
2638 bool
2639 dependency_in_clast_loop_p (opencl_main code_gen, opencl_clast_meta meta,
2640 struct clast_for *stmt, int depth)
2642 VEC (poly_bb_p, heap) *pbbs = VEC_alloc (poly_bb_p, heap, 10);
2643 int level = get_scattering_level (depth);
2644 int i;
2645 poly_bb_p pbb1;
2646 bitmap can_be_private;
2648 build_poly_bb_vec (stmt->body, &pbbs);
2649 can_be_private = meta->can_be_private;
2651 for (i = 0; VEC_iterate (poly_bb_p, pbbs, i, pbb1); i++)
2653 int j;
2654 poly_bb_p pbb2;
2656 for (j = 0; VEC_iterate (poly_bb_p, pbbs, j, pbb2); j++)
2657 if (opencl_dependency_between_pbbs_p (code_gen, pbb1, pbb1,
2658 level, can_be_private))
2660 VEC_free (poly_bb_p, heap, pbbs);
2661 return true;
2665 VEC_free (poly_bb_p, heap, pbbs);
2666 return false;
2669 /* Init graphite-opencl pass. Must be called in each function before
2670 any scop processing. */
2672 void
2673 graphite_opencl_init (void)
2675 opencl_create_gimple_variables ();
2677 /* cl_program h_program */
2678 h_program
2679 = opencl_create_static_ptr_variable ("__ocl_h_program");
2681 opencl_function_kernels = VEC_alloc (tree, heap, OPENCL_INIT_BUFF_SIZE);
2682 main_program_src = dyn_string_new (100);
2684 opencl_array_data = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE);
2685 array_data_to_tree = htab_create (10, map_tree_to_data_to_hash,
2686 map_tree_to_data_cmp, free);
2688 opencl_create_function_decl (STATIC_INIT);
2691 /* Create calls to initialize static data for current function and
2692 place them to INIT_EDGE. */
2694 static edge
2695 opencl_init_static_data (edge init_edge)
2697 int i;
2698 opencl_data curr;
2700 for (i = 0; VEC_iterate (opencl_data, opencl_array_data, i, curr); i ++)
2702 tree tmp;
2704 if (!curr->used_on_device)
2705 continue;
2707 tmp = opencl_create_memory_for_pointer (curr);
2708 tmp = build2 (MODIFY_EXPR, ptr_type_node, curr->device_object, tmp);
2709 init_edge = opencl_add_safe_call_on_edge (tmp, false, init_edge);
2712 return init_edge;
2715 /* Finalize graphite-opencl pass for current function. Place all required
2716 calls to STATIC_INIT_EDGE. Must be called after all scop processing
2717 in current function. */
2719 void
2720 graphite_opencl_finalize (edge static_init_edge)
2722 int i;
2723 opencl_data curr;
2725 if (dyn_string_length (main_program_src) != 0)
2727 tree call = build2 (EQ_EXPR, boolean_type_node,
2728 h_program, null_pointer_node);
2729 basic_block buff_init_block = split_edge (static_init_edge);
2730 edge before_init;
2731 edge init_edge;
2733 static_init_edge = single_succ_edge (buff_init_block);
2735 create_empty_if_region_on_edge (static_init_edge, call);
2737 static_init_edge = opencl_create_function_call
2738 (get_true_edge_from_guard_bb (static_init_edge->dest));
2739 static_init_edge = opencl_init_static_data (static_init_edge);
2740 before_init = single_pred_edge (buff_init_block);
2741 call = build2 (EQ_EXPR, boolean_type_node,
2742 h_context, null_pointer_node);
2744 create_empty_if_region_on_edge (before_init, call);
2745 init_edge = get_true_edge_from_guard_bb (before_init->dest);
2746 init_edge = opencl_create_init_context (init_edge);
2749 dyn_string_delete (main_program_src);
2751 for (i = 0; VEC_iterate (opencl_data, opencl_array_data, i, curr); i++)
2752 opencl_data_delete (curr);
2754 VEC_free (tree, heap, opencl_function_kernels);
2755 VEC_free (opencl_data, heap, opencl_array_data);
2756 recompute_all_dominators ();
2757 update_ssa (TODO_update_ssa);
2760 /* Debug functions for deined data structures. */
2762 static void
2763 dump_flag_to_file (const char *name, bool cond,
2764 FILE *file, int indent)
2766 indent_to (file, indent);
2767 fprintf (file, "%s = %s", name, cond? "true" : "false");
2770 void
2771 dump_opencl_data (opencl_data data, FILE *file, bool verbose)
2773 fprintf (file, "Data id = %d\n", data->id);
2774 fprintf (file, "Data dimension = %d\n", data->data_dim);
2775 fprintf (file, "Data depth = %d\n", data->depth);
2776 fprintf (file, "Flags");
2777 indent_to (file, 2);
2778 fprintf (file, "Global");
2779 dump_flag_to_file ("Static", data->is_static, file, 4);
2780 dump_flag_to_file ("Can be private", data->can_be_private, file, 4);
2781 dump_flag_to_file ("Used on device", data->used_on_device, file, 4);
2782 dump_flag_to_file ("Ever read on device",
2783 data->ever_read_on_device, file, 4);
2785 dump_flag_to_file ("Ever written on device",
2786 data->ever_written_on_device, file, 4);
2788 dump_flag_to_file ("Supported", data->supported, file, 4);
2789 indent_to (file, 2);
2790 fprintf (file, "Local");
2792 dump_flag_to_file ("Up to date on device",
2793 data->up_to_date_on_device, file, 4);
2794 dump_flag_to_file ("Up to date on host",
2795 data->up_to_date_on_host, file, 4);
2797 dump_flag_to_file ("Inited in current loop on host",
2798 data->inited_in_current_loop_on_host, file, 4);
2800 dump_flag_to_file ("Inited in current loop on device",
2801 data->inited_in_current_loop_on_device, file, 4);
2803 dump_flag_to_file ("Written in current body",
2804 data->written_in_current_body, file, 4);
2806 dump_flag_to_file ("Read in current body",
2807 data->read_in_current_body, file, 4);
2808 dump_flag_to_file ("Privatized", data->privatized, file, 4);
2810 fprintf (file, "\n");
2812 if (verbose)
2814 fprintf (file, "\nObject\n");
2815 print_node_brief (file, "", data->object, 2);
2817 fprintf (file, "\nDevice object\n");
2818 print_node_brief (file, "", data->device_object, 2);
2820 fprintf (file, "\nSize value\n");
2821 print_node_brief (file, "", data->size_value, 2);
2823 fprintf (file, "\nSize variable\n");
2824 print_node_brief (file, "", data->size_variable, 2);
2826 fprintf (file, "\nExact object\n");
2827 print_node_brief (file, "", data->exact_object, 2);
2831 DEBUG_FUNCTION void
2832 debug_opencl_data (opencl_data data, bool verbose)
2834 dump_opencl_data (data, stderr, verbose);
2837 void
2838 dump_opencl_body (opencl_body body, FILE *file, bool verbose)
2840 fprintf (file, "\n%s\n\n", body->name);
2841 fprintf (file, "First iterator: %s\n", body->first_iter);
2842 fprintf (file, "Last iterator: %s\n", body->last_iter);
2843 fprintf (file, "Number of data writes = %d\n\n", body->num_of_data_writes);
2844 fprintf (file, "Function header::\n");
2845 fprintf (file, "%s\n\n", dyn_string_buf (body->header));
2846 fprintf (file, "Non scalar args::\n");
2847 fprintf (file, "%s\n\n", dyn_string_buf (body->non_scalar_args));
2848 fprintf (file, "Pre header::\n");
2849 fprintf (file, "%s\n\n", dyn_string_buf (body->pre_header));
2850 fprintf (file, "Body::\n");
2851 fprintf (file, "%s\n\n", dyn_string_buf (body->body));
2853 fprintf (file, "Number of executions::\n");
2854 print_node_brief (file, "", body->num_of_exec, 2);
2856 if (verbose)
2857 print_clast_stmt (file, body->clast_body);
2860 DEBUG_FUNCTION void
2861 debug_opencl_body (opencl_body body, bool verbose)
2863 dump_opencl_body (body, stderr, verbose);
2866 void
2867 dump_opencl_clast_meta (opencl_clast_meta meta, FILE *file,
2868 bool verbose, int indent)
2870 if (!verbose)
2871 /* Just print structure of meta. */
2873 while (meta)
2875 indent_to (file, indent);
2876 fprintf (file, "<in = %d, out = %d, dev = %s, ok = %s>",
2877 meta->in_depth, meta->out_depth,
2878 meta->on_device?"true":"false",
2879 meta->access_unsupported?"false":"true");
2880 dump_opencl_clast_meta (meta->body, file, false, indent + 4);
2881 meta = meta->next;
2884 else
2886 fprintf (file, "<in = %d, out = %d, dev = %s, ok = %s>",
2887 meta->in_depth, meta->out_depth,
2888 meta->on_device?"true":"false",
2889 meta->access_unsupported?"false":"true");
2891 fprintf (file, "\nModified on host::\n");
2892 debug_bitmap_file (file, meta->modified_on_host);
2894 fprintf (file, "\nModified on device::\n");
2895 debug_bitmap_file (file, meta->modified_on_device);
2897 fprintf (file, "\nAccess::\n");
2898 debug_bitmap_file (file, meta->access);
2900 fprintf (file, "\nCan be private::\n");
2901 debug_bitmap_file (file, meta->can_be_private);
2905 DEBUG_FUNCTION void
2906 debug_opencl_clast_meta (opencl_clast_meta meta, bool verbose)
2908 dump_opencl_clast_meta (meta, stderr, verbose, 0);
2911 static int
2912 print_char_p_htab (void **h, void *v)
2914 char **ptr = (char **) h;
2915 FILE *file = (FILE *) v;
2917 fprintf (file, " %s\n", *ptr);
2918 return 1;
2921 static int
2922 print_tree_to_data_htab (void **h, void *v)
2924 map_tree_to_data *map = (map_tree_to_data *) h;
2925 FILE *file = (FILE *) v;
2926 tree key = (*map)->key;
2927 opencl_data data = (*map)->value;
2929 print_node_brief (file, "key = ", key, 2);
2930 fprintf (file, " data_id = %d\n", data->id);
2931 return 1;
2934 static int
2935 print_ref_to_data_htab (void **h, void *v)
2937 map_ref_to_data *map = (map_ref_to_data *) h;
2938 FILE *file = (FILE *) v;
2939 data_reference_p key = (*map)->key;
2940 opencl_data data = (*map)->value;
2942 fprintf (file, "key::\n");
2943 dump_data_reference (file, key);
2944 fprintf (file, "data_id = %d\n\n", data->id);
2945 return 1;
2948 void
2949 dump_opencl_main (opencl_main code_gen, FILE *file, bool verbose)
2951 fprintf (file, "Current meta::\n");
2952 dump_opencl_clast_meta (code_gen->curr_meta, file, false, 2);
2953 fprintf (file, "\n");
2955 if (code_gen->current_body)
2957 fprintf (file, "Current body::\n");
2958 dump_opencl_body (code_gen->current_body, file, verbose);
2961 fprintf (file, "\n\nData init basic block::\n");
2962 dump_bb (code_gen->data_init_bb, stderr, 0);
2964 if (code_gen->defined_vars)
2966 fprintf (file, "Defined variables::\n");
2967 htab_traverse_noresize (code_gen->defined_vars, print_char_p_htab,
2968 file);
2971 if (code_gen->global_defined_vars)
2973 fprintf (file, "Global defined variables::\n");
2974 htab_traverse_noresize (code_gen->global_defined_vars,
2975 print_char_p_htab, file);
2978 fprintf (file, "Refs to data::\n");
2979 htab_traverse_noresize (code_gen->ref_to_data,
2980 print_ref_to_data_htab, file);
2982 fprintf (file, "Trees to data::\n");
2983 htab_traverse_noresize (code_gen->tree_to_data,
2984 print_tree_to_data_htab, file);
2986 if (verbose)
2987 fprintf (file, "%s\n", dyn_string_buf (code_gen->main_program));
2990 DEBUG_FUNCTION void
2991 debug_opencl_main (opencl_main code_gen, bool verbose)
2993 dump_opencl_main (code_gen, stderr, verbose);
2996 DEBUG_FUNCTION void
2997 debug_opencl_program (void)
2999 fprintf (stderr, "%s", dyn_string_buf (main_program_src));
3002 #endif
3003 #include "gt-graphite-opencl.h"