Don't include sys/time.h.
[official-gcc/graphite-test-results.git] / gcc / graphite-opencl.c
blob06bea63b1e1e861e9bf4d97054a26e0c803cb0a1
1 /* GRAPHITE-OpenCL pass.
2 Copyright (C) 2009, 2010 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass implements transformation of perfect loop nests without
21 dependencies to OpenCL kernels.
23 For example, loop nest like this
25 | for (s_i = lb_i; s_i <= ub_i; s_i += stride_i)
26 | for (s_{i_1} = lb_{i+1}; s_{i+1} <= ub_{i+1}; s_{i+1} += stride_{i+1})
27 | ...
28 | for (s_j = lb_j; s_j <= ub_j; s_j += stride_j)
29 | {
30 | STMT(s_i, s_{i+1}, ..., s_j);
31 | }
33 will be transformed to this OpenCL kernel and all required OpenCL
34 calls will be created.
36 | __kernel void opencl_auto_function_N
37 | (base_i, mod_i, step_i, first_i,
38 | ...,
39 | base_j, mod_j, step_i, first_j,
40 | ...)
41 | {
42 | unsigned int global_id = get_global_id (0);
43 | unsigned int s_i = ((global_id / base_i) % mod_i) * step_i + first_i;
44 | unsigned int s_j = ((global_id / base_j) % mod_j) * step_j + first_j;
45 | STMT(s_i, s_{i+1}, ..., s_j);
46 | }
49 #include "config.h"
50 #include "system.h"
51 #include "coretypes.h"
52 #include "tm.h"
53 #include "ggc.h"
54 #include "tree.h"
55 #include "rtl.h"
56 #include "basic-block.h"
57 #include "diagnostic.h"
58 #include "tree-flow.h"
59 #include "toplev.h"
60 #include "tree-dump.h"
61 #include "timevar.h"
62 #include "cfgloop.h"
63 #include "tree-chrec.h"
64 #include "tree-data-ref.h"
65 #include "tree-scalar-evolution.h"
66 #include "tree-pass.h"
67 #include "domwalk.h"
68 #include "value-prof.h"
69 #include "pointer-set.h"
70 #include "gimple.h"
71 #include "sese.h"
72 #include "output.h"
73 #include "hashtab.h"
74 #include "tree.h"
76 /* Variable, which holds OpenCL context. */
77 static GTY(()) tree h_context;
79 /* Variable, which holds OpenCL command queue. */
80 static GTY(()) tree h_cmd_queue;
82 /* Variable, which holds OpenCL program for current function. */
83 static GTY(()) tree h_program;
85 #ifdef HAVE_cloog
86 #include "cloog/cloog.h"
87 #include "ppl_c.h"
88 #include "graphite-ppl.h"
89 #include "graphite.h"
90 #include "graphite-poly.h"
91 #include "graphite-scop-detection.h"
92 #include "graphite-clast-to-gimple.h"
93 #include "graphite-dependences.h"
94 #include "dyn-string.h"
95 #include "graphite-opencl.h"
97 /* Data structure to be used in data_reference_p to opencl_data hash
98 table. */
99 struct map_ref_to_data_def
101 data_reference_p key;
102 opencl_data value;
105 typedef struct map_ref_to_data_def *map_ref_to_data;
107 /* Calculate hash value from map_ref_to_data. */
109 static hashval_t
110 map_ref_to_data_to_hash (const void *data)
112 const struct map_ref_to_data_def *obj
113 = (const struct map_ref_to_data_def *) data;
115 return htab_hash_pointer (obj->key);
118 /* Compare to map_ref_to_data pointers. */
120 static int
121 map_ref_to_data_cmp (const void *v1, const void *v2)
123 const struct map_ref_to_data_def *obj_1
124 = (const struct map_ref_to_data_def *) v1;
125 const struct map_ref_to_data_def *obj_2
126 = (const struct map_ref_to_data_def *) v2;
128 return (obj_1->key == obj_2->key);
131 /* Create new map_ref_to_data with NEW_KEY as key and NEW_VALUE as value. */
133 static map_ref_to_data
134 map_ref_to_data_create (data_reference_p new_key,
135 opencl_data new_value)
137 map_ref_to_data tmp = XNEW (struct map_ref_to_data_def);
139 tmp->key = new_key;
140 tmp->value = new_value;
141 return tmp;
144 /* Data structure to be used in tree to opencl_data hash table. */
146 struct map_tree_to_data_def
148 tree key;
149 opencl_data value;
152 typedef struct map_tree_to_data_def *map_tree_to_data;
154 /* Calculate hash value from map_tree_to_data. */
156 static hashval_t
157 map_tree_to_data_to_hash (const void *data)
159 const struct map_tree_to_data_def *obj
160 = (const struct map_tree_to_data_def *) data;
162 return htab_hash_pointer (obj->key);
165 /* Compare to map_tree_to_data pointers. */
167 static int
168 map_tree_to_data_cmp (const void *v1, const void *v2)
170 const struct map_tree_to_data_def *obj_1
171 = (const struct map_tree_to_data_def *) v1;
172 const struct map_tree_to_data_def *obj_2
173 = (const struct map_tree_to_data_def *) v2;
175 return (obj_1->key == obj_2->key);
178 /* Create new map_tree_to_data with NEW_KEY as key and NEW_VALUE as value. */
180 static map_tree_to_data
181 map_tree_to_data_create (tree new_key,
182 opencl_data new_value)
184 map_tree_to_data tmp = XNEW (struct map_tree_to_data_def);
186 tmp->key = new_key;
187 tmp->value = new_value;
188 return tmp;
191 /* Create and init new temporary variable with name NAME and
192 type TYPE. */
194 static tree
195 opencl_create_tmp_var (tree type, const char *name)
197 tree tmp = create_tmp_var (type, name);
199 TREE_ADDRESSABLE (tmp) = 1;
200 return tmp;
203 /* Create new var in basic block DEST to store EXPR and return it. */
205 tree
206 opencl_tree_to_var (basic_block dest, tree expr)
208 tree type = TREE_TYPE (expr);
209 tree var = opencl_create_tmp_var (type, "__ocl_general_tmp_var");
210 gimple_stmt_iterator g_iter = gsi_last_bb (dest);
212 tree call = build2 (MODIFY_EXPR, type, var, expr);
214 force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
215 GSI_CONTINUE_LINKING);
217 return var;
220 /* Set rw flags to false for all datas, referenced in CODE_GEN. */
222 static void
223 opencl_fflush_rw_flags (opencl_main code_gen)
225 VEC (opencl_data, heap) *datas = code_gen->opencl_function_data;
226 int i;
227 opencl_data curr;
229 for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
231 curr->written_in_current_body = false;
232 curr->read_in_current_body = false;
233 curr->privatized = false;
237 /* Create new basic block on CODE_GEN->main edge and update it. */
239 basic_block
240 opencl_create_bb (opencl_main code_gen)
242 basic_block tmp = split_edge (code_gen->main_edge);
244 code_gen->main_edge = single_succ_edge (tmp);
245 return tmp;
248 /* All kernels of current function. */
249 static VEC(tree, heap) *opencl_function_kernels;
251 /* OpenCL code for all kernels of current function. */
252 static dyn_string_t main_program_src;
254 /* Delete clast meta DATA. */
256 static void
257 opencl_clast_meta_delete (opencl_clast_meta data)
259 if (!data)
260 return;
262 opencl_clast_meta_delete (data->body);
263 opencl_clast_meta_delete (data->next);
264 BITMAP_FREE (data->modified_on_device);
265 BITMAP_FREE (data->modified_on_host);
267 if (data->access != NULL)
268 BITMAP_FREE (data->access);
270 if (data->can_be_private)
271 BITMAP_FREE (data->can_be_private);
273 free (data);
276 static inline void
277 opencl_verify (void)
279 #ifdef ENABLE_CHECKING
280 verify_loop_structure ();
281 verify_dominators (CDI_DOMINATORS);
282 verify_loop_closed_ssa (true);
283 #endif
286 /* OpenCL definitions. */
287 #define CL_CONTEXT_PLATFORM 0x1084
288 #define CL_CONTEXT_DEVICES 0x1081
289 #define CL_DEVICE_TYPE_CPU (1 << 1)
290 #define CL_DEVICE_TYPE_GPU (1 << 2)
291 #define CL_MEM_COPY_HOST_PTR (1 << 5)
292 #define CL_MEM_USE_HOST_PTR (1 << 3)
293 #define CL_MEM_READ_WRITE (1 << 0)
294 #define CL_MEM_WRITE_ONLY (1 << 1)
295 #define CL_MEM_READ_ONLY (1 << 2)
296 #define CL_TRUE 1
298 #define DEFOPENCLCODE(CODE, FN_NAME) CODE,
300 /* Enum for all OpenCL functions used in GRAPHITE-OpenCL. */
301 enum OPENCL_FUNCTIONS
303 #include "graphite-opencl-functions.def"
304 STATIC_INIT
307 #undef DEFOPENCLCODE
309 #define DEFOPENCLCODE(CODE, FN_NAME) FN_NAME,
311 /* Names of all OpenCL functions, used in GRAPHITE-OpenCL. */
312 static const char *opencl_function_names[] =
314 #include "graphite-opencl-functions.def"
317 #undef DEFOPENCLCODE
319 /* This vector holds opencl_data, which represents arrays.
320 Arrays have constant sizes, so buffers for each of them can
321 be created only once. */
322 static VEC (opencl_data, heap) *opencl_array_data;
324 /* Hash table, which maps opencl_data, related to arrays, to
325 trees, which represents corresponding array. */
326 static htab_t array_data_to_tree;
328 /* Check whether VAR is a zero dimension array. */
330 static bool
331 zero_dim_array_p (tree var)
333 tree type = TREE_TYPE (var);
334 tree domain;
335 tree up_bound;
337 if (TREE_CODE (type) != ARRAY_TYPE
338 || TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE
339 || (domain = TYPE_DOMAIN (type)) == NULL)
340 return false;
342 up_bound = TYPE_MAX_VALUE (domain);
344 if (TREE_CODE (up_bound) != INTEGER_CST)
345 return false;
347 return TREE_INT_CST_LOW (up_bound) == 0;
350 /* Check whether NAME is the name of the artificial array, which can be
351 privatized. */
353 static bool
354 opencl_private_var_name_p (const char *name)
356 static const char *general_reduction = "General_Reduction";
357 static const char *close_phi = "Close_Phi";
358 static const char *cross_bb = "Cross_BB_scalar_dependence";
359 static const char *commutative = "Commutative_Associative_Reduction";
361 if (!name)
362 return false;
364 return
365 ((strstr (name, general_reduction) == name)
366 || (strstr (name, close_phi) == name)
367 || (strstr (name, commutative) == name)
368 || (strstr (name, cross_bb) == name));
371 /* Check whether VAR is an artificial array, which can be privatized. */
373 static bool
374 graphite_artificial_array_p (tree var)
376 tree name;
378 if (TREE_CODE (var) != VAR_DECL
379 || !zero_dim_array_p (var)
380 || !(name = DECL_NAME (var)))
381 return false;
383 return opencl_private_var_name_p (IDENTIFIER_POINTER (name));
386 /* Get depth of type TYPE scalar (base) part. */
388 static int
389 opencl_get_non_scalar_type_depth (tree type)
391 int count = 0;
393 while (TREE_CODE (type) == ARRAY_TYPE
394 || TREE_CODE (type) == POINTER_TYPE)
396 count++;
397 type = TREE_TYPE (type);
400 return count;
403 /* Constructors & destructors.
404 <name>_create - creates a new object of such type and returns it.
405 <name>_delete - delete object (like destructor). */
407 static opencl_data
408 opencl_data_create (tree var, tree size)
410 opencl_data tmp = XNEW (struct opencl_data_def);
411 tree type = TREE_TYPE (var);
413 tmp->can_be_private = graphite_artificial_array_p (var);
414 tmp->exact_object = var;
416 tmp->supported = TREE_CODE (var) == VAR_DECL || TREE_CODE (var) == SSA_NAME;
418 if (TREE_CODE (type) == ARRAY_TYPE)
419 var = build_addr (var, current_function_decl);
421 tmp->data_dim = opencl_get_non_scalar_type_depth (type);
422 tmp->object = var;
424 tmp->size_value = size;
425 tmp->size_variable
426 = opencl_create_tmp_var (size_type_node, "__opencl_data_size");
428 tmp->up_to_date_on_host = true;
429 tmp->up_to_date_on_device = true;
430 tmp->used_on_device = false;
431 tmp->ever_read_on_device = false;
432 tmp->ever_written_on_device = false;
433 return tmp;
436 static void
437 opencl_data_delete (opencl_data data)
439 free (data);
442 static opencl_main
443 opencl_main_create (CloogNames *names, sese region, edge main_edge,
444 htab_t params_index)
446 opencl_main tmp = XNEW (struct graphite_opencl_creator);
448 tmp->root_names = names;
449 tmp->defined_vars = NULL;
450 tmp->global_defined_vars = NULL;
451 tmp->region = region;
452 tmp->main_edge = main_edge;
453 tmp->main_program = dyn_string_new (OPENCL_INIT_BUFF_SIZE);
454 tmp->current_body = NULL;
455 tmp->clast_meta = NULL;
456 tmp->curr_meta = NULL;
457 tmp->params_index = params_index;
458 tmp->newivs_index = htab_create (10, clast_name_index_elt_info,
459 eq_clast_name_indexes, free);
460 tmp->ref_to_data = htab_create (10, map_ref_to_data_to_hash,
461 map_ref_to_data_cmp, free);
462 tmp->tree_to_data = htab_create (10, map_tree_to_data_to_hash,
463 map_tree_to_data_cmp, free);
464 tmp->newivs = VEC_alloc (tree, heap, 10);
465 tmp->context_loop = SESE_ENTRY (region)->src->loop_father;
466 tmp->opencl_function_data = VEC_alloc (opencl_data, heap,
467 OPENCL_INIT_BUFF_SIZE);
468 return tmp;
471 static void
472 opencl_main_delete (opencl_main data)
474 int i;
475 opencl_data curr;
477 dyn_string_delete (data->main_program);
478 htab_delete (data->newivs_index);
479 htab_delete (data->ref_to_data);
480 htab_delete (data->tree_to_data);
481 opencl_clast_meta_delete (data->clast_meta);
483 for (i = 0; VEC_iterate (opencl_data, data->opencl_function_data, i, curr);
484 i++)
485 if (!curr->is_static)
486 opencl_data_delete (curr);
488 VEC_free (tree, heap, data->newivs);
489 VEC_free (opencl_data, heap, data->opencl_function_data);
490 free (data);
493 /* Add function call CALL to edge SRC. If FLAG_GRAPHITE_OPENCL_DEBUG is
494 enabled, then add the following:
496 | int result = call ();
497 | if (call == 0 != ZERO_RETURN)
498 | abort ();
500 Otherwise just add CALL as function call. */
502 static edge
503 opencl_add_safe_call_on_edge (tree call, bool zero_return, edge src)
505 if (!flag_graphite_opencl_debug)
507 basic_block bb = split_edge (src);
508 gimple_stmt_iterator g_iter = gsi_last_bb (bb);
510 force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
511 GSI_CONTINUE_LINKING);
512 return single_succ_edge (bb);
514 else
516 tree cmp;
517 edge result;
518 basic_block abort_bb;
519 tree abort_funtion;
520 tree abort_call;
521 gimple_stmt_iterator g_iter;
523 if (zero_return)
525 tree correct_result = build1 (CONVERT_EXPR, TREE_TYPE (call),
526 integer_zero_node);
527 cmp = build2 (EQ_EXPR, boolean_type_node,
528 call, correct_result);
530 else
532 tree incorrect_result = build1 (CONVERT_EXPR, TREE_TYPE (call),
533 null_pointer_node);
535 cmp = build2 (NE_EXPR, boolean_type_node,
536 call, incorrect_result);
539 result = create_empty_if_region_on_edge (src, cmp);
540 abort_bb = get_false_edge_from_guard_bb (src->dest)->dest;
541 abort_funtion = build_fn_decl ("abort", build_function_type_list
542 (void_type_node, NULL_TREE));
544 abort_call = build_call_expr (abort_funtion, 0);
546 g_iter = gsi_last_bb (abort_bb);
547 force_gimple_operand_gsi (&g_iter, abort_call, true, NULL, false,
548 GSI_CONTINUE_LINKING);
549 return result;
553 /* Add call CALL to the current edge from CODE_GEN. ZERO_RETURN indicates
554 whether NULL or 0 is the success return value for CALL. */
556 static void
557 opencl_add_safe_call (opencl_main code_gen, tree call, bool zero_return)
559 code_gen->main_edge
560 = opencl_add_safe_call_on_edge (call, zero_return, code_gen->main_edge);
563 /* Get base object for OBJ. */
565 tree
566 opencl_get_base_object_by_tree (tree obj)
568 while (TREE_CODE (obj) == INDIRECT_REF
569 || TREE_CODE (obj) == ARRAY_REF)
570 obj = TREE_OPERAND (obj, 0);
572 return obj;
575 /* Get base object from data reference DR. */
577 tree
578 dr_outermost_base_object (data_reference_p dr)
580 tree addr = DR_BASE_ADDRESS (dr);
582 if (!addr)
584 /* In case, we don't know base object. For example:
586 | void foo (int *a)
588 | int *b = a + 1;
589 | *b = 0;
592 Just return the innermost object when the base address is unknown. */
593 tree ref = DR_REF (dr);
595 return opencl_get_base_object_by_tree (ref);
598 if (TREE_CODE (addr) == ADDR_EXPR)
599 addr = TREE_OPERAND (addr, 0);
601 return addr;
604 /* Get correct basic block for data with DATA_ID transfer. If DEVICE
605 is true, then it's host to device transfer, otherwise it's device
606 to host transfer. CODE_GEN holds information related to code
607 generation. */
609 static edge
610 opencl_get_edge_for_init (opencl_main code_gen, int data_id, bool device)
612 opencl_clast_meta curr = code_gen->curr_meta;
614 if (!curr)
615 return NULL;
617 while (curr->parent)
619 opencl_clast_meta parent = curr->parent;
620 bitmap curr_bitmap
621 = device ? parent->modified_on_host : parent->modified_on_device;
623 if (bitmap_bit_p (curr_bitmap, data_id))
624 break;
626 curr = curr->parent;
629 return curr->init_edge;
632 /* Return tree, which represents function selected by ID.
633 If ID is STATIC_INIT, init all required data. */
635 static tree
636 opencl_create_function_decl (enum OPENCL_FUNCTIONS id)
638 static tree create_context_from_type_decl = NULL;
639 static tree get_context_info_decl = NULL;
640 static tree create_command_queue_decl = NULL;
641 static tree create_program_with_source_decl = NULL;
642 static tree build_program_decl = NULL;
643 static tree create_kernel_decl = NULL;
644 static tree create_buffer_decl = NULL;
645 static tree set_kernel_arg_decl = NULL;
646 static tree enqueue_nd_range_kernel_decl = NULL;
647 static tree enqueue_read_buffer_decl = NULL;
648 static tree enqueue_write_buffer_decl = NULL;
649 static tree release_memory_obj_decl = NULL;
650 static tree release_context_decl = NULL;
651 static tree release_command_queue_decl = NULL;
652 static tree release_program_decl = NULL;
653 static tree release_kernel_decl = NULL;
654 static tree get_platform_ids_decl = NULL;
655 static tree get_wait_for_events_decl = NULL;
657 switch (id)
659 case STATIC_INIT:
661 tree const_char_type = build_qualified_type (char_type_node,
662 TYPE_QUAL_CONST);
663 tree const_char_ptr = build_pointer_type (const_char_type);
664 tree const_char_ptr_ptr = build_pointer_type (const_char_ptr);
666 tree const_size_t = build_qualified_type (size_type_node,
667 TYPE_QUAL_CONST);
668 tree const_size_t_ptr = build_pointer_type (const_size_t);
670 tree size_t_ptr = build_pointer_type (size_type_node);
672 tree cl_device_type = integer_type_node;
673 tree cl_context_info = unsigned_type_node;
674 tree cl_command_queue_properties = long_unsigned_type_node;
675 tree cl_mem_flags = long_unsigned_type_node;
677 tree cl_context = ptr_type_node;
678 tree cl_context_properties = ptr_type_node;
679 tree cl_command_queue = ptr_type_node;
680 tree cl_device_id = ptr_type_node;
681 tree cl_program = ptr_type_node;
682 tree cl_kernel = ptr_type_node;
683 tree cl_event = ptr_type_node;
684 tree cl_mem = ptr_type_node;
686 tree const_cl_event = build_qualified_type (cl_event,
687 TYPE_QUAL_CONST);
688 tree cl_event_ptr = build_pointer_type (cl_event);
689 tree const_cl_event_ptr = build_pointer_type (const_cl_event);
691 tree const_cl_device_id = build_qualified_type (cl_device_id,
692 TYPE_QUAL_CONST);
693 tree const_cl_device_id_ptr = build_pointer_type (const_cl_device_id);
695 tree cl_platford_id = long_integer_type_node;
696 tree cl_platford_id_ptr = build_pointer_type (cl_platford_id);
698 tree function_type;
699 /* | cl_context
700 | clCreateContextFromType (cl_context_properties *properties,
701 | cl_device_type device_type,
702 | void (*pfn_notify) (const char *errinfo,
703 | const void *private_info, size_t cb,
704 | void *user_data),
705 | void *user_data,
706 | cl_int *errcode_ret) */
707 function_type
708 = build_function_type_list (cl_context,
709 cl_context_properties,
710 cl_device_type,
711 ptr_type_node,
712 ptr_type_node,
713 integer_ptr_type_node,
714 NULL_TREE);
715 create_context_from_type_decl
716 = build_fn_decl (opencl_function_names[CREATE_CONTEXT_FROM_TYPE],
717 function_type);
719 /* | cl_int clGetContextInfo (cl_context context,
720 | cl_context_info param_name,
721 | size_t param_value_size,
722 | void *param_value,
723 | size_t *param_value_size_ret) */
724 function_type
725 = build_function_type_list (integer_type_node,
726 cl_context,
727 cl_context_info,
728 size_type_node,
729 ptr_type_node,
730 size_t_ptr,
731 NULL_TREE);
732 get_context_info_decl
733 = build_fn_decl (opencl_function_names[GET_CONTEXT_INFO],
734 function_type);
736 /* | cl_command_queue
737 | clCreateCommandQueue (cl_context context,
738 | cl_device_id device,
739 | cl_command_queue_properties properties,
740 | cl_int *errcode_ret) */
741 function_type
742 = build_function_type_list (cl_command_queue,
743 cl_context,
744 cl_device_id,
745 cl_command_queue_properties,
746 integer_ptr_type_node,
747 NULL_TREE);
748 create_command_queue_decl
749 = build_fn_decl (opencl_function_names[CREATE_COMMAND_QUEUE],
750 function_type);
752 /* | cl_program clCreateProgramWithSource (cl_context context,
753 | cl_uint count,
754 | const char **strings,
755 | const size_t *lengths,
756 | cl_int *errcode_ret) */
757 function_type
758 = build_function_type_list (cl_program,
759 cl_context,
760 unsigned_type_node,
761 const_char_ptr_ptr,
762 const_size_t_ptr,
763 integer_ptr_type_node,
764 NULL_TREE);
765 create_program_with_source_decl
766 = build_fn_decl (opencl_function_names[CREATE_PROGRAM_WITH_SOURCE],
767 function_type);
769 /* | cl_int
770 | clBuildProgram (cl_program program,
771 | cl_uint num_devices,
772 | const cl_device_id *device_list,
773 | const char *options,
774 | void (*pfn_notify) (cl_program, void *user_data),
775 | void *user_data) */
776 function_type
777 = build_function_type_list (integer_type_node,
778 cl_program,
779 unsigned_type_node,
780 const_cl_device_id_ptr,
781 const_char_ptr,
782 ptr_type_node,
783 ptr_type_node,
784 NULL_TREE);
785 build_program_decl
786 = build_fn_decl (opencl_function_names[BUILD_PROGRAM],
787 function_type);
789 /* | cl_kernel clCreateKernel (cl_program program,
790 | const char *kernel_name,
791 | cl_int *errcode_ret) */
792 function_type
793 = build_function_type_list (cl_kernel,
794 cl_program,
795 const_char_ptr,
796 integer_ptr_type_node,
797 NULL_TREE);
799 create_kernel_decl
800 = build_fn_decl (opencl_function_names[CREATE_KERNEL],
801 function_type);
803 /* | cl_mem clCreateBuffer (cl_context context,
804 | cl_mem_flags flags,
805 | size_t size,
806 | void *host_ptr,
807 | cl_int *errcode_ret) */
809 function_type
810 = build_function_type_list (cl_mem,
811 cl_context,
812 cl_mem_flags,
813 size_type_node,
814 ptr_type_node,
815 integer_ptr_type_node,
816 NULL_TREE);
817 create_buffer_decl
818 = build_fn_decl (opencl_function_names[CREATE_BUFFER],
819 function_type);
822 /* | cl_int clSetKernelArg (cl_kernel kernel,
823 | cl_uint arg_index,
824 | size_t arg_size,
825 | const void *arg_value) */
827 function_type
828 = build_function_type_list (integer_type_node,
829 cl_kernel,
830 unsigned_type_node,
831 size_type_node,
832 const_ptr_type_node,
833 NULL_TREE);
834 set_kernel_arg_decl
835 = build_fn_decl (opencl_function_names[SET_KERNEL_ARG],
836 function_type);
838 /* | cl_int clEnqueueNDRangeKernel (cl_command_queue command_queue,
839 | cl_kernel kernel,
840 | cl_uint work_dim,
841 | const size_t *global_work_offset,
842 | const size_t *global_work_size,
843 | const size_t *local_work_size,
844 | cl_uint num_events_in_wait_list,
845 | const cl_event *event_wait_list,
846 | cl_event *event) */
848 function_type
849 = build_function_type_list (integer_type_node,
850 cl_command_queue,
851 cl_kernel,
852 unsigned_type_node,
853 const_size_t_ptr,
854 const_size_t_ptr,
855 const_size_t_ptr,
856 unsigned_type_node,
857 const_cl_event_ptr,
858 cl_event_ptr,
859 NULL_TREE);
861 enqueue_nd_range_kernel_decl
862 = build_fn_decl (opencl_function_names[ENQUEUE_ND_RANGE_KERNEL],
863 function_type);
865 /* | cl_int clEnqueueReadBuffer (cl_command_queue command_queue,
866 | cl_mem buffer,
867 | cl_bool blocking_read,
868 | size_t offset,
869 | size_t cb,
870 | void *ptr,
871 | cl_uint num_events_in_wait_list,
872 | const cl_event *event_wait_list,
873 | cl_event *event) */
875 function_type
876 = build_function_type_list (integer_type_node,
877 cl_command_queue,
878 cl_mem,
879 unsigned_type_node,
880 size_type_node,
881 size_type_node,
882 ptr_type_node,
883 unsigned_type_node,
884 const_cl_event_ptr,
885 cl_event_ptr,
886 NULL_TREE);
888 enqueue_read_buffer_decl
889 = build_fn_decl (opencl_function_names[ENQUEUE_READ_BUFFER],
890 function_type);
892 /* | cl_int clEnqueueWriteBuffer (cl_command_queue command_queue,
893 | cl_mem buffer,
894 | cl_bool blocking_write,
895 | size_t offset,
896 | size_t cb,
897 | const void *ptr,
898 | cl_uint num_events_in_wait_list,
899 | const cl_event *event_wait_list,
900 | cl_event *event) */
902 function_type
903 = build_function_type_list (integer_type_node,
904 cl_command_queue,
905 cl_mem,
906 unsigned_type_node,
907 size_type_node,
908 size_type_node,
909 const_ptr_type_node,
910 unsigned_type_node,
911 const_cl_event_ptr,
912 cl_event_ptr,
913 NULL_TREE);
915 enqueue_write_buffer_decl
916 = build_fn_decl (opencl_function_names[ENQUEUE_WRITE_BUFFER],
917 function_type);
920 /* cl_int clReleaseMemObject (cl_mem memobj) */
922 function_type
923 = build_function_type_list (integer_type_node, cl_mem, NULL_TREE);
925 release_memory_obj_decl
926 = build_fn_decl (opencl_function_names[RELEASE_MEMORY_OBJ],
927 function_type);
930 /* cl_int clReleaseContext (cl_context context) */
931 function_type
932 = build_function_type_list (integer_type_node, cl_context,
933 NULL_TREE);
935 release_context_decl
936 = build_fn_decl (opencl_function_names[RELEASE_CONTEXT],
937 function_type);
939 /* cl_int clReleaseCommandQueue (cl_command_queue command_queue) */
940 function_type
941 = build_function_type_list (integer_type_node, cl_command_queue,
942 NULL_TREE);
944 release_command_queue_decl
945 = build_fn_decl (opencl_function_names[RELEASE_COMMAND_QUEUE],
946 function_type);
948 /* cl_int clReleaseProgram (cl_program program) */
949 function_type
950 = build_function_type_list (integer_type_node, cl_program,
951 NULL_TREE);
953 release_program_decl
954 = build_fn_decl (opencl_function_names[RELEASE_PROGRAM],
955 function_type);
957 /* cl_int clReleaseKernel (cl_kernel kernel) */
958 function_type
959 = build_function_type_list (integer_type_node, cl_kernel, NULL_TREE);
961 release_kernel_decl
962 = build_fn_decl (opencl_function_names[RELEASE_KERNEL],
963 function_type);
965 /* | cl_int clGetPlatformIDs (cl_uint num_entries,
966 | cl_platform_id *platforms,
967 | cl_uint *num_platforms) */
970 function_type
971 = build_function_type_list (integer_type_node,
972 unsigned_type_node,
973 cl_platford_id_ptr,
974 build_pointer_type (unsigned_type_node),
975 NULL_TREE);
976 get_platform_ids_decl
977 = build_fn_decl (opencl_function_names [GET_PLATFORM_IDS],
978 function_type);
981 /* | cl_int clWaitForEvents (cl_uint num_events,
982 | const cl_event *event_list) */
984 function_type
985 = build_function_type_list (integer_type_node,
986 unsigned_type_node,
987 const_cl_event_ptr,
988 NULL_TREE);
990 get_wait_for_events_decl
991 = build_fn_decl (opencl_function_names [WAIT_FOR_EVENTS],
992 function_type);
994 return NULL_TREE;
997 case CREATE_CONTEXT_FROM_TYPE:
998 return create_context_from_type_decl;
1000 case GET_CONTEXT_INFO:
1001 return get_context_info_decl;
1003 case CREATE_COMMAND_QUEUE:
1004 return create_command_queue_decl;
1006 case CREATE_PROGRAM_WITH_SOURCE:
1007 return create_program_with_source_decl;
1009 case BUILD_PROGRAM:
1010 return build_program_decl;
1012 case CREATE_KERNEL:
1013 return create_kernel_decl;
1015 case CREATE_BUFFER:
1016 return create_buffer_decl;
1018 case SET_KERNEL_ARG:
1019 return set_kernel_arg_decl;
1021 case ENQUEUE_ND_RANGE_KERNEL:
1022 return enqueue_nd_range_kernel_decl;
1024 case ENQUEUE_READ_BUFFER:
1025 return enqueue_read_buffer_decl;
1027 case ENQUEUE_WRITE_BUFFER:
1028 return enqueue_write_buffer_decl;
1030 case RELEASE_MEMORY_OBJ:
1031 return release_memory_obj_decl;
1033 case RELEASE_CONTEXT:
1034 return release_context_decl;
1036 case RELEASE_COMMAND_QUEUE:
1037 return release_command_queue_decl;
1039 case RELEASE_PROGRAM:
1040 return release_program_decl;
1042 case RELEASE_KERNEL:
1043 return release_kernel_decl;
1045 case GET_PLATFORM_IDS:
1046 return get_platform_ids_decl;
1048 case WAIT_FOR_EVENTS:
1049 return get_wait_for_events_decl;
1051 default: gcc_unreachable ();
1055 /* Add clWaitForEvent (1, EVENT_VAR); call to CODE_GEN->main_edge. */
1057 static void
1058 opencl_wait_for_event (opencl_main code_gen, tree event_var)
1060 tree function = opencl_create_function_decl (WAIT_FOR_EVENTS);
1061 tree call = build_call_expr (function, 2,
1062 integer_one_node,
1063 event_var);
1065 opencl_add_safe_call (code_gen, call, true);
1068 /* Add host to device memory transfer. DATA - data, which must be
1069 transfered to device. CODE_GEN holds information related to code
1070 generation. */
1072 static tree
1073 opencl_pass_to_device (opencl_main code_gen, opencl_data data)
1075 edge init_edge;
1076 tree function;
1077 tree call;
1079 tree index_type = build_index_type (build_int_cst (NULL_TREE, 2));
1080 tree array_type = build_array_type (ptr_type_node, index_type);
1081 tree var = opencl_create_tmp_var (array_type, "wait_event");
1082 tree event_call;
1084 TREE_STATIC (var) = 1;
1085 assemble_variable (var, 1, 0, 1);
1087 /* If data is wrutten in device, mark it is not up to date on host. */
1088 if (data->written_in_current_body)
1089 data->up_to_date_on_host = false;
1091 /* If data is up to date on device, but it was initialized befor
1092 current loop, then mark it as initialized in current loop and
1093 store it.
1095 Consider an example: D - device, H - host, W - write, R - read.
1097 | HW(1) -- LOOP
1098 | /\
1099 | / \
1100 | / \
1101 | HR(2) DW(3)
1103 While analyzing statement (2), data will be up to date on host
1104 because of statement (1), but while executing after (3) in loop,
1105 (2) will read incorrect data.
1107 So, we have to add device to host memory transfer after statement (3).
1109 if (flag_graphite_opencl_cpu)
1110 return data->device_object;
1112 if (data->up_to_date_on_device)
1114 if (!data->inited_in_current_loop_on_device
1115 && code_gen && code_gen->curr_meta
1116 && code_gen->curr_meta->parent)
1117 VEC_safe_push (opencl_data, heap,
1118 code_gen->curr_meta->parent->post_pass_to_device,
1119 data);
1121 data->inited_in_current_loop_on_device = true;
1122 return data->device_object;
1125 data->inited_in_current_loop_on_device = true;
1126 init_edge = opencl_get_edge_for_init (code_gen, data->id, true);
1128 /* Add gimple. */
1129 function = opencl_create_function_decl (ENQUEUE_WRITE_BUFFER);
1131 event_call = build4 (ARRAY_REF, ptr_type_node, var,
1132 integer_zero_node, NULL_TREE, NULL_TREE);
1133 event_call = build_addr (event_call, current_function_decl);
1134 call = build_call_expr (function, 9,
1135 h_cmd_queue,
1136 data->device_object,
1137 build_int_cst (NULL_TREE, CL_TRUE),
1138 integer_zero_node,
1139 data->size_variable,
1140 data->object,
1141 integer_zero_node,
1142 null_pointer_node,
1143 event_call);
1145 if (init_edge)
1146 opencl_add_safe_call_on_edge (call, true, init_edge);
1147 else
1148 opencl_add_safe_call (code_gen, call, true);
1150 data->up_to_date_on_device = true;
1151 opencl_wait_for_event (code_gen, event_call);
1152 return data->device_object;
1155 /* Add device to host memory transfer. DATA - data, which must be
1156 transfered to host. CODE_GEN holds information related to code
1157 generation. */
1159 static void
1160 opencl_pass_to_host (opencl_main code_gen, opencl_data data)
1162 edge init_edge;
1163 tree function;
1164 tree curr_type;
1165 tree curr;
1166 tree call;
1167 tree index_type = build_index_type (build_int_cst (NULL_TREE, 2));
1168 tree array_type = build_array_type (ptr_type_node, index_type);
1169 tree var = opencl_create_tmp_var (array_type, "wait_event");
1170 tree event_call;
1172 TREE_STATIC (var) = 1;
1173 assemble_variable (var, 1, 0, 1);
1175 if (data->written_in_current_body)
1176 data->up_to_date_on_device = false;
1178 if (data->up_to_date_on_host)
1180 if (!data->inited_in_current_loop_on_host
1181 && code_gen && code_gen->curr_meta &&
1182 code_gen->curr_meta->parent)
1183 VEC_safe_push (opencl_data, heap,
1184 code_gen->curr_meta->parent->post_pass_to_host, data);
1186 data->inited_in_current_loop_on_host = true;
1187 return;
1190 data->inited_in_current_loop_on_host = true;
1192 if (flag_graphite_opencl_cpu
1193 || data->privatized)
1194 return;
1196 init_edge = opencl_get_edge_for_init (code_gen, data->id, false);
1198 function = opencl_create_function_decl (ENQUEUE_READ_BUFFER);
1199 curr_type = TREE_TYPE (data->object);
1200 curr = data->object;
1202 if (TREE_CODE (curr_type) == ARRAY_TYPE)
1203 curr = build_addr (curr, current_function_decl);
1205 event_call = build4 (ARRAY_REF, ptr_type_node, var,
1206 integer_zero_node, NULL_TREE, NULL_TREE);
1207 event_call = build_addr (event_call, current_function_decl);
1209 call = build_call_expr (function, 9,
1210 h_cmd_queue,
1211 data->device_object,
1212 build_int_cst (NULL_TREE, CL_TRUE),
1213 integer_zero_node,
1214 data->size_variable,
1215 curr, integer_zero_node,
1216 null_pointer_node,
1217 event_call);
1219 if (init_edge)
1220 opencl_add_safe_call_on_edge (call, true, init_edge);
1221 else
1222 opencl_add_safe_call (code_gen, call, true);
1224 opencl_wait_for_event (code_gen, event_call);
1225 data->up_to_date_on_host = true;
1228 /* Pass all data from device to host. This function must be called when
1229 we need all data to be up to date on host. CODE_GEN holds information
1230 related to code generation. */
1232 static void
1233 opencl_fflush_all_device_buffers_to_host (opencl_main code_gen)
1235 VEC (opencl_data, heap) *datas = code_gen->opencl_function_data;
1236 int i;
1237 opencl_data curr;
1238 tree function = opencl_create_function_decl (RELEASE_MEMORY_OBJ);
1240 for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
1242 curr->written_in_current_body = true;
1243 opencl_pass_to_host (code_gen, curr);
1246 for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
1247 if (curr->used_on_device && !curr->is_static)
1249 tree var = curr->device_object;
1250 tree call = build_call_expr (function, 1, var);
1252 opencl_add_safe_call (code_gen, call, true);
1256 /* Calculate correct flags for clCreateBuffer. READ means, that
1257 buffer must be readable on device, WRITE - that buffer must be
1258 writable on device. */
1260 static int
1261 opencl_get_mem_flags (bool read, bool write)
1263 int rw_flags;
1264 int location_flags;
1266 gcc_assert (read || write);
1268 if (write && read)
1269 rw_flags = CL_MEM_READ_WRITE;
1270 else
1272 if (read)
1273 rw_flags = CL_MEM_READ_ONLY;
1274 else
1275 rw_flags = CL_MEM_WRITE_ONLY;
1278 if (flag_graphite_opencl_cpu)
1279 location_flags = CL_MEM_USE_HOST_PTR;
1280 else
1281 location_flags = CL_MEM_COPY_HOST_PTR;
1283 return location_flags | rw_flags;
1286 /* Create memory on device for DATA and init it by data from host.
1287 ptr is pointer to host memory location. Function returns tree,
1288 corresponding to memory location on device. */
1290 static tree
1291 opencl_create_memory_for_pointer (opencl_data data)
1293 tree ptr = data->object;
1294 tree arr_size = data->size_variable;
1295 tree function = opencl_create_function_decl (CREATE_BUFFER);
1296 bool ever_read = data->ever_read_on_device;
1297 bool ever_written = data->ever_written_on_device;
1298 tree mem_flags = build_int_cst (NULL_TREE,
1299 opencl_get_mem_flags (ever_read,
1300 ever_written));
1301 if (TREE_CODE (TREE_TYPE (ptr)) == ARRAY_TYPE)
1302 ptr = build_addr (ptr, current_function_decl);
1304 if (flag_graphite_opencl_debug)
1306 tree result = opencl_create_tmp_var (integer_type_node,
1307 "__opencl_create_buffer_result");
1309 return build_call_expr (function, 5,
1310 h_context, mem_flags,
1311 arr_size, ptr,
1312 build1 (ADDR_EXPR,
1313 integer_ptr_type_node,
1314 result));
1316 else
1317 return build_call_expr (function, 5,
1318 h_context, mem_flags,
1319 arr_size, ptr, null_pointer_node);
1322 /* Create memory buffers on host for all required host memory objects.
1323 CODE_GEN holds information related to code generation. */
1325 static void
1326 opencl_init_all_device_buffers (opencl_main code_gen)
1328 VEC (opencl_data, heap) *datas = code_gen->opencl_function_data;
1329 int i;
1330 opencl_data curr;
1331 edge data_init_edge = single_succ_edge (code_gen->data_init_bb);
1333 for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
1335 tree tmp;
1337 if (!curr->used_on_device || curr->is_static)
1338 continue;
1340 tmp = opencl_create_memory_for_pointer (curr);
1341 tmp = build2 (MODIFY_EXPR, ptr_type_node, curr->device_object, tmp);
1342 data_init_edge = opencl_add_safe_call_on_edge (tmp, false,
1343 data_init_edge);
1347 /* Create new static void * variable with name __ocl_ + NAME. */
1349 static tree
1350 opencl_create_static_ptr_variable (const char *name)
1352 const char *id_name = concat ("__ocl_",name, NULL);
1353 tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL,
1354 create_tmp_var_name (id_name), ptr_type_node);
1355 TREE_STATIC (var) = 1;
1356 TREE_PUBLIC (var) = 0;
1357 DECL_ARTIFICIAL (var) = 1;
1358 TREE_USED (var) = 1;
1359 TREE_ADDRESSABLE (var) = 1;
1360 DECL_INITIAL (var) = null_pointer_node;
1361 assemble_variable (var, 1, 0, 1);
1363 return var;
1366 /* Insert several opencl calls to output program. */
1368 /* | cl_program h_program;
1369 | h_program = clCreateProgramWithSource (h_context, 1,
1370 | sProgramSource, 0, 0);
1372 SRC is the program source code and DEST is the edge where
1373 call must be inserted. */
1375 static edge
1376 opencl_insert_create_program_with_source_call (const char *src, edge base)
1378 /* Get tree with function definition. */
1379 tree function = opencl_create_function_decl (CREATE_PROGRAM_WITH_SOURCE);
1380 tree code_tree = build_string_literal (strlen (src) + 1, src);
1381 tree call;
1382 basic_block bb = split_edge (base);
1383 tree tmp_var = opencl_tree_to_var (bb, code_tree);
1385 call = build_call_expr (function, 5, h_context,
1386 integer_one_node,
1387 build_addr (tmp_var, current_function_decl),
1388 null_pointer_node,
1389 null_pointer_node);
1391 call = build2 (MODIFY_EXPR, ptr_type_node,
1392 h_program, call);
1393 return opencl_add_safe_call_on_edge (call, false, single_succ_edge (bb));
1396 /* clBuildProgram (h_program, 0, 0, 0, 0, 0);
1397 BASE is the edge where call must be inserted. */
1399 static edge
1400 opencl_insert_build_program_call (edge base)
1402 tree function = opencl_create_function_decl (BUILD_PROGRAM);
1403 tree call = build_call_expr (function, 6,
1404 h_program,
1405 integer_zero_node, null_pointer_node,
1406 null_pointer_node, null_pointer_node,
1407 null_pointer_node);
1408 return opencl_add_safe_call_on_edge (call, true, base);
1411 /* cl_kernel tmm_kernel;
1412 tmp_kernel = clCreateKernel (h_program, func_name, 0);
1413 FUNCTION_NAME is the name of the kernel function,
1414 CODE_GEN holds information related to code generation. */
1416 static tree
1417 opencl_insert_create_kernel_call (opencl_main code_gen,
1418 const char *function_name)
1420 tree new_kernel_var;
1421 basic_block bb;
1422 gimple_stmt_iterator g_iter;
1423 tree function;
1424 tree kernel_name;
1425 tree call;
1426 tree tmp_tree;
1428 new_kernel_var = opencl_create_tmp_var (ptr_type_node, function_name);
1429 bb = split_edge (code_gen->kernel_edge);
1430 g_iter = gsi_last_bb (bb);
1431 function = opencl_create_function_decl (CREATE_KERNEL);
1432 kernel_name = build_string_literal (strlen (function_name) + 1,
1433 function_name);
1434 call = build_call_expr (function, 3, h_program, kernel_name,
1435 null_pointer_node);
1436 tmp_tree = build2 (MODIFY_EXPR, ptr_type_node,
1437 new_kernel_var, call);
1439 code_gen->kernel_edge = single_succ_edge (bb);
1440 force_gimple_operand_gsi (&g_iter, tmp_tree, true, NULL, false,
1441 GSI_CONTINUE_LINKING);
1442 VEC_safe_push (tree, heap, opencl_function_kernels, new_kernel_var);
1443 code_gen->kernel_edge
1444 = opencl_add_safe_call_on_edge (new_kernel_var, false,
1445 code_gen->kernel_edge);
1446 return new_kernel_var;
1449 /* Init memory on device. Only one levell of pointers are suppoted.
1450 So in case of char ** only array of char * will be created.
1451 Function return tree, corresponding to new pointer (pointer
1452 on device).
1454 | cl_mem clCreateBuffer (cl_context context,
1455 | cl_mem_flags flags,
1456 | size_t size,
1457 | void *host_ptr,
1458 | cl_int *errcode_ret) */
1460 /* Calculate size of data reference, represented by REF. PTR is a
1461 base object of data reference. */
1463 static tree
1464 opencl_get_indirect_size (tree ptr, poly_dr_p ref)
1466 ptr = TREE_TYPE (ptr);
1468 switch (TREE_CODE (ptr))
1470 case ARRAY_TYPE:
1471 return TYPE_SIZE_UNIT (ptr);
1473 case POINTER_TYPE:
1475 tree inner_type = TREE_TYPE (ptr);
1476 tree t = graphite_outer_subscript_bound (ref, false);
1477 tree inner_type_size = TYPE_SIZE_UNIT (inner_type);
1479 if (inner_type_size == NULL)
1480 return NULL;
1482 if (DECL_P (inner_type_size))
1483 add_referenced_var (inner_type_size);
1485 gcc_assert (t);
1486 t = fold_build2 (TRUNC_DIV_EXPR, sizetype, t, inner_type_size);
1487 t = fold_build2 (PLUS_EXPR, sizetype, t, size_one_node);
1488 t = fold_build2 (MULT_EXPR, sizetype, t, inner_type_size);
1489 return t;
1492 default:
1493 return NULL_TREE;
1496 gcc_unreachable ();
1499 /* Create variables for kernel KERNEL arguments. Each argument is
1500 represented by new variable with it's value and it's size. If arg
1501 is a pointer or array, it's represented by device buffer with data
1502 from host memory. CODE_GEN holds information related to code
1503 generation. */
1505 static void
1506 opencl_init_local_device_memory (opencl_main code_gen, opencl_body kernel)
1508 VEC (tree, heap) **args = &kernel->function_args;
1509 VEC (tree, heap) **args_to_pass = &kernel->function_args_to_pass;
1510 VEC (opencl_data, heap) **refs = &kernel->data_refs;
1511 tree curr;
1512 opencl_data curr_data;
1513 int i;
1514 basic_block bb = opencl_create_bb (code_gen);
1515 basic_block kernel_bb = split_edge (code_gen->kernel_edge);
1517 code_gen->kernel_edge = single_succ_edge (kernel_bb);
1519 for (i = 0; VEC_iterate (tree, *args, i, curr); i ++)
1521 gimple_stmt_iterator g_iter = gsi_last_bb (bb);
1522 gimple_stmt_iterator kernel_g_iter = gsi_last_bb (kernel_bb);
1523 tree curr_type = TREE_TYPE (curr);
1524 tree new_type;
1525 tree tmp_var;
1526 tree mov;
1527 tree curr_var = opencl_create_tmp_var (curr_type, "__ocl_iv");
1529 if (TREE_CODE (curr) != PARM_DECL
1530 && TREE_CODE (curr) != VAR_DECL)
1532 mov = build2 (MODIFY_EXPR, curr_type, curr_var, curr);
1534 force_gimple_operand_gsi (&g_iter, mov, false, NULL, false,
1535 GSI_CONTINUE_LINKING);
1537 else
1538 force_gimple_operand_gsi (&g_iter, curr, false, curr_var, false,
1539 GSI_CONTINUE_LINKING);
1540 curr = curr_var;
1542 new_type = build_pointer_type (curr_type);
1543 tmp_var = opencl_create_tmp_var (new_type, "__opencl_scalar_arg");
1544 mov = build1 (ADDR_EXPR, new_type, curr);
1546 mov = build2 (MODIFY_EXPR, new_type, tmp_var, mov);
1548 force_gimple_operand_gsi (&kernel_g_iter, mov, false, NULL, false,
1549 GSI_CONTINUE_LINKING);
1550 VEC_safe_push (tree, heap, *args_to_pass, tmp_var);
1553 for (i = 0; VEC_iterate (opencl_data, *refs, i, curr_data); i++)
1555 gimple_stmt_iterator kernel_g_iter = gsi_last_bb (kernel_bb);
1556 tree new_type;
1557 tree tmp_var;
1558 tree mov;
1559 tree curr = opencl_pass_to_device (code_gen, curr_data);
1560 tree curr_type = ptr_type_node;
1562 new_type = build_pointer_type (curr_type);
1563 tmp_var = opencl_create_tmp_var (new_type, "__opencl_non_scalar_arg");
1564 mov = build1 (ADDR_EXPR, new_type, curr);
1566 mov = build2 (MODIFY_EXPR, new_type, tmp_var, mov);
1568 force_gimple_operand_gsi (&kernel_g_iter, mov, false, NULL, false,
1569 GSI_CONTINUE_LINKING);
1570 VEC_safe_push (tree, heap, *args_to_pass, tmp_var);
1575 /* cl_int clSetKernelArg (cl_kernel kernel,
1576 cl_uint arg_index,
1577 size_t arg_size,
1578 const void *arg_value)
1580 Set all kernel args for OpenCL kernel, represented by KERNEL_VAR.
1581 KERNEL holds all data, related to given kernel.
1582 CODE_GEN holds information related to code generation.
1583 All arguments are passed by pointer. */
1585 static void
1586 opencl_pass_kernel_arguments (opencl_main code_gen, opencl_body kernel,
1587 tree kernel_var)
1589 VEC (tree, heap) *args_to_pass = kernel->function_args_to_pass;
1590 tree arg;
1591 int i;
1592 tree function = opencl_create_function_decl (SET_KERNEL_ARG);
1594 for (i = 0; VEC_iterate (tree, args_to_pass, i, arg); i++)
1596 tree call
1597 = build_call_expr (function, 4, kernel_var,
1598 build_int_cst (NULL_TREE, i),
1599 TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (arg))),
1600 arg);
1601 opencl_add_safe_call (code_gen, call, true);
1605 /* clEnqueueNDRangeKernel (h_cmd_queue, hKernel, 1, 0,
1606 &cnDimension, 0, 0, 0, 0);
1608 Execute kernel, represented by KERNEL_VAR in NUM_OF_EXEC threads.
1609 Use EVENT_VAR as event variable for asynchronous call.
1610 CODE_GEN holds information related to code generation. */
1612 static void
1613 opencl_execute_kernel (opencl_main code_gen, tree num_of_exec,
1614 tree kernel_var, tree event_var)
1616 tree function = opencl_create_function_decl (ENQUEUE_ND_RANGE_KERNEL);
1617 tree num_of_threads = opencl_create_tmp_var (integer_type_node,
1618 "__opencl_num_of_exec");
1619 gimple_stmt_iterator g_iter = gsi_last_bb (opencl_create_bb (code_gen));
1620 tree call;
1622 TREE_STATIC (num_of_threads) = 1;
1623 assemble_variable (num_of_threads, 1, 0, 1);
1625 call = build2 (MODIFY_EXPR, integer_type_node, num_of_threads, num_of_exec);
1627 force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
1628 GSI_CONTINUE_LINKING);
1630 call = build1 (ADDR_EXPR, integer_ptr_type_node, num_of_threads);
1632 call = build_call_expr (function, 9,
1633 h_cmd_queue,
1634 kernel_var,
1635 integer_one_node,
1636 null_pointer_node,
1637 call,
1638 null_pointer_node,
1639 integer_zero_node,
1640 null_pointer_node,
1641 event_var);
1643 opencl_add_safe_call (code_gen, call, true);
1646 /* Place building program from single source string to edge BASE.
1647 Current implementation performs single build per function.
1648 String contains kernels from all scops of current function.
1649 Functions returns true if any kernel has been created. */
1651 static edge
1652 opencl_create_function_call (edge base)
1654 edge new_edge;
1655 const char *src;
1657 /* Required for addressing types with size less then 4 bytes. */
1658 dyn_string_prepend_cstr
1659 (main_program_src,
1660 "#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable\n");
1661 /* Required for double type. */
1662 dyn_string_prepend_cstr
1663 (main_program_src, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n ");
1664 src = dyn_string_buf (main_program_src);
1666 if (dump_file && (dump_flags & TDF_DETAILS))
1668 fprintf (dump_file, "\nGenerated OpenCL code: \n");
1669 fprintf (dump_file, "%s", src);
1672 new_edge = opencl_insert_create_program_with_source_call (src, base);
1674 return opencl_insert_build_program_call (new_edge);
1677 /* Mark privatizable data for current loop nest. Information where
1678 given data can be privatized is taken from meta information of
1679 current loop nest, which is stored in CODE_GEN. */
1681 static void
1682 opencl_mark_privatized_data (opencl_main code_gen)
1684 VEC (opencl_data, heap) *datas = code_gen->opencl_function_data;
1685 int i;
1686 opencl_data curr;
1687 bitmap can_be_private = code_gen->curr_meta->can_be_private;
1689 for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
1690 curr->privatized = bitmap_bit_p (can_be_private, curr->id);
1694 /* Store calculated sizes of all pointers or arrays to variables.
1695 CODE_GEN holds information related to code generation. */
1697 static void
1698 opencl_set_data_size (opencl_main code_gen)
1700 VEC (opencl_data, heap) *datas = code_gen->opencl_function_data;
1701 int i;
1702 opencl_data curr;
1703 gimple_stmt_iterator g_iter = gsi_last_bb (code_gen->data_init_bb);
1705 for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
1707 tree call;
1709 if (curr->is_static
1710 || !curr->used_on_device
1711 || curr->size_value == NULL)
1712 continue;
1714 call = build2 (MODIFY_EXPR, size_type_node,
1715 curr->size_variable, curr->size_value);
1717 force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
1718 GSI_CONTINUE_LINKING);
1722 /* Find opencl_data which represents array VAR. */
1724 static opencl_data
1725 opencl_get_static_data_by_tree (tree var)
1727 map_tree_to_data tmp = map_tree_to_data_create (var, NULL);
1728 map_tree_to_data *slot
1729 = (map_tree_to_data *) htab_find_slot (array_data_to_tree,
1730 tmp, INSERT);
1731 if (*slot == NULL)
1732 return NULL;
1734 return (*slot)->value;
1738 /* Create required OpenCL variable for given DATA. */
1740 static void
1741 opencl_data_init_object (opencl_data data)
1743 if (TREE_CODE (TREE_TYPE (data->exact_object)) == POINTER_TYPE)
1745 data->device_object
1746 = opencl_create_tmp_var (ptr_type_node, "__opencl_data");
1747 data->is_static = false;
1749 else
1751 /* (TREE_CODE (TREE_TYPE (data->exact_object)) == ARRAY_TYPE) */
1752 map_tree_to_data tree_ptr
1753 = map_tree_to_data_create (data->exact_object, data);
1754 map_tree_to_data *tree_slot
1755 = (map_tree_to_data *) htab_find_slot (array_data_to_tree,
1756 tree_ptr, INSERT);
1758 gcc_assert (*tree_slot == NULL);
1759 *tree_slot = tree_ptr;
1761 data->device_object
1762 = opencl_create_static_ptr_variable ("__opencl_data");
1763 data->is_static = true;
1764 data->size_variable = data->size_value;
1765 VEC_safe_push (opencl_data, heap, opencl_array_data, data);
1769 /* Register reference to DATA via data reference REF_KEY and
1770 variable TREE_KEY in CODE_GEN structures. */
1772 static void
1773 opencl_register_data (opencl_main code_gen, opencl_data data,
1774 tree tree_key, data_reference_p ref_key)
1776 htab_t ref_to_data = code_gen->ref_to_data;
1777 htab_t tree_to_data = code_gen->tree_to_data;
1778 map_ref_to_data ref_ptr = map_ref_to_data_create (ref_key, data);
1779 map_tree_to_data tree_ptr = map_tree_to_data_create (tree_key, data);
1780 map_ref_to_data *ref_slot;
1781 map_tree_to_data *tree_slot;
1783 ref_slot
1784 = (map_ref_to_data *) htab_find_slot (ref_to_data, ref_ptr, INSERT);
1785 gcc_assert (*ref_slot == NULL);
1786 *ref_slot = ref_ptr;
1788 tree_slot
1789 = (map_tree_to_data *) htab_find_slot (tree_to_data, tree_ptr, INSERT);
1790 gcc_assert (*tree_slot == NULL || (*tree_slot)->value == data);
1791 *tree_slot = tree_ptr;
1794 /* Analyze single data reference REF and update CODE_GEN structures.
1795 If it access data, which has been accessed in data references
1796 before, update it's size. Otherwise add data to array. */
1798 static void
1799 opencl_parse_single_data_ref (poly_dr_p ref, opencl_main code_gen)
1801 data_reference_p d_ref = (data_reference_p) PDR_CDR (ref);
1802 tree data_ref_tree = dr_outermost_base_object (d_ref);
1803 opencl_data curr = opencl_get_data_by_tree (code_gen, data_ref_tree);
1804 tree size = opencl_get_indirect_size (data_ref_tree, ref);
1806 if (curr)
1808 if (!curr->is_static)
1810 if (!size || !curr->size_value)
1811 curr->size_value = NULL;
1812 else
1813 curr->size_value = fold_build2 (MAX_EXPR, sizetype,
1814 size, curr->size_value);
1817 else
1819 curr = opencl_get_static_data_by_tree (data_ref_tree);
1821 if (!curr)
1823 curr = opencl_data_create (data_ref_tree, size);
1824 opencl_data_init_object (curr);
1827 curr->id = VEC_length (opencl_data, code_gen->opencl_function_data);
1828 VEC_safe_push (opencl_data, heap, code_gen->opencl_function_data, curr);
1831 opencl_register_data (code_gen, curr, data_ref_tree, d_ref);
1834 /* Analyse all data reference for poly basic block PBB and update CODE_GEN
1835 structures. */
1837 static void
1838 opencl_parse_data_refs (poly_bb_p pbb, opencl_main code_gen)
1840 VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb);
1841 int i;
1842 poly_dr_p curr;
1844 for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++)
1845 opencl_parse_single_data_ref (curr, code_gen);
1848 /* Analyse all data reference for scop M_SCOP and update
1849 CODE_GEN structures. */
1851 static void
1852 opencl_init_data (scop_p m_scop, opencl_main code_gen)
1854 VEC (poly_bb_p, heap) *bbs = SCOP_BBS (m_scop);
1855 int i;
1856 poly_bb_p curr;
1858 for (i = 0; VEC_iterate (poly_bb_p, bbs, i, curr); i++)
1859 opencl_parse_data_refs (curr, code_gen);
1862 /* Init basic block in CODE_GEN structures. */
1864 static void
1865 opencl_init_basic_blocks (opencl_main code_gen)
1867 code_gen->data_init_bb = opencl_create_bb (code_gen);
1868 code_gen->kernel_edge = code_gen->main_edge;
1871 /* Add function calls to create and launch kernel KERNEL to
1872 CODE_GEN->main_edge. */
1874 static void
1875 opencl_create_gimple_for_body (opencl_body kernel, opencl_main code_gen)
1877 tree num_of_exec = kernel->num_of_exec;
1878 tree call;
1880 tree kernel_var
1881 = opencl_insert_create_kernel_call (code_gen, (const char *) kernel->name);
1883 tree index_type = build_index_type (build_int_cst (NULL_TREE, 2));
1884 tree array_type = build_array_type (ptr_type_node, index_type);
1885 tree var = opencl_create_tmp_var (array_type, "wait_event");
1887 TREE_STATIC (var) = 1;
1888 assemble_variable (var, 1, 0, 1);
1890 call = build4 (ARRAY_REF, ptr_type_node, var,
1891 integer_zero_node, NULL_TREE, NULL_TREE);
1892 call = build_addr (call, current_function_decl);
1894 opencl_init_local_device_memory (code_gen, kernel);
1895 opencl_pass_kernel_arguments (code_gen, kernel, kernel_var);
1897 opencl_execute_kernel (code_gen, num_of_exec, kernel_var, call);
1898 opencl_wait_for_event (code_gen, call);
1901 /* Prepare memory for gimple (host) statement, represented by PBB.
1902 Copy memory from device to host if it's nessesary.
1903 CODE_GEN holds information related to code generation. */
1905 static void
1906 opencl_prepare_memory_for_gimple_stmt (poly_bb_p pbb, opencl_main code_gen)
1908 VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb);
1909 int i;
1910 poly_dr_p curr;
1912 for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++)
1914 data_reference_p d_ref = (data_reference_p) PDR_CDR (curr);
1915 opencl_data data;
1916 bool is_read;
1918 /* Scalar variables can be passed directly. */
1919 data = opencl_get_data_by_data_ref (code_gen, d_ref);
1921 /* Private variables should not be passed from device to host. */
1922 if (data->privatized)
1923 continue;
1925 is_read = DR_IS_READ (d_ref);
1926 gcc_assert (data);
1928 data->read_in_current_body = is_read;
1929 data->written_in_current_body = !is_read;
1930 opencl_pass_to_host (code_gen, data);
1932 if (!is_read)
1933 bitmap_set_bit (code_gen->curr_meta->modified_on_host, data->id);
1937 /* Add basic block from clast_user_stmt STMT to gimple.
1938 CODE_GEN holds information related to code generation. */
1940 static void
1941 opencl_add_gimple_for_user_stmt (struct clast_user_stmt *stmt,
1942 opencl_main code_gen)
1944 gimple_bb_p gbb;
1945 CloogStatement *cs = stmt->statement;
1946 poly_bb_p pbb = (poly_bb_p) cloog_statement_usr (cs);
1947 sese region = code_gen->region;
1948 int nb_loops = number_of_loops ();
1949 int i;
1950 VEC (tree, heap) *iv_map = VEC_alloc (tree, heap, nb_loops);
1951 htab_t newivs_index = code_gen->newivs_index;
1952 VEC (tree, heap) *newivs = code_gen->newivs;
1954 /* Get basic block to add. */
1955 gbb = PBB_BLACK_BOX (pbb);
1957 if (GBB_BB (gbb) == ENTRY_BLOCK_PTR)
1958 return;
1960 /*Reset flags. */
1961 opencl_fflush_rw_flags (code_gen);
1963 /* Pass all required memory to host. */
1964 opencl_prepare_memory_for_gimple_stmt (pbb, code_gen);
1966 for (i = 0; i < nb_loops; i++)
1967 VEC_quick_push (tree, iv_map, NULL_TREE);
1969 build_iv_mapping (iv_map, region, newivs, newivs_index,
1970 stmt, code_gen->params_index);
1971 code_gen->main_edge
1972 = copy_bb_and_scalar_dependences (GBB_BB (gbb), region,
1973 code_gen->main_edge, iv_map);
1974 VEC_free (tree, heap, iv_map);
1975 recompute_all_dominators ();
1976 update_ssa (TODO_update_ssa);
1978 opencl_verify ();
1981 /* Delete opencl_body DATA. */
1983 static void
1984 opencl_body_delete (opencl_body data)
1986 dyn_string_delete (data->body);
1987 dyn_string_delete (data->header);
1988 dyn_string_delete (data->pre_header);
1989 dyn_string_delete (data->non_scalar_args);
1990 VEC_free (tree, heap, data->function_args);
1991 VEC_free (tree, heap, data->function_args_to_pass);
1992 VEC_free (opencl_data, heap, data->data_refs);
1993 free (data);
1996 /* Reset data structures before processing loop, represented by META.
1997 CODE_GEN holds information related to code generation. */
1999 static void
2000 opencl_init_new_loop (opencl_clast_meta meta, opencl_main code_gen)
2002 opencl_data curr;
2003 unsigned i;
2005 meta->post_pass_to_host
2006 = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE);
2007 meta->post_pass_to_device
2008 = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE);
2010 for (i = 0;
2011 VEC_iterate (opencl_data, code_gen->opencl_function_data, i, curr);
2012 i++)
2014 curr->inited_in_current_loop_on_host = false;
2015 curr->inited_in_current_loop_on_device = false;
2019 /* Post loop init. Loop through all data stored in POST_PASS_TO_HOST
2020 and POST_PASS_TO_DEVICE vectors in META. This data must be up to
2021 date on host or device respectively at the end of current loop.
2022 CODE_GEN holds information related to code generation. */
2024 static void
2025 opencl_postpass_data (opencl_main code_gen, opencl_clast_meta meta)
2027 opencl_data curr;
2028 unsigned i;
2030 for (i = 0;
2031 VEC_iterate (opencl_data, meta->post_pass_to_host, i, curr); i++)
2033 curr->written_in_current_body = false;
2034 opencl_pass_to_host (code_gen, curr);
2037 if (!flag_graphite_opencl_cpu)
2038 for (i = 0;
2039 VEC_iterate (opencl_data, meta->post_pass_to_device, i, curr); i++)
2041 curr->written_in_current_body = false;
2042 opencl_pass_to_device (code_gen, curr);
2045 if (meta->parent)
2047 VEC (opencl_data, heap) *parent_vec_host
2048 = meta->parent->post_pass_to_host;
2049 VEC (opencl_data, heap) *parent_vec_device
2050 = meta->parent->post_pass_to_device;
2052 for (i = 0;
2053 VEC_iterate (opencl_data, meta->post_pass_to_host, i, curr); i++)
2054 VEC_safe_push (opencl_data, heap, parent_vec_host, curr);
2056 for (i = 0;
2057 VEC_iterate (opencl_data, meta->post_pass_to_device, i, curr); i++)
2058 VEC_safe_push (opencl_data, heap, parent_vec_device, curr);
2061 VEC_free (opencl_data, heap, meta->post_pass_to_host);
2062 VEC_free (opencl_data, heap, meta->post_pass_to_device);
2065 static void opencl_transform_stmt_list (struct clast_stmt *, opencl_main, int);
2067 /* Add loop body, of the loop, represented by S, on host.
2068 Loop body can contain device code.
2069 DEPTH contains depth of given loop in current loop nest.
2070 DEPENDENCY indicates where given loop has any dependencies.
2071 CODE_GEN holds information related to code generation. */
2073 static void
2074 opencl_add_gimple_for_loop (struct clast_for *s, opencl_main code_gen,
2075 int depth, bool dependency)
2077 loop_p old_parent = code_gen->context_loop;
2078 loop_p new_loop = graphite_create_new_loop
2079 (code_gen->region, code_gen->main_edge, s, code_gen->context_loop,
2080 &code_gen->newivs, code_gen->newivs_index, code_gen->params_index, depth);
2081 edge last_e = single_exit (new_loop);
2082 edge to_body = single_succ_edge (new_loop->header);
2083 basic_block after = to_body->dest;
2084 opencl_clast_meta parent = code_gen->curr_meta->parent;
2086 last_e = single_succ_edge (split_edge (last_e));
2088 code_gen->context_loop = new_loop;
2089 code_gen->main_edge = single_succ_edge (new_loop->header);
2091 opencl_init_new_loop (parent, code_gen);
2092 opencl_transform_stmt_list (s->body, code_gen, depth + 1);
2094 code_gen->context_loop = old_parent;
2096 redirect_edge_succ_nodup (code_gen->main_edge, after);
2097 set_immediate_dominator (CDI_DOMINATORS, code_gen->main_edge->dest,
2098 code_gen->main_edge->src);
2100 opencl_postpass_data (code_gen, parent);
2101 code_gen->main_edge = last_e;
2103 if (flag_loop_parallelize_all && !dependency)
2104 new_loop->can_be_parallel = true;
2106 opencl_verify ();
2109 /* Add loop, represented by S, on host. Loop body can contain device code.
2110 DEPTH contains depth of given loop in current loop nest.
2111 DEPENDENCY indicates where given loop has any dependencies.
2112 CODE_GEN holds information related to code generation. */
2114 static void
2115 opencl_add_gimple_for_stmt_for (struct clast_for *s, opencl_main code_gen,
2116 int depth, bool dependency)
2118 edge last_e = graphite_create_new_loop_guard (code_gen->region,
2119 code_gen->main_edge,
2120 s, code_gen->newivs,
2121 code_gen->newivs_index,
2122 code_gen->params_index);
2123 edge true_e = get_true_edge_from_guard_bb (code_gen->main_edge->dest);
2125 code_gen->main_edge = true_e;
2126 opencl_add_gimple_for_loop (s, code_gen, depth, dependency);
2127 code_gen->main_edge = last_e;
2130 /* Calculate parent data access flags in META based on children.
2131 parent->modified_on_host = OR_{forall children} child->modified_on_host.
2132 parent->modified_on_device = OR_{forall children} child->modified_on_device.
2135 static void
2136 opencl_fix_meta_flags (opencl_clast_meta meta)
2138 opencl_clast_meta curr = meta->body;
2140 while (curr)
2142 bitmap_ior_into (meta->modified_on_host, curr->modified_on_host);
2143 bitmap_ior_into (meta->modified_on_device, curr->modified_on_device);
2144 curr = curr->next;
2148 /* Add if statement, represented by S to current gimple.
2149 CODE_GEN holds information related to code generation. */
2151 static void
2152 opencl_add_gimple_for_stmt_guard (struct clast_guard *s,
2153 opencl_main code_gen, int depth)
2155 edge last_e = graphite_create_new_guard (code_gen->region,
2156 code_gen->main_edge, s,
2157 code_gen->newivs,
2158 code_gen->newivs_index,
2159 code_gen->params_index);
2160 edge true_e = get_true_edge_from_guard_bb (code_gen->main_edge->dest);
2162 code_gen->main_edge = true_e;
2163 opencl_transform_stmt_list (s->then, code_gen, depth);
2164 code_gen->main_edge = last_e;
2166 recompute_all_dominators ();
2167 opencl_verify ();
2170 /* Parse clast statement list S, located on depth DEPTH in current loop nest.
2171 This function generates gimple from clast statements, but in case of
2172 stmt_for either host or device code can be generated.
2173 CODE_GEN holds information related to code generation. */
2175 static void
2176 opencl_transform_stmt_list (struct clast_stmt *s, opencl_main code_gen,
2177 int depth)
2179 bool dump_p = dump_file && (dump_flags & TDF_DETAILS);
2181 for ( ; s; s = s->next)
2183 opencl_clast_meta tmp = code_gen->curr_meta;
2185 if (CLAST_STMT_IS_A (s, stmt_root))
2186 continue;
2188 else if (CLAST_STMT_IS_A (s, stmt_user))
2190 code_gen->curr_meta->init_edge = code_gen->main_edge;
2191 opencl_add_gimple_for_user_stmt ((struct clast_user_stmt *) s,
2192 code_gen);
2193 code_gen->curr_meta = code_gen->curr_meta->next;
2195 else if (CLAST_STMT_IS_A (s, stmt_for))
2197 opencl_clast_meta current_clast = code_gen->curr_meta;
2198 struct clast_for *for_stmt = (struct clast_for *) s;
2199 bool dependency = false;
2200 bool parallel = false;
2202 /* If there are dependencies in loop, it can't be parallelized. */
2203 if (!flag_graphite_opencl_no_dep_check &&
2204 dependency_in_clast_loop_p (code_gen, current_clast,
2205 for_stmt, depth))
2207 if (dump_p)
2208 fprintf (dump_file, "dependency in loop\n");
2210 dependency = true;
2213 if (!dependency)
2214 parallel = opencl_should_be_parallel_p (code_gen, current_clast,
2215 depth);
2217 /* Create init block for memory transfer befor loop. */
2218 current_clast->init_edge = code_gen->main_edge;
2220 if (parallel && !dependency)
2222 opencl_body current_body;
2224 opencl_fflush_rw_flags (code_gen);
2225 opencl_mark_privatized_data (code_gen);
2226 current_clast->on_device = true;
2227 current_body
2228 = opencl_clast_to_kernel (for_stmt, code_gen, depth);
2230 if (current_body->num_of_data_writes)
2232 dyn_string_t header = current_body->header;
2233 dyn_string_t pre_header = current_body->pre_header;
2234 dyn_string_t body = current_body->body;
2236 dyn_string_append (code_gen->main_program, header);
2237 dyn_string_append (code_gen->main_program, pre_header);
2238 dyn_string_append (code_gen->main_program, body);
2240 opencl_create_gimple_for_body (current_body, code_gen);
2242 htab_delete (code_gen->global_defined_vars);
2243 update_ssa (TODO_update_ssa);
2244 opencl_verify ();
2245 opencl_body_delete (current_body);
2246 code_gen->current_body = NULL;
2248 else
2250 code_gen->curr_meta = code_gen->curr_meta->body;
2251 opencl_add_gimple_for_stmt_for (for_stmt, code_gen,
2252 depth, dependency);
2255 opencl_fix_meta_flags (current_clast);
2256 code_gen->curr_meta = current_clast->next;
2258 else if (CLAST_STMT_IS_A (s, stmt_guard))
2259 opencl_add_gimple_for_stmt_guard ((struct clast_guard *) s,
2260 code_gen, depth);
2261 else if (CLAST_STMT_IS_A (s, stmt_block))
2262 opencl_transform_stmt_list (((struct clast_block *) s)->body,
2263 code_gen, depth);
2264 else
2265 gcc_unreachable ();
2267 if (tmp->parent)
2268 opencl_fix_meta_flags (tmp->parent);
2272 /* Transform clast statement DATA from scop SCOP to OpenCL calls
2273 in region REGION. Place all calls to edge MAIN. PARAM_INDEX
2274 holds external scop params. */
2276 void
2277 opencl_transform_clast (struct clast_stmt *data, sese region,
2278 edge main, scop_p scop, htab_t params_index)
2280 opencl_main code_gen;
2281 /* Create main data struture for code generation. */
2283 if (dump_file && (dump_flags & TDF_DETAILS))
2285 fprintf (dump_file, "\nGenerating OpenCL code for SCoP: \n");
2286 print_scop (dump_file, scop, 0);
2289 code_gen = opencl_main_create (((struct clast_root *) data)->names,
2290 region, main, params_index);
2292 opencl_init_basic_blocks (code_gen);
2293 opencl_init_data (scop, code_gen);
2295 code_gen->clast_meta = opencl_create_meta_from_clast (code_gen, data, 1,
2296 NULL);
2297 code_gen->curr_meta = code_gen->clast_meta;
2299 opencl_transform_stmt_list (data, code_gen, 1);
2301 if (dyn_string_length (code_gen->main_program) != 0)
2303 dyn_string_append (main_program_src, code_gen->main_program);
2304 opencl_set_data_size (code_gen);
2305 opencl_init_all_device_buffers (code_gen);
2306 opencl_fflush_all_device_buffers_to_host (code_gen);
2309 recompute_all_dominators ();
2310 update_ssa (TODO_update_ssa);
2311 opencl_main_delete (code_gen);
2314 /* Find opencl_data object by host object OBJ in CODE_GEN hash maps. */
2316 opencl_data
2317 opencl_get_data_by_tree (opencl_main code_gen, tree obj)
2319 map_tree_to_data tmp = map_tree_to_data_create (obj, NULL);
2320 map_tree_to_data *slot
2321 = (map_tree_to_data *) htab_find_slot (code_gen->tree_to_data,
2322 tmp, INSERT);
2323 if (*slot == NULL)
2324 return NULL;
2326 return (*slot)->value;
2329 /* Find opencl_data object by data reference REF in CODE_GEN hash maps. */
2331 opencl_data
2332 opencl_get_data_by_data_ref (opencl_main code_gen, data_reference_p ref)
2334 map_ref_to_data tmp = map_ref_to_data_create (ref, NULL);
2335 map_ref_to_data *slot
2336 = (map_ref_to_data *) htab_find_slot (code_gen->ref_to_data,
2337 tmp, INSERT);
2338 if (*slot == NULL)
2339 return NULL;
2341 return (*slot)->value;
2344 /* Create global variables for opencl code. */
2346 static void
2347 opencl_create_gimple_variables (void)
2349 static bool opencl_var_created = false;
2351 if (opencl_var_created)
2352 return;
2354 opencl_var_created = true;
2356 /* cl_context h_context */
2357 h_context = opencl_create_static_ptr_variable ("__ocl_h_context");
2359 /* cl_command_queue h_cmd_queue */
2360 h_cmd_queue = opencl_create_static_ptr_variable ("__ocl_h_cmd_queue");
2363 /* Create call
2364 | clGetContextInfo (h_context, CL_CONTEXT_DEVICES, 0, 0,
2365 | &n_context_descriptor_size);
2367 POINTER_TO_SIZE if &n_context_descriptor_size. */
2369 static tree
2370 opencl_create_clGetContextInfo_1 (tree pointer_to_size)
2372 tree function = opencl_create_function_decl (GET_CONTEXT_INFO);
2373 tree zero_pointer = null_pointer_node;
2374 tree cl_contex_devices = build_int_cst (NULL_TREE, CL_CONTEXT_DEVICES);
2375 tree context_var = h_context;
2377 return build_call_expr (function, 5,
2378 context_var,
2379 cl_contex_devices,
2380 integer_zero_node,
2381 zero_pointer,
2382 pointer_to_size);
2385 /* Create call
2386 | clGetContextInfo (h_context, CL_CONTEXT_DEVICES,
2387 | n_context_descriptor_size, A_DEVICES, 0);
2389 POINTER_TO_SIZE if &n_context_descriptor_size. */
2391 static tree
2392 opencl_create_clGetContextInfo_2 (tree size, tree a_devices)
2394 tree function = opencl_create_function_decl (GET_CONTEXT_INFO);
2395 tree zero_pointer = null_pointer_node;
2396 tree cl_contex_devices = build_int_cst (NULL_TREE, CL_CONTEXT_DEVICES);
2397 tree context_var = h_context;
2399 return build_call_expr (function, 5,
2400 context_var,
2401 cl_contex_devices,
2402 size,
2403 a_devices,
2404 zero_pointer);
2407 /* Create context_properties array variable. */
2409 static tree
2410 opencl_create_context_properties (void)
2412 tree cl_context_properties_type = long_integer_type_node;
2414 tree index_type = build_index_type (build_int_cst (NULL_TREE, 3));
2415 tree array_type = build_array_type (cl_context_properties_type,
2416 index_type);
2418 return opencl_create_tmp_var (array_type, "context_properties");
2421 /* Place calls to obtain current platform id to INIT_EDGE.
2422 Place obtained id to VAR. */
2424 static edge
2425 opencl_set_context_properties (edge init_edge, tree var)
2427 tree function = opencl_create_function_decl (GET_PLATFORM_IDS);
2428 tree cl_context_properties_type = long_integer_type_node;
2429 tree call;
2430 tree call2;
2431 gimple_stmt_iterator g_iter;
2433 basic_block bb = split_edge (init_edge);
2435 init_edge = single_succ_edge (bb);
2437 g_iter = gsi_last_bb (bb);
2438 call = build4 (ARRAY_REF, cl_context_properties_type,
2439 var, integer_zero_node, NULL_TREE, NULL_TREE);
2440 call2 = build_int_cst (NULL_TREE, CL_CONTEXT_PLATFORM);
2441 call2 = build1 (CONVERT_EXPR, cl_context_properties_type, call2);
2443 call = build2 (MODIFY_EXPR, cl_context_properties_type,
2444 call, call2);
2446 force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
2447 GSI_CONTINUE_LINKING);
2448 g_iter = gsi_last_bb (bb);
2450 call = build4 (ARRAY_REF, cl_context_properties_type,
2451 var, integer_one_node, NULL_TREE, NULL_TREE);
2452 call2 = build_call_expr (function, 3,
2453 integer_one_node,
2454 build_addr (call, current_function_decl),
2455 null_pointer_node);
2456 force_gimple_operand_gsi (&g_iter, call2, true, NULL, false,
2457 GSI_CONTINUE_LINKING);
2459 call = build4 (ARRAY_REF, cl_context_properties_type,
2460 var, build_int_cst (NULL_TREE, 2), NULL_TREE, NULL_TREE);
2461 call = build2 (MODIFY_EXPR, cl_context_properties_type,
2462 call, fold_convert (cl_context_properties_type,
2463 integer_zero_node));
2464 force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
2465 GSI_CONTINUE_LINKING);
2466 return init_edge;
2469 /* Create call
2470 clCreateContextFromType (PROPERTIES, CL_DEVICE_TYPE_GPU, 0, 0, 0); */
2472 static tree
2473 opencl_create_clCreateContextFromType (tree properties)
2475 tree function = opencl_create_function_decl (CREATE_CONTEXT_FROM_TYPE);
2476 tree zero_pointer = null_pointer_node;
2477 tree device
2478 = build_int_cst (NULL_TREE, flag_graphite_opencl_cpu
2479 ? CL_DEVICE_TYPE_CPU : CL_DEVICE_TYPE_GPU);
2481 return build_call_expr (function, 5,
2482 build_addr (properties, current_function_decl),
2483 device,
2484 zero_pointer,
2485 zero_pointer,
2486 zero_pointer);
2489 /* Create call
2490 clCreateCommandQueue (h_context, DEV_ID, 0, 0); */
2492 static tree
2493 opencl_create_clCreateCommandQueue (tree dev_id)
2496 tree function = opencl_create_function_decl (CREATE_COMMAND_QUEUE);
2497 tree zero_pointer = null_pointer_node;
2498 tree context = h_context;
2500 return build_call_expr (function, 4,
2501 context,
2502 dev_id,
2503 zero_pointer,
2504 zero_pointer);
2507 /* Create call malloc (ARG). */
2509 static tree
2510 opencl_create_malloc_call (tree arg)
2512 tree function_type = build_function_type_list (ptr_type_node,
2513 integer_type_node,
2514 NULL_TREE);
2515 tree function = build_fn_decl ("malloc", function_type);
2517 return build_call_expr (function, 1, arg);
2520 /* Generate calls for opencl init functions and place them to INIT_EDGE.
2521 Must be called only once in each function. */
2523 static edge
2524 opencl_create_init_context (edge init_edge)
2526 tree tmp_type;
2527 tree call;
2528 tree n_context_descriptor_size
2529 = opencl_create_tmp_var (size_type_node, "__ocl_nContextDescriptorSize");
2530 tree a_devices = opencl_create_tmp_var (build_pointer_type (ptr_type_node),
2531 "__ocl_a_devices");
2532 tree properties = opencl_create_context_properties ();
2534 init_edge = opencl_set_context_properties (init_edge, properties);
2535 call = opencl_create_clCreateContextFromType (properties);
2536 call = build2 (MODIFY_EXPR, TREE_TYPE (h_context),
2537 h_context, call);
2538 init_edge = opencl_add_safe_call_on_edge (call, false, init_edge);
2539 tmp_type = build_pointer_type
2540 (TREE_TYPE (n_context_descriptor_size));
2541 call = build1 (ADDR_EXPR, tmp_type,
2542 n_context_descriptor_size);
2543 call = opencl_create_clGetContextInfo_1 (call);
2544 init_edge = opencl_add_safe_call_on_edge (call, true, init_edge);
2545 call = opencl_create_malloc_call (n_context_descriptor_size);
2546 call = fold_convert (TREE_TYPE (a_devices), call);
2547 call = build2 (MODIFY_EXPR, TREE_TYPE (a_devices), a_devices, call);
2548 init_edge = opencl_add_safe_call_on_edge (call, false, init_edge);
2549 call = opencl_create_clGetContextInfo_2 (n_context_descriptor_size,
2550 a_devices);
2551 init_edge = opencl_add_safe_call_on_edge (call, true, init_edge);
2552 tmp_type = TREE_TYPE (TREE_TYPE (a_devices));
2553 call = build1 (INDIRECT_REF, tmp_type, a_devices);
2554 call = opencl_create_clCreateCommandQueue (call);
2555 call = build2 (MODIFY_EXPR, TREE_TYPE (h_cmd_queue),
2556 h_cmd_queue, call);
2557 init_edge = opencl_add_safe_call_on_edge (call, false, init_edge);
2558 return init_edge;
2561 /* Fill array VEC with all poly basic blocks in clast statement ROOT. */
2563 static void
2564 build_poly_bb_vec (struct clast_stmt *root,
2565 VEC (poly_bb_p, heap) **vec)
2567 while (root)
2569 if (CLAST_STMT_IS_A (root, stmt_user))
2571 poly_bb_p tmp
2572 = (poly_bb_p) cloog_statement_usr
2573 (((struct clast_user_stmt *) root)->statement);
2575 VEC_safe_push (poly_bb_p, heap, *vec, tmp);
2578 else if (CLAST_STMT_IS_A (root, stmt_for))
2579 build_poly_bb_vec (((struct clast_for *) root)->body, vec);
2581 else if (CLAST_STMT_IS_A (root, stmt_guard))
2582 build_poly_bb_vec (((struct clast_guard *) root)->then, vec);
2584 else if (CLAST_STMT_IS_A (root, stmt_block))
2585 build_poly_bb_vec (((struct clast_block *) root)->body, vec);
2587 root = root->next;
2591 /* Check whether there is a dependency between PBB1 and PBB2 on level LEVEL.
2592 CAN_BE_PRIVATE indicates which variables can be privatizated.
2593 CODE_GEN holds information related to code generation. */
2595 static bool
2596 opencl_dependency_between_pbbs_p (opencl_main code_gen, poly_bb_p pbb1,
2597 poly_bb_p pbb2, int level,
2598 bitmap can_be_private)
2600 int i, j;
2601 poly_dr_p pdr1, pdr2;
2603 timevar_push (TV_GRAPHITE_DATA_DEPS);
2605 for (i = 0; VEC_iterate (poly_dr_p, PBB_DRS (pbb1), i, pdr1); i++)
2607 data_reference_p ref1 = (data_reference_p)PDR_CDR (pdr1);
2608 opencl_data data_1 = opencl_get_data_by_data_ref (code_gen, ref1);
2610 if (bitmap_bit_p (can_be_private, data_1->id))
2611 continue;
2613 for (j = 0; VEC_iterate (poly_dr_p, PBB_DRS (pbb2), j, pdr2); j++)
2615 data_reference_p ref2 = (data_reference_p)PDR_CDR (pdr2);
2617 opencl_data data_2 = opencl_get_data_by_data_ref (code_gen, ref2);
2619 if (bitmap_bit_p (can_be_private, data_2->id))
2620 continue;
2622 if (graphite_carried_dependence_level_k (pdr1, pdr2, level))
2624 timevar_pop (TV_GRAPHITE_DATA_DEPS);
2625 return true;
2630 timevar_pop (TV_GRAPHITE_DATA_DEPS);
2631 return false;
2634 /* Returns true, if there is dependency in clast loop STMT on depth DEPTH.
2635 CODE_GEN holds information related to code generation. */
2637 bool
2638 dependency_in_clast_loop_p (opencl_main code_gen, opencl_clast_meta meta,
2639 struct clast_for *stmt, int depth)
2641 VEC (poly_bb_p, heap) *pbbs = VEC_alloc (poly_bb_p, heap, 10);
2642 int level = get_scattering_level (depth);
2643 int i;
2644 poly_bb_p pbb1;
2645 bitmap can_be_private;
2647 build_poly_bb_vec (stmt->body, &pbbs);
2648 can_be_private = meta->can_be_private;
2650 for (i = 0; VEC_iterate (poly_bb_p, pbbs, i, pbb1); i++)
2652 int j;
2653 poly_bb_p pbb2;
2655 for (j = 0; VEC_iterate (poly_bb_p, pbbs, j, pbb2); j++)
2656 if (opencl_dependency_between_pbbs_p (code_gen, pbb1, pbb1,
2657 level, can_be_private))
2659 VEC_free (poly_bb_p, heap, pbbs);
2660 return true;
2664 VEC_free (poly_bb_p, heap, pbbs);
2665 return false;
2668 /* Init graphite-opencl pass. Must be called in each function before
2669 any scop processing. */
2671 void
2672 graphite_opencl_init (void)
2674 opencl_create_gimple_variables ();
2676 /* cl_program h_program */
2677 h_program
2678 = opencl_create_static_ptr_variable ("__ocl_h_program");
2680 opencl_function_kernels = VEC_alloc (tree, heap, OPENCL_INIT_BUFF_SIZE);
2681 main_program_src = dyn_string_new (100);
2683 opencl_array_data = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE);
2684 array_data_to_tree = htab_create (10, map_tree_to_data_to_hash,
2685 map_tree_to_data_cmp, free);
2687 opencl_create_function_decl (STATIC_INIT);
2690 /* Create calls to initialize static data for current function and
2691 place them to INIT_EDGE. */
2693 static edge
2694 opencl_init_static_data (edge init_edge)
2696 int i;
2697 opencl_data curr;
2699 for (i = 0; VEC_iterate (opencl_data, opencl_array_data, i, curr); i ++)
2701 tree tmp;
2703 if (!curr->used_on_device)
2704 continue;
2706 tmp = opencl_create_memory_for_pointer (curr);
2707 tmp = build2 (MODIFY_EXPR, ptr_type_node, curr->device_object, tmp);
2708 init_edge = opencl_add_safe_call_on_edge (tmp, false, init_edge);
2711 return init_edge;
2714 /* Finalize graphite-opencl pass for current function. Place all required
2715 calls to STATIC_INIT_EDGE. Must be called after all scop processing
2716 in current function. */
2718 void
2719 graphite_opencl_finalize (edge static_init_edge)
2721 int i;
2722 opencl_data curr;
2724 if (dyn_string_length (main_program_src) != 0)
2726 tree call = build2 (EQ_EXPR, boolean_type_node,
2727 h_program, null_pointer_node);
2728 basic_block buff_init_block = split_edge (static_init_edge);
2729 edge before_init;
2730 edge init_edge;
2732 static_init_edge = single_succ_edge (buff_init_block);
2734 create_empty_if_region_on_edge (static_init_edge, call);
2736 static_init_edge = opencl_create_function_call
2737 (get_true_edge_from_guard_bb (static_init_edge->dest));
2738 static_init_edge = opencl_init_static_data (static_init_edge);
2739 before_init = single_pred_edge (buff_init_block);
2740 call = build2 (EQ_EXPR, boolean_type_node,
2741 h_context, null_pointer_node);
2743 create_empty_if_region_on_edge (before_init, call);
2744 init_edge = get_true_edge_from_guard_bb (before_init->dest);
2745 init_edge = opencl_create_init_context (init_edge);
2748 dyn_string_delete (main_program_src);
2750 for (i = 0; VEC_iterate (opencl_data, opencl_array_data, i, curr); i++)
2751 opencl_data_delete (curr);
2753 VEC_free (tree, heap, opencl_function_kernels);
2754 VEC_free (opencl_data, heap, opencl_array_data);
2755 recompute_all_dominators ();
2756 update_ssa (TODO_update_ssa);
2759 /* Debug functions for deined data structures. */
2761 static void
2762 dump_flag_to_file (const char *name, bool cond,
2763 FILE *file, int indent)
2765 indent_to (file, indent);
2766 fprintf (file, "%s = %s", name, cond? "true" : "false");
2769 void
2770 dump_opencl_data (opencl_data data, FILE *file, bool verbose)
2772 fprintf (file, "Data id = %d\n", data->id);
2773 fprintf (file, "Data dimension = %d\n", data->data_dim);
2774 fprintf (file, "Data depth = %d\n", data->depth);
2775 fprintf (file, "Flags");
2776 indent_to (file, 2);
2777 fprintf (file, "Global");
2778 dump_flag_to_file ("Static", data->is_static, file, 4);
2779 dump_flag_to_file ("Can be private", data->can_be_private, file, 4);
2780 dump_flag_to_file ("Used on device", data->used_on_device, file, 4);
2781 dump_flag_to_file ("Ever read on device",
2782 data->ever_read_on_device, file, 4);
2784 dump_flag_to_file ("Ever written on device",
2785 data->ever_written_on_device, file, 4);
2787 dump_flag_to_file ("Supported", data->supported, file, 4);
2788 indent_to (file, 2);
2789 fprintf (file, "Local");
2791 dump_flag_to_file ("Up to date on device",
2792 data->up_to_date_on_device, file, 4);
2793 dump_flag_to_file ("Up to date on host",
2794 data->up_to_date_on_host, file, 4);
2796 dump_flag_to_file ("Inited in current loop on host",
2797 data->inited_in_current_loop_on_host, file, 4);
2799 dump_flag_to_file ("Inited in current loop on device",
2800 data->inited_in_current_loop_on_device, file, 4);
2802 dump_flag_to_file ("Written in current body",
2803 data->written_in_current_body, file, 4);
2805 dump_flag_to_file ("Read in current body",
2806 data->read_in_current_body, file, 4);
2807 dump_flag_to_file ("Privatized", data->privatized, file, 4);
2809 fprintf (file, "\n");
2811 if (verbose)
2813 fprintf (file, "\nObject\n");
2814 print_node_brief (file, "", data->object, 2);
2816 fprintf (file, "\nDevice object\n");
2817 print_node_brief (file, "", data->device_object, 2);
2819 fprintf (file, "\nSize value\n");
2820 print_node_brief (file, "", data->size_value, 2);
2822 fprintf (file, "\nSize variable\n");
2823 print_node_brief (file, "", data->size_variable, 2);
2825 fprintf (file, "\nExact object\n");
2826 print_node_brief (file, "", data->exact_object, 2);
2830 DEBUG_FUNCTION void
2831 debug_opencl_data (opencl_data data, bool verbose)
2833 dump_opencl_data (data, stderr, verbose);
2836 void
2837 dump_opencl_body (opencl_body body, FILE *file, bool verbose)
2839 fprintf (file, "\n%s\n\n", body->name);
2840 fprintf (file, "First iterator: %s\n", body->first_iter);
2841 fprintf (file, "Last iterator: %s\n", body->last_iter);
2842 fprintf (file, "Number of data writes = %d\n\n", body->num_of_data_writes);
2843 fprintf (file, "Function header::\n");
2844 fprintf (file, "%s\n\n", dyn_string_buf (body->header));
2845 fprintf (file, "Non scalar args::\n");
2846 fprintf (file, "%s\n\n", dyn_string_buf (body->non_scalar_args));
2847 fprintf (file, "Pre header::\n");
2848 fprintf (file, "%s\n\n", dyn_string_buf (body->pre_header));
2849 fprintf (file, "Body::\n");
2850 fprintf (file, "%s\n\n", dyn_string_buf (body->body));
2852 fprintf (file, "Number of executions::\n");
2853 print_node_brief (file, "", body->num_of_exec, 2);
2855 if (verbose)
2856 print_clast_stmt (file, body->clast_body);
2859 DEBUG_FUNCTION void
2860 debug_opencl_body (opencl_body body, bool verbose)
2862 dump_opencl_body (body, stderr, verbose);
2865 void
2866 dump_opencl_clast_meta (opencl_clast_meta meta, FILE *file,
2867 bool verbose, int indent)
2869 if (!verbose)
2870 /* Just print structure of meta. */
2872 while (meta)
2874 indent_to (file, indent);
2875 fprintf (file, "<in = %d, out = %d, dev = %s, ok = %s>",
2876 meta->in_depth, meta->out_depth,
2877 meta->on_device?"true":"false",
2878 meta->access_unsupported?"false":"true");
2879 dump_opencl_clast_meta (meta->body, file, false, indent + 4);
2880 meta = meta->next;
2883 else
2885 fprintf (file, "<in = %d, out = %d, dev = %s, ok = %s>",
2886 meta->in_depth, meta->out_depth,
2887 meta->on_device?"true":"false",
2888 meta->access_unsupported?"false":"true");
2890 fprintf (file, "\nModified on host::\n");
2891 debug_bitmap_file (file, meta->modified_on_host);
2893 fprintf (file, "\nModified on device::\n");
2894 debug_bitmap_file (file, meta->modified_on_device);
2896 fprintf (file, "\nAccess::\n");
2897 debug_bitmap_file (file, meta->access);
2899 fprintf (file, "\nCan be private::\n");
2900 debug_bitmap_file (file, meta->can_be_private);
2904 DEBUG_FUNCTION void
2905 debug_opencl_clast_meta (opencl_clast_meta meta, bool verbose)
2907 dump_opencl_clast_meta (meta, stderr, verbose, 0);
2910 static int
2911 print_char_p_htab (void **h, void *v)
2913 char **ptr = (char **) h;
2914 FILE *file = (FILE *) v;
2916 fprintf (file, " %s\n", *ptr);
2917 return 1;
2920 static int
2921 print_tree_to_data_htab (void **h, void *v)
2923 map_tree_to_data *map = (map_tree_to_data *) h;
2924 FILE *file = (FILE *) v;
2925 tree key = (*map)->key;
2926 opencl_data data = (*map)->value;
2928 print_node_brief (file, "key = ", key, 2);
2929 fprintf (file, " data_id = %d\n", data->id);
2930 return 1;
2933 static int
2934 print_ref_to_data_htab (void **h, void *v)
2936 map_ref_to_data *map = (map_ref_to_data *) h;
2937 FILE *file = (FILE *) v;
2938 data_reference_p key = (*map)->key;
2939 opencl_data data = (*map)->value;
2941 fprintf (file, "key::\n");
2942 dump_data_reference (file, key);
2943 fprintf (file, "data_id = %d\n\n", data->id);
2944 return 1;
2947 void
2948 dump_opencl_main (opencl_main code_gen, FILE *file, bool verbose)
2950 fprintf (file, "Current meta::\n");
2951 dump_opencl_clast_meta (code_gen->curr_meta, file, false, 2);
2952 fprintf (file, "\n");
2954 if (code_gen->current_body)
2956 fprintf (file, "Current body::\n");
2957 dump_opencl_body (code_gen->current_body, file, verbose);
2960 fprintf (file, "\n\nData init basic block::\n");
2961 dump_bb (code_gen->data_init_bb, stderr, 0);
2963 if (code_gen->defined_vars)
2965 fprintf (file, "Defined variables::\n");
2966 htab_traverse_noresize (code_gen->defined_vars, print_char_p_htab,
2967 file);
2970 if (code_gen->global_defined_vars)
2972 fprintf (file, "Global defined variables::\n");
2973 htab_traverse_noresize (code_gen->global_defined_vars,
2974 print_char_p_htab, file);
2977 fprintf (file, "Refs to data::\n");
2978 htab_traverse_noresize (code_gen->ref_to_data,
2979 print_ref_to_data_htab, file);
2981 fprintf (file, "Trees to data::\n");
2982 htab_traverse_noresize (code_gen->tree_to_data,
2983 print_tree_to_data_htab, file);
2985 if (verbose)
2986 fprintf (file, "%s\n", dyn_string_buf (code_gen->main_program));
2989 DEBUG_FUNCTION void
2990 debug_opencl_main (opencl_main code_gen, bool verbose)
2992 dump_opencl_main (code_gen, stderr, verbose);
2995 DEBUG_FUNCTION void
2996 debug_opencl_program (void)
2998 fprintf (stderr, "%s", dyn_string_buf (main_program_src));
3001 #endif
3002 #include "gt-graphite-opencl.h"