1 /* Lowering and expansion of OpenMP directives for HSA GPU agents.
3 Copyright (C) 2013-2017 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
27 #include "tree-pass.h"
30 #include "pretty-print.h"
31 #include "fold-const.h"
33 #include "gimple-iterator.h"
34 #include "gimple-walk.h"
35 #include "tree-inline.h"
36 #include "langhooks.h"
37 #include "omp-general.h"
40 #include "gimple-pretty-print.h"
42 /* Return the lastprivate predicate for a given gridified loop described by
46 omp_grid_lastprivate_predicate (struct omp_for_data
*fd
)
48 /* When dealing with a gridified loop, we need to check up to three collapsed
49 iteration variables but they are not actually captured in this fd.
50 Fortunately, we can easily rely on HSA builtins to get this
54 if (gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_GRID_LOOP
55 && gimple_omp_for_grid_intra_group (fd
->for_stmt
))
57 id
= builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID
);
58 size
= builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE
);
62 id
= builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID
);
63 size
= builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE
);
66 for (int dim
= 0; dim
< fd
->collapse
; dim
++)
68 tree dim_tree
= build_int_cstu (unsigned_type_node
, dim
);
69 tree u1
= build_int_cstu (unsigned_type_node
, 1);
71 = build2 (EQ_EXPR
, boolean_type_node
,
72 build2 (PLUS_EXPR
, unsigned_type_node
,
73 build_call_expr (id
, 1, dim_tree
), u1
),
74 build_call_expr (size
, 1, dim_tree
));
76 cond
= build2 (TRUTH_AND_EXPR
, boolean_type_node
, cond
, c2
);
83 /* Structure describing the basic properties of the loop we are analyzing
84 whether it can be gridified and when it is gridified. */
88 /* True when we are doing tiling gridification, i.e. when there is a distinct
89 distribute loop over groups and a loop construct over work-items. False
90 when distribute and parallel for loops form a combined construct. */
92 /* Location of the target construct for optimization information
94 location_t target_loc
;
95 /* The collapse clause of the involved loops. Collapse value of all of them
96 must be the same for gridification to take place. */
98 /* Group sizes, if requested by the user or NULL if not requested. */
102 #define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
103 "gridified HSA kernel because "
105 /* Return true if STMT is an assignment of a register-type into a local
106 VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to
107 any of the trees specifying group sizes there. */
110 grid_safe_assignment_p (gimple
*stmt
, grid_prop
*grid
)
112 gassign
*assign
= dyn_cast
<gassign
*> (stmt
);
115 if (gimple_clobber_p (assign
))
117 tree lhs
= gimple_assign_lhs (assign
);
119 || !is_gimple_reg_type (TREE_TYPE (lhs
))
120 || is_global_var (lhs
))
123 for (unsigned i
= 0; i
< grid
->collapse
; i
++)
124 if (lhs
== grid
->group_sizes
[i
])
129 /* Return true if all statements in SEQ are assignments to local register-type
130 variables that do not hold group size information. */
133 grid_seq_only_contains_local_assignments (gimple_seq seq
, grid_prop
*grid
)
138 gimple_stmt_iterator gsi
;
139 for (gsi
= gsi_start (seq
); !gsi_end_p (gsi
); gsi_next (&gsi
))
140 if (!grid_safe_assignment_p (gsi_stmt (gsi
), grid
))
145 /* Scan statements in SEQ and call itself recursively on any bind. GRID
146 describes hitherto discovered properties of the loop that is evaluated for
147 possible gridification. If during whole search only assignments to
148 register-type local variables (that do not overwrite group size information)
149 and one single OMP statement is encountered, return true, otherwise return
150 false. RET is where we store any OMP statement encountered. */
153 grid_find_single_omp_among_assignments_1 (gimple_seq seq
, grid_prop
*grid
,
154 const char *name
, gimple
**ret
)
156 gimple_stmt_iterator gsi
;
157 for (gsi
= gsi_start (seq
); !gsi_end_p (gsi
); gsi_next (&gsi
))
159 gimple
*stmt
= gsi_stmt (gsi
);
161 if (grid_safe_assignment_p (stmt
, grid
))
163 if (gbind
*bind
= dyn_cast
<gbind
*> (stmt
))
165 gimple_seq bind_body
= gimple_bind_body (bind
);
166 if (!grid_find_single_omp_among_assignments_1 (bind_body
, grid
, name
,
170 else if (is_gimple_omp (stmt
))
174 if (dump_enabled_p ())
176 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
177 GRID_MISSED_MSG_PREFIX
"%s construct "
178 "contains multiple OpenMP constructs\n",
180 dump_printf_loc (MSG_NOTE
, gimple_location (*ret
),
181 "The first OpenMP construct within "
183 dump_printf_loc (MSG_NOTE
, gimple_location (stmt
),
184 "The second OpenMP construct within "
193 if (dump_enabled_p ())
195 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
196 GRID_MISSED_MSG_PREFIX
"%s construct contains "
197 "a complex statement\n", name
);
198 dump_printf_loc (MSG_NOTE
, gimple_location (stmt
),
199 "This statement cannot be analyzed for "
208 /* Scan statements in SEQ and make sure that it and any binds in it contain
209 only assignments to local register-type variables (that do not overwrite
210 group size information) and one OMP construct. If so, return that
211 construct, otherwise return NULL. GRID describes hitherto discovered
212 properties of the loop that is evaluated for possible gridification. If
213 dumping is enabled and function fails, use NAME to dump a note with the
214 reason for failure. */
217 grid_find_single_omp_among_assignments (gimple_seq seq
, grid_prop
*grid
,
222 if (dump_enabled_p ())
223 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
224 GRID_MISSED_MSG_PREFIX
"%s construct has empty body\n",
230 if (grid_find_single_omp_among_assignments_1 (seq
, grid
, name
, &ret
))
232 if (!ret
&& dump_enabled_p ())
233 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
234 GRID_MISSED_MSG_PREFIX
"%s construct does not contain"
235 " any other OpenMP construct\n", name
);
242 /* Walker function looking for statements there is no point gridifying (and for
243 noreturn function calls which we cannot do). Return non-NULL if such a
244 statement is found. */
247 grid_find_ungridifiable_statement (gimple_stmt_iterator
*gsi
,
249 struct walk_stmt_info
*wi
)
251 *handled_ops_p
= false;
252 gimple
*stmt
= gsi_stmt (*gsi
);
253 switch (gimple_code (stmt
))
256 if (gimple_call_noreturn_p (as_a
<gcall
*> (stmt
)))
258 *handled_ops_p
= true;
260 return error_mark_node
;
264 /* We may reduce the following list if we find a way to implement the
265 clauses, but now there is no point trying further. */
266 case GIMPLE_OMP_CRITICAL
:
267 case GIMPLE_OMP_TASKGROUP
:
268 case GIMPLE_OMP_TASK
:
269 case GIMPLE_OMP_SECTION
:
270 case GIMPLE_OMP_SECTIONS
:
271 case GIMPLE_OMP_SECTIONS_SWITCH
:
272 case GIMPLE_OMP_TARGET
:
273 case GIMPLE_OMP_ORDERED
:
274 *handled_ops_p
= true;
276 return error_mark_node
;
283 /* Examine clauses of omp parallel statement PAR and if any prevents
284 gridification, issue a missed-optimization diagnostics and return false,
285 otherwise return true. TLOC is the location of the target construct, which
286 is used when dumping the missed-optimization diagnostics. */
289 grid_parallel_clauses_gridifiable (gomp_parallel
*par
, location_t tloc
)
291 tree clauses
= gimple_omp_parallel_clauses (par
);
294 switch (OMP_CLAUSE_CODE (clauses
))
296 case OMP_CLAUSE_NUM_THREADS
:
297 if (dump_enabled_p ())
299 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
300 GRID_MISSED_MSG_PREFIX
"because there is "
301 "a num_threads clause of the parallel "
303 dump_printf_loc (MSG_NOTE
, gimple_location (par
),
304 "Parallel construct has a num_threads clause\n");
308 case OMP_CLAUSE_REDUCTION
:
309 if (dump_enabled_p ())
311 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
312 GRID_MISSED_MSG_PREFIX
"a reduction clause "
314 dump_printf_loc (MSG_NOTE
, gimple_location (par
),
315 "Parallel construct has a reduction clause\n");
322 clauses
= OMP_CLAUSE_CHAIN (clauses
);
327 /* Examine clauses and the body of omp loop statement GFOR and if something
328 prevents gridification, issue a missed-optimization diagnostics and return
329 false, otherwise return true. GRID describes hitherto discovered properties
330 of the loop that is evaluated for possible gridification. */
333 grid_inner_loop_gridifiable_p (gomp_for
*gfor
, grid_prop
*grid
)
335 if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor
),
338 if (dump_enabled_p ())
340 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
341 GRID_MISSED_MSG_PREFIX
"the inner loop "
342 "loop bounds computation contains a complex "
344 dump_printf_loc (MSG_NOTE
, gimple_location (gfor
),
345 "Loop construct cannot be analyzed for "
351 tree clauses
= gimple_omp_for_clauses (gfor
);
354 switch (OMP_CLAUSE_CODE (clauses
))
356 case OMP_CLAUSE_SCHEDULE
:
357 if (OMP_CLAUSE_SCHEDULE_KIND (clauses
) != OMP_CLAUSE_SCHEDULE_AUTO
)
359 if (dump_enabled_p ())
361 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
362 GRID_MISSED_MSG_PREFIX
"the inner loop "
363 "has a non-automatic schedule clause\n");
364 dump_printf_loc (MSG_NOTE
, gimple_location (gfor
),
365 "Loop construct has a non automatic "
366 "schedule clause\n");
372 case OMP_CLAUSE_REDUCTION
:
373 if (dump_enabled_p ())
375 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
376 GRID_MISSED_MSG_PREFIX
"a reduction "
377 "clause is present\n ");
378 dump_printf_loc (MSG_NOTE
, gimple_location (gfor
),
379 "Loop construct has a reduction schedule "
387 clauses
= OMP_CLAUSE_CHAIN (clauses
);
389 struct walk_stmt_info wi
;
390 memset (&wi
, 0, sizeof (wi
));
391 if (walk_gimple_seq (gimple_omp_body (gfor
),
392 grid_find_ungridifiable_statement
,
395 gimple
*bad
= (gimple
*) wi
.info
;
396 if (dump_enabled_p ())
398 if (is_gimple_call (bad
))
399 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
400 GRID_MISSED_MSG_PREFIX
"the inner loop contains "
401 "call to a noreturn function\n");
403 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
404 GRID_MISSED_MSG_PREFIX
"the inner loop contains "
405 "statement %s which cannot be transformed\n",
406 gimple_code_name
[(int) gimple_code (bad
)]);
407 dump_printf_loc (MSG_NOTE
, gimple_location (bad
),
408 "This statement cannot be analyzed for "
416 /* Given distribute omp construct represented by DIST, which in the original
417 source forms a compound construct with a looping construct, return true if it
418 can be turned into a gridified HSA kernel. Otherwise return false. GRID
419 describes hitherto discovered properties of the loop that is evaluated for
420 possible gridification. */
423 grid_dist_follows_simple_pattern (gomp_for
*dist
, grid_prop
*grid
)
425 location_t tloc
= grid
->target_loc
;
426 gimple
*stmt
= grid_find_single_omp_among_assignments (gimple_omp_body (dist
),
430 || !(par
= dyn_cast
<gomp_parallel
*> (stmt
))
431 || !grid_parallel_clauses_gridifiable (par
, tloc
))
434 stmt
= grid_find_single_omp_among_assignments (gimple_omp_body (par
), grid
,
437 if (!stmt
|| !(gfor
= dyn_cast
<gomp_for
*> (stmt
)))
440 if (gimple_omp_for_kind (gfor
) != GF_OMP_FOR_KIND_FOR
)
442 if (dump_enabled_p ())
443 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
444 GRID_MISSED_MSG_PREFIX
"the inner loop is not "
445 "a simple for loop\n");
448 gcc_assert (gimple_omp_for_collapse (gfor
) == grid
->collapse
);
450 if (!grid_inner_loop_gridifiable_p (gfor
, grid
))
456 /* Given an omp loop statement GFOR, return true if it can participate in
457 tiling gridification, i.e. in one where the distribute and parallel for
458 loops do not form a compound statement. GRID describes hitherto discovered
459 properties of the loop that is evaluated for possible gridification. */
462 grid_gfor_follows_tiling_pattern (gomp_for
*gfor
, grid_prop
*grid
)
464 if (gimple_omp_for_kind (gfor
) != GF_OMP_FOR_KIND_FOR
)
466 if (dump_enabled_p ())
468 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
469 GRID_MISSED_MSG_PREFIX
"an inner loop is not "
470 "a simple for loop\n");
471 dump_printf_loc (MSG_NOTE
, gimple_location (gfor
),
472 "This statement is not a simple for loop\n");
477 if (!grid_inner_loop_gridifiable_p (gfor
, grid
))
480 if (gimple_omp_for_collapse (gfor
) != grid
->collapse
)
482 if (dump_enabled_p ())
484 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
485 GRID_MISSED_MSG_PREFIX
"an inner loop does not "
486 "have use the same collapse clause\n");
487 dump_printf_loc (MSG_NOTE
, gimple_location (gfor
),
488 "Loop construct uses a different collapse clause\n");
493 struct omp_for_data fd
;
494 struct omp_for_data_loop
*loops
495 = (struct omp_for_data_loop
*)alloca (grid
->collapse
496 * sizeof (struct omp_for_data_loop
));
497 omp_extract_for_data (gfor
, &fd
, loops
);
498 for (unsigned i
= 0; i
< grid
->collapse
; i
++)
500 tree itype
, type
= TREE_TYPE (fd
.loops
[i
].v
);
501 if (POINTER_TYPE_P (type
))
502 itype
= signed_type_for (type
);
506 tree n1
= fold_convert (itype
, fd
.loops
[i
].n1
);
507 tree n2
= fold_convert (itype
, fd
.loops
[i
].n2
);
508 tree t
= build_int_cst (itype
,
509 (fd
.loops
[i
].cond_code
== LT_EXPR
? -1 : 1));
510 t
= fold_build2 (PLUS_EXPR
, itype
, fd
.loops
[i
].step
, t
);
511 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2
);
512 t
= fold_build2 (MINUS_EXPR
, itype
, t
, n1
);
513 if (TYPE_UNSIGNED (itype
) && fd
.loops
[i
].cond_code
== GT_EXPR
)
514 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
515 fold_build1 (NEGATE_EXPR
, itype
, t
),
516 fold_build1 (NEGATE_EXPR
, itype
, fd
.loops
[i
].step
));
518 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, fd
.loops
[i
].step
);
520 if (!operand_equal_p (grid
->group_sizes
[i
], t
, 0))
522 if (dump_enabled_p ())
524 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
525 GRID_MISSED_MSG_PREFIX
"the distribute and "
526 "an internal loop do not agree on tile size\n");
527 dump_printf_loc (MSG_NOTE
, gimple_location (gfor
),
528 "Loop construct does not seem to loop over "
537 /* Facing a call to FNDECL in the body of a distribute construct, return true
538 if we can handle it or false if it precludes gridification. */
541 grid_call_permissible_in_distribute_p (tree fndecl
)
543 if (DECL_PURE_P (fndecl
) || TREE_READONLY (fndecl
))
546 const char *name
= IDENTIFIER_POINTER (DECL_NAME (fndecl
));
547 if (strstr (name
, "omp_") != name
)
550 if ((strcmp (name
, "omp_get_thread_num") == 0)
551 || (strcmp (name
, "omp_get_num_threads") == 0)
552 || (strcmp (name
, "omp_get_num_teams") == 0)
553 || (strcmp (name
, "omp_get_team_num") == 0)
554 || (strcmp (name
, "omp_get_level") == 0)
555 || (strcmp (name
, "omp_get_active_level") == 0)
556 || (strcmp (name
, "omp_in_parallel") == 0))
562 /* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
563 of a distribute construct that is pointed at by GSI, modify it as necessary
564 for gridification. If the statement itself got removed, return true. */
567 grid_handle_call_in_distribute (gimple_stmt_iterator
*gsi
)
569 gimple
*stmt
= gsi_stmt (*gsi
);
570 tree fndecl
= gimple_call_fndecl (stmt
);
571 gcc_checking_assert (stmt
);
572 if (DECL_PURE_P (fndecl
) || TREE_READONLY (fndecl
))
575 const char *name
= IDENTIFIER_POINTER (DECL_NAME (fndecl
));
576 if ((strcmp (name
, "omp_get_thread_num") == 0)
577 || (strcmp (name
, "omp_get_level") == 0)
578 || (strcmp (name
, "omp_get_active_level") == 0)
579 || (strcmp (name
, "omp_in_parallel") == 0))
581 tree lhs
= gimple_call_lhs (stmt
);
585 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
586 gsi_insert_before (gsi
, assign
, GSI_SAME_STMT
);
588 gsi_remove (gsi
, true);
592 /* The rest of the omp functions can stay as they are, HSA back-end will
593 handle them correctly. */
594 gcc_checking_assert ((strcmp (name
, "omp_get_num_threads") == 0)
595 || (strcmp (name
, "omp_get_num_teams") == 0)
596 || (strcmp (name
, "omp_get_team_num") == 0));
600 /* Given a sequence of statements within a distribute omp construct or a
601 parallel construct, which in the original source does not form a compound
602 construct with a looping construct, return true if it does not prevent us
603 from turning it into a gridified HSA kernel. Otherwise return false. GRID
604 describes hitherto discovered properties of the loop that is evaluated for
605 possible gridification. IN_PARALLEL must be true if seq is within a
606 parallel construct and false if it is only within a distribute
610 grid_dist_follows_tiling_pattern (gimple_seq seq
, grid_prop
*grid
,
613 gimple_stmt_iterator gsi
;
614 for (gsi
= gsi_start (seq
); !gsi_end_p (gsi
); gsi_next (&gsi
))
616 gimple
*stmt
= gsi_stmt (gsi
);
618 if (grid_safe_assignment_p (stmt
, grid
)
619 || gimple_code (stmt
) == GIMPLE_GOTO
620 || gimple_code (stmt
) == GIMPLE_LABEL
621 || gimple_code (stmt
) == GIMPLE_COND
)
623 else if (gbind
*bind
= dyn_cast
<gbind
*> (stmt
))
625 if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind
),
630 else if (gtry
*try_stmt
= dyn_cast
<gtry
*> (stmt
))
632 if (gimple_try_kind (try_stmt
) == GIMPLE_TRY_CATCH
)
634 if (dump_enabled_p ())
636 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
637 GRID_MISSED_MSG_PREFIX
"the distribute "
638 "construct contains a try..catch region\n");
639 dump_printf_loc (MSG_NOTE
, gimple_location (try_stmt
),
640 "This statement cannot be analyzed for "
641 "tiled gridification\n");
645 if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt
),
648 if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt
),
653 else if (is_gimple_call (stmt
))
655 tree fndecl
= gimple_call_fndecl (stmt
);
656 if (fndecl
&& grid_call_permissible_in_distribute_p (fndecl
))
659 if (dump_enabled_p ())
661 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
662 GRID_MISSED_MSG_PREFIX
"the distribute "
663 "construct contains a call\n");
664 dump_printf_loc (MSG_NOTE
, gimple_location (stmt
),
665 "This statement cannot be analyzed for "
666 "tiled gridification\n");
670 else if (gomp_parallel
*par
= dyn_cast
<gomp_parallel
*> (stmt
))
674 if (dump_enabled_p ())
676 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
677 GRID_MISSED_MSG_PREFIX
"a parallel "
678 "construct contains another parallel "
680 dump_printf_loc (MSG_NOTE
, gimple_location (stmt
),
681 "This parallel construct is nested in "
686 if (!grid_parallel_clauses_gridifiable (par
, grid
->target_loc
)
687 || !grid_dist_follows_tiling_pattern (gimple_omp_body (par
),
691 else if (gomp_for
*gfor
= dyn_cast
<gomp_for
*> (stmt
))
695 if (dump_enabled_p ())
697 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
698 GRID_MISSED_MSG_PREFIX
"a loop "
699 "construct is not nested within a parallel "
701 dump_printf_loc (MSG_NOTE
, gimple_location (stmt
),
702 "This loop construct is not nested in "
703 "a parallel construct\n");
707 if (!grid_gfor_follows_tiling_pattern (gfor
, grid
))
712 if (dump_enabled_p ())
714 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, grid
->target_loc
,
715 GRID_MISSED_MSG_PREFIX
"the distribute "
716 "construct contains a complex statement\n");
717 dump_printf_loc (MSG_NOTE
, gimple_location (stmt
),
718 "This statement cannot be analyzed for "
719 "tiled gridification\n");
727 /* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
728 return true, otherwise return false. In the case of success, also fill in
729 GRID with information describing the kernel grid. */
732 grid_target_follows_gridifiable_pattern (gomp_target
*target
, grid_prop
*grid
)
734 if (gimple_omp_target_kind (target
) != GF_OMP_TARGET_KIND_REGION
)
737 location_t tloc
= gimple_location (target
);
738 grid
->target_loc
= tloc
;
740 = grid_find_single_omp_among_assignments (gimple_omp_body (target
),
744 gomp_teams
*teams
= dyn_cast
<gomp_teams
*> (stmt
);
745 tree group_size
= NULL
;
748 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
749 GRID_MISSED_MSG_PREFIX
"it does not have a sole teams "
750 "construct in it.\n");
754 tree clauses
= gimple_omp_teams_clauses (teams
);
757 switch (OMP_CLAUSE_CODE (clauses
))
759 case OMP_CLAUSE_NUM_TEAMS
:
760 if (dump_enabled_p ())
761 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
762 GRID_MISSED_MSG_PREFIX
"the teams construct "
763 "contains a num_teams clause\n ");
766 case OMP_CLAUSE_REDUCTION
:
767 if (dump_enabled_p ())
768 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
769 GRID_MISSED_MSG_PREFIX
"a reduction "
770 "clause is present\n ");
773 case OMP_CLAUSE_THREAD_LIMIT
:
774 if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses
, 0)))
775 group_size
= OMP_CLAUSE_OPERAND (clauses
, 0);
781 clauses
= OMP_CLAUSE_CHAIN (clauses
);
784 stmt
= grid_find_single_omp_among_assignments (gimple_omp_body (teams
), grid
,
788 gomp_for
*dist
= dyn_cast
<gomp_for
*> (stmt
);
791 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
792 GRID_MISSED_MSG_PREFIX
"the teams construct does not "
793 "have a single distribute construct in it.\n");
797 gcc_assert (gimple_omp_for_kind (dist
) == GF_OMP_FOR_KIND_DISTRIBUTE
);
799 grid
->collapse
= gimple_omp_for_collapse (dist
);
800 if (grid
->collapse
> 3)
802 if (dump_enabled_p ())
803 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
804 GRID_MISSED_MSG_PREFIX
"the distribute construct "
805 "contains collapse clause with parameter greater "
810 struct omp_for_data fd
;
811 struct omp_for_data_loop
*dist_loops
812 = (struct omp_for_data_loop
*)alloca (grid
->collapse
813 * sizeof (struct omp_for_data_loop
));
814 omp_extract_for_data (dist
, &fd
, dist_loops
);
817 if (group_size
&& !operand_equal_p (group_size
, fd
.chunk_size
, 0))
819 if (dump_enabled_p ())
820 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
821 GRID_MISSED_MSG_PREFIX
"the teams "
822 "thread limit is different from distribute "
826 group_size
= fd
.chunk_size
;
828 if (group_size
&& grid
->collapse
> 1)
830 if (dump_enabled_p ())
831 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
832 GRID_MISSED_MSG_PREFIX
"group size cannot be "
833 "set using thread_limit or schedule clauses "
834 "when also using a collapse clause greater than 1\n");
838 if (gimple_omp_for_combined_p (dist
))
840 grid
->tiling
= false;
841 grid
->group_sizes
[0] = group_size
;
842 for (unsigned i
= 1; i
< grid
->collapse
; i
++)
843 grid
->group_sizes
[i
] = NULL
;
844 return grid_dist_follows_simple_pattern (dist
, grid
);
851 if (dump_enabled_p ())
852 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, tloc
,
853 GRID_MISSED_MSG_PREFIX
"group size cannot be set "
854 "using thread_limit or schedule clauses when "
855 "distribute and loop constructs do not form "
856 "one combined construct\n");
859 for (unsigned i
= 0; i
< grid
->collapse
; i
++)
861 if (fd
.loops
[i
].cond_code
== GT_EXPR
)
862 grid
->group_sizes
[i
] = fold_build1 (NEGATE_EXPR
,
863 TREE_TYPE (fd
.loops
[i
].step
),
866 grid
->group_sizes
[i
] = fd
.loops
[i
].step
;
868 return grid_dist_follows_tiling_pattern (gimple_omp_body (dist
), grid
,
873 /* Operand walker, used to remap pre-body declarations according to a hash map
877 grid_remap_prebody_decls (tree
*tp
, int *walk_subtrees
, void *data
)
881 if (DECL_P (t
) || TYPE_P (t
))
888 struct walk_stmt_info
*wi
= (struct walk_stmt_info
*) data
;
889 hash_map
<tree
, tree
> *declmap
= (hash_map
<tree
, tree
> *) wi
->info
;
890 tree
*repl
= declmap
->get (t
);
897 /* Identifiers of segments into which a particular variable should be placed
900 enum grid_var_segment
{GRID_SEGMENT_PRIVATE
, GRID_SEGMENT_GROUP
,
901 GRID_SEGMENT_GLOBAL
};
903 /* Mark VAR so that it is eventually placed into SEGMENT. Place an artificial
904 builtin call into SEQ that will make sure the variable is always considered
908 grid_mark_variable_segment (tree var
, enum grid_var_segment segment
)
910 /* Making non-addressable variables would require that we re-gimplify all
911 their uses. Fortunately, we do not have to do this because if they are
912 not addressable, it means they are not used in atomic or parallel
913 statements and so relaxed GPU consistency rules mean we can just keep them
915 if (!TREE_ADDRESSABLE (var
))
920 case GRID_SEGMENT_GROUP
:
921 DECL_ATTRIBUTES (var
) = tree_cons (get_identifier ("hsa_group_segment"),
922 NULL
, DECL_ATTRIBUTES (var
));
924 case GRID_SEGMENT_GLOBAL
:
925 DECL_ATTRIBUTES (var
) = tree_cons (get_identifier ("hsa_global_segment"),
926 NULL
, DECL_ATTRIBUTES (var
));
932 if (!TREE_STATIC (var
))
934 TREE_STATIC (var
) = 1;
935 varpool_node::finalize_decl (var
);
940 /* Copy leading register-type assignments to local variables in SRC to just
941 before DST, creating temporaries, adjusting mapping of operands in WI and
942 remapping operands as necessary. Add any new temporaries to TGT_BIND.
943 Return the first statement that does not conform to grid_safe_assignment_p
944 or NULL. If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
945 variables in traversed bind statements so that they are put into the
946 appropriate segment. */
949 grid_copy_leading_local_assignments (gimple_seq src
, gimple_stmt_iterator
*dst
,
951 enum grid_var_segment var_segment
,
952 struct walk_stmt_info
*wi
)
954 hash_map
<tree
, tree
> *declmap
= (hash_map
<tree
, tree
> *) wi
->info
;
955 gimple_stmt_iterator gsi
;
956 for (gsi
= gsi_start (src
); !gsi_end_p (gsi
); gsi_next (&gsi
))
958 gimple
*stmt
= gsi_stmt (gsi
);
959 if (gbind
*bind
= dyn_cast
<gbind
*> (stmt
))
961 gimple
*r
= grid_copy_leading_local_assignments
962 (gimple_bind_body (bind
), dst
, tgt_bind
, var_segment
, wi
);
964 if (var_segment
!= GRID_SEGMENT_PRIVATE
)
965 for (tree var
= gimple_bind_vars (bind
);
967 var
= DECL_CHAIN (var
))
968 grid_mark_variable_segment (var
, var_segment
);
974 if (!grid_safe_assignment_p (stmt
, NULL
))
976 tree lhs
= gimple_assign_lhs (as_a
<gassign
*> (stmt
));
977 tree repl
= copy_var_decl (lhs
, create_tmp_var_name (NULL
),
979 DECL_CONTEXT (repl
) = current_function_decl
;
980 gimple_bind_append_vars (tgt_bind
, repl
);
982 declmap
->put (lhs
, repl
);
983 gassign
*copy
= as_a
<gassign
*> (gimple_copy (stmt
));
984 walk_gimple_op (copy
, grid_remap_prebody_decls
, wi
);
985 gsi_insert_before (dst
, copy
, GSI_SAME_STMT
);
990 /* Statement walker function to make adjustments to statements within the
991 gridified kernel copy. */
994 grid_process_grid_body (gimple_stmt_iterator
*gsi
, bool *handled_ops_p
,
995 struct walk_stmt_info
*)
997 *handled_ops_p
= false;
998 gimple
*stmt
= gsi_stmt (*gsi
);
999 if (gimple_code (stmt
) == GIMPLE_OMP_FOR
1000 && (gimple_omp_for_kind (stmt
) & GF_OMP_FOR_SIMD
))
1002 gomp_for
*loop
= as_a
<gomp_for
*> (stmt
);
1003 tree clauses
= gimple_omp_for_clauses (loop
);
1004 tree cl
= omp_find_clause (clauses
, OMP_CLAUSE_SAFELEN
);
1006 OMP_CLAUSE_SAFELEN_EXPR (cl
) = integer_one_node
;
1009 tree c
= build_omp_clause (UNKNOWN_LOCATION
, OMP_CLAUSE_SAFELEN
);
1010 OMP_CLAUSE_SAFELEN_EXPR (c
) = integer_one_node
;
1011 OMP_CLAUSE_CHAIN (c
) = clauses
;
1012 gimple_omp_for_set_clauses (loop
, c
);
1018 /* Given a PARLOOP that is a normal for looping construct but also a part of a
1019 combined construct with a simd loop, eliminate the simd loop. */
1022 grid_eliminate_combined_simd_part (gomp_for
*parloop
)
1024 struct walk_stmt_info wi
;
1026 memset (&wi
, 0, sizeof (wi
));
1028 enum gf_mask msk
= GF_OMP_FOR_SIMD
;
1029 wi
.info
= (void *) &msk
;
1030 walk_gimple_seq (gimple_omp_body (parloop
), omp_find_combined_for
, NULL
, &wi
);
1031 gimple
*stmt
= (gimple
*) wi
.info
;
1032 /* We expect that the SIMD is the only statement in the parallel loop. */
1034 && gimple_code (stmt
) == GIMPLE_OMP_FOR
1035 && (gimple_omp_for_kind (stmt
) == GF_OMP_FOR_SIMD
)
1036 && gimple_omp_for_combined_into_p (stmt
)
1037 && !gimple_omp_for_combined_p (stmt
));
1038 gomp_for
*simd
= as_a
<gomp_for
*> (stmt
);
1040 /* Copy over the iteration properties because the body refers to the index in
1041 the bottom-most loop. */
1042 unsigned i
, collapse
= gimple_omp_for_collapse (parloop
);
1043 gcc_checking_assert (collapse
== gimple_omp_for_collapse (simd
));
1044 for (i
= 0; i
< collapse
; i
++)
1046 gimple_omp_for_set_index (parloop
, i
, gimple_omp_for_index (simd
, i
));
1047 gimple_omp_for_set_initial (parloop
, i
, gimple_omp_for_initial (simd
, i
));
1048 gimple_omp_for_set_final (parloop
, i
, gimple_omp_for_final (simd
, i
));
1049 gimple_omp_for_set_incr (parloop
, i
, gimple_omp_for_incr (simd
, i
));
1052 tree
*tgt
= gimple_omp_for_clauses_ptr (parloop
);
1054 tgt
= &OMP_CLAUSE_CHAIN (*tgt
);
1056 /* Copy over all clauses, except for linear clauses, which are turned into
1057 private clauses, and all other simd-specific clauses, which are
1059 tree
*pc
= gimple_omp_for_clauses_ptr (simd
);
1063 switch (TREE_CODE (c
))
1065 case OMP_CLAUSE_LINEAR
:
1067 tree priv
= build_omp_clause (UNKNOWN_LOCATION
, OMP_CLAUSE_PRIVATE
);
1068 OMP_CLAUSE_DECL (priv
) = OMP_CLAUSE_DECL (c
);
1069 OMP_CLAUSE_CHAIN (priv
) = NULL
;
1071 tgt
= &OMP_CLAUSE_CHAIN (priv
);
1072 pc
= &OMP_CLAUSE_CHAIN (c
);
1076 case OMP_CLAUSE_SAFELEN
:
1077 case OMP_CLAUSE_SIMDLEN
:
1078 case OMP_CLAUSE_ALIGNED
:
1079 pc
= &OMP_CLAUSE_CHAIN (c
);
1083 *pc
= OMP_CLAUSE_CHAIN (c
);
1084 OMP_CLAUSE_CHAIN (c
) = NULL
;
1086 tgt
= &OMP_CLAUSE_CHAIN(c
);
1091 /* Finally, throw away the simd and mark the parallel loop as not
1093 gimple_omp_set_body (parloop
, gimple_omp_body (simd
));
1094 gimple_omp_for_set_combined_p (parloop
, false);
1097 /* Statement walker function marking all loops as grid ones representing
1098 threads of a particular thread group. */
1101 grid_mark_tiling_loops (gimple_stmt_iterator
*gsi
, bool *handled_ops_p
,
1102 struct walk_stmt_info
*wi_in
)
1104 *handled_ops_p
= false;
1105 if (gomp_for
*loop
= dyn_cast
<gomp_for
*> (gsi_stmt (*gsi
)))
1107 *handled_ops_p
= true;
1108 gimple_omp_for_set_kind (loop
, GF_OMP_FOR_KIND_GRID_LOOP
);
1109 gimple_omp_for_set_grid_intra_group (loop
, true);
1110 if (gimple_omp_for_combined_p (loop
))
1111 grid_eliminate_combined_simd_part (loop
);
1113 struct walk_stmt_info body_wi
;
1114 memset (&body_wi
, 0, sizeof (body_wi
));
1115 walk_gimple_seq_mod (gimple_omp_body_ptr (loop
),
1116 grid_process_grid_body
, NULL
, &body_wi
);
1118 gbind
*bind
= (gbind
*) wi_in
->info
;
1120 for (c
= gimple_omp_for_clauses (loop
); c
; c
= OMP_CLAUSE_CHAIN (c
))
1121 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_LASTPRIVATE
)
1123 push_gimplify_context ();
1124 tree ov
= OMP_CLAUSE_DECL (c
);
1125 tree gv
= copy_var_decl (ov
, create_tmp_var_name (NULL
),
1128 grid_mark_variable_segment (gv
, GRID_SEGMENT_GROUP
);
1129 DECL_CONTEXT (gv
) = current_function_decl
;
1130 gimple_bind_append_vars (bind
, gv
);
1131 tree x
= lang_hooks
.decls
.omp_clause_assign_op (c
, gv
, ov
);
1132 gimplify_and_add (x
, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c
));
1133 x
= lang_hooks
.decls
.omp_clause_copy_ctor (c
, ov
, gv
);
1134 gimple_seq l
= NULL
;
1135 gimplify_and_add (x
, &l
);
1136 gsi_insert_seq_after (gsi
, l
, GSI_SAME_STMT
);
1137 pop_gimplify_context (bind
);
1143 /* Statement walker function marking all parallels as grid_phony and loops as
1144 grid ones representing threads of a particular thread group.  Variables of
   any gbind encountered are marked as belonging to the group segment, the
   body of each parallel is wrapped in a new gbind and walked with
   grid_mark_tiling_loops, and calls are passed to
   grid_handle_call_in_distribute, whose result is stored in
   WI_IN->removed_stmt.  */
1147 grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator
*gsi
,
1148 bool *handled_ops_p
,
1149 struct walk_stmt_info
*wi_in
)
/* Start from the defaults: statement not handled and not removed.  */
1151 *handled_ops_p
= false;
1152 wi_in
->removed_stmt
= false;
1153 gimple
*stmt
= gsi_stmt (*gsi
);
/* Binds: mark every variable of the bind as a group segment one.  */
1154 if (gbind
*bind
= dyn_cast
<gbind
*> (stmt
))
1156 for (tree var
= gimple_bind_vars (bind
); var
; var
= DECL_CHAIN (var
))
1157 grid_mark_variable_segment (var
, GRID_SEGMENT_GROUP
);
/* Parallels: report them as handled and flag them as phony.  */
1159 else if (gomp_parallel
*parallel
= dyn_cast
<gomp_parallel
*> (stmt
))
1161 *handled_ops_p
= true;
1162 gimple_omp_parallel_set_grid_phony (parallel
, true);
/* Wrap the existing body of the parallel in a freshly built bind with a new
   BLOCK, and make a sequence holding just that bind the new body.  */
1164 gbind
*new_bind
= gimple_build_bind (NULL
, NULL
, make_node (BLOCK
));
1165 gimple_bind_set_body (new_bind
, gimple_omp_body (parallel
));
1166 gimple_seq s
= NULL
;
1167 gimple_seq_add_stmt (&s
, new_bind
);
1168 gimple_omp_set_body (parallel
, s
);
/* Walk the wrapped body with grid_mark_tiling_loops, passing NEW_BIND in the
   info field so that the loop walker can add variables to it.  */
1170 struct walk_stmt_info wi_par
;
1171 memset (&wi_par
, 0, sizeof (wi_par
));
1172 wi_par
.info
= new_bind
;
1173 walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind
),
1174 grid_mark_tiling_loops
, NULL
, &wi_par
);
/* Calls: record in WI_IN->removed_stmt whatever
   grid_handle_call_in_distribute returns for the call.  */
1176 else if (is_a
<gcall
*> (stmt
))
1177 wi_in
->removed_stmt
= grid_handle_call_in_distribute (gsi
);
1181 /* Given freshly copied top level kernel SEQ, identify the individual OMP
1182 components, mark them as part of kernel, copy assignment leading to them
1183 just before DST, remapping them using WI and adding new temporaries to
1184 TGT_BIND, and return the loop that will be used for kernel dispatch. */
1187 grid_process_kernel_body_copy (grid_prop
*grid
, gimple_seq seq
,
1188 gimple_stmt_iterator
*dst
,
1189 gbind
*tgt_bind
, struct walk_stmt_info
*wi
)
/* Copy the assignments leading to the first OMP statement of SEQ (as global
   segment ones) and treat the statement that is found as a teams
   construct.  */
1191 gimple
*stmt
= grid_copy_leading_local_assignments (seq
, dst
, tgt_bind
,
1192 GRID_SEGMENT_GLOBAL
, wi
);
1193 gomp_teams
*teams
= dyn_cast
<gomp_teams
*> (stmt
);
/* Flag the teams construct as phony and continue the search in its body.  */
1195 gimple_omp_teams_set_grid_phony (teams
, true);
1196 stmt
= grid_copy_leading_local_assignments (gimple_omp_body (teams
), dst
,
1197 tgt_bind
, GRID_SEGMENT_GLOBAL
,
1199 gcc_checking_assert (stmt
);
/* The statement found inside the teams body is taken to be a gomp_for,
   referred to as DIST from here on.  */
1200 gomp_for
*dist
= dyn_cast
<gomp_for
*> (stmt
);
/* Copy the leading assignments of the pre-body of DIST, as group segment
   ones.  */
1202 gimple_seq prebody
= gimple_omp_for_pre_body (dist
);
1204 grid_copy_leading_local_assignments (prebody
, dst
, tgt_bind
,
1205 GRID_SEGMENT_GROUP
, wi
);
/* Make DIST a grid loop with the group_iter flag set and walk its body with
   grid_mark_tiling_parallels_and_loops.  */
1209 gimple_omp_for_set_kind (dist
, GF_OMP_FOR_KIND_GRID_LOOP
);
1210 gimple_omp_for_set_grid_group_iter (dist
, true);
1212 struct walk_stmt_info wi_tiled
;
1213 memset (&wi_tiled
, 0, sizeof (wi_tiled
));
1214 walk_gimple_seq_mod (gimple_omp_body_ptr (dist
),
1215 grid_mark_tiling_parallels_and_loops
, NULL
,
/* Flag DIST as phony and look in its body for a parallel, copying the
   leading assignments as private segment ones.  */
1221 gimple_omp_for_set_grid_phony (dist
, true);
1222 stmt
= grid_copy_leading_local_assignments (gimple_omp_body (dist
), dst
,
1224 GRID_SEGMENT_PRIVATE
, wi
);
1225 gcc_checking_assert (stmt
);
/* The parallel is flagged as phony too; the statement found in its body must
   be a gomp_for (as_a asserts that), which becomes INNER_LOOP.  */
1226 gomp_parallel
*parallel
= as_a
<gomp_parallel
*> (stmt
);
1227 gimple_omp_parallel_set_grid_phony (parallel
, true);
1228 stmt
= grid_copy_leading_local_assignments (gimple_omp_body (parallel
),
1230 GRID_SEGMENT_PRIVATE
, wi
);
1231 gomp_for
*inner_loop
= as_a
<gomp_for
*> (stmt
);
/* Make INNER_LOOP a grid loop and copy the leading assignments of its
   pre-body, as private segment ones.  */
1232 gimple_omp_for_set_kind (inner_loop
, GF_OMP_FOR_KIND_GRID_LOOP
);
1233 prebody
= gimple_omp_for_pre_body (inner_loop
);
1235 grid_copy_leading_local_assignments (prebody
, dst
, tgt_bind
,
1236 GRID_SEGMENT_PRIVATE
, wi
);
/* Eliminate the simd part of a combined INNER_LOOP and process the loop body
   with grid_process_grid_body.  */
1238 if (gimple_omp_for_combined_p (inner_loop
))
1239 grid_eliminate_combined_simd_part (inner_loop
);
1240 struct walk_stmt_info body_wi
;
1241 memset (&body_wi
, 0, sizeof (body_wi
));
1242 walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop
),
1243 grid_process_grid_body
, NULL
, &body_wi
);
/* Summary of grid_attempt_target_gridification: TARGET is checked with
   grid_target_follows_gridifiable_pattern, its body is copied with
   copy_gimple_seq_and_replace_locals and processed by
   grid_process_kernel_body_copy, the copy is wrapped by
   gimple_build_omp_grid_body, and for each of the GRID.collapse dimensions a
   _GRIDDIM_ clause carrying the grid size and the work-group size is
   prepended to the clauses of TARGET.  Statements needed to compute those
   sizes are inserted before the statement GSI points to.  */
1249 /* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
1250 create a GPU kernel for it. GSI must point to the same statement, TGT_BIND
1251 is the bind into which temporaries inserted before TARGET should be
1255 grid_attempt_target_gridification (gomp_target
*target
,
1256 gimple_stmt_iterator
*gsi
,
1259 /* removed group_size */
/* GRID is zeroed and then handed to the pattern check together with
   TARGET.  */
1261 memset (&grid
, 0, sizeof (grid
));
1262 if (!target
|| !grid_target_follows_gridifiable_pattern (target
, &grid
))
/* Report the transformation in the dump files when dumping is enabled.  */
1265 location_t loc
= gimple_location (target
);
1266 if (dump_enabled_p ())
1267 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, loc
,
1268 "Target construct will be turned into a gridified HSA "
1271 /* Copy target body to a GPUKERNEL construct: */
1272 gimple_seq kernel_seq
= copy_gimple_seq_and_replace_locals
1273 (gimple_omp_body (target
));
/* DECLMAP is a freshly allocated tree-to-tree map and WI a zeroed walk
   info.  */
1275 hash_map
<tree
, tree
> *declmap
= new hash_map
<tree
, tree
>;
1276 struct walk_stmt_info wi
;
1277 memset (&wi
, 0, sizeof (struct walk_stmt_info
));
1280 /* Copy assignments in between OMP statements before target, mark OMP
1281 statements within copy appropriately. */
1282 gomp_for
*inner_loop
= grid_process_kernel_body_copy (&grid
, kernel_seq
, gsi
,
1286 = as_a
<gbind
*> (gimple_seq_first (gimple_omp_body (target
)));
/* Link the BLOCK of the copied bind into the block tree: it becomes the first
   subblock of the block enclosing the BLOCK of the original bind.  */
1287 gbind
*new_bind
= as_a
<gbind
*> (gimple_seq_first (kernel_seq
));
1288 tree new_block
= gimple_bind_block (new_bind
);
1289 tree enc_block
= BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind
));
1290 BLOCK_CHAIN (new_block
) = BLOCK_SUBBLOCKS (enc_block
);
1291 BLOCK_SUBBLOCKS (enc_block
) = new_block
;
1292 BLOCK_SUPERCONTEXT (new_block
) = enc_block
;
/* Wrap the copied sequence in an OMP grid body statement.  */
1293 gimple
*gpukernel
= gimple_build_omp_grid_body (kernel_seq
);
1295 (gimple_bind_body_ptr (as_a
<gbind
*> (gimple_omp_body (target
))),
/* Run grid_remap_prebody_decls over the group size expressions recorded in
   GRID.  */
1298 for (size_t i
= 0; i
< grid
.collapse
; i
++)
1299 walk_tree (&grid
.group_sizes
[i
], grid_remap_prebody_decls
, &wi
, NULL
);
/* For every dimension build the grid size and work-group size expressions
   and attach them to TARGET as a _GRIDDIM_ clause.  */
1300 push_gimplify_context ();
1301 for (size_t i
= 0; i
< grid
.collapse
; i
++)
/* TYPE is the type of the iteration variable of dimension I; for pointer
   types the computation type ITYPE is the corresponding signed type.  */
1303 tree itype
, type
= TREE_TYPE (gimple_omp_for_index (inner_loop
, i
));
1304 if (POINTER_TYPE_P (type
))
1305 itype
= signed_type_for (type
);
/* Fetch the condition code and unshared copies of the initial (N1) and final
   (N2) values of dimension I, remapping both with
   grid_remap_prebody_decls.  */
1309 enum tree_code cond_code
= gimple_omp_for_cond (inner_loop
, i
);
1310 tree n1
= unshare_expr (gimple_omp_for_initial (inner_loop
, i
));
1311 walk_tree (&n1
, grid_remap_prebody_decls
, &wi
, NULL
);
1312 tree n2
= unshare_expr (gimple_omp_for_final (inner_loop
, i
));
1313 walk_tree (&n2
, grid_remap_prebody_decls
, &wi
, NULL
);
/* omp_adjust_for_condition may change COND_CODE and N2; afterwards both
   bounds are converted to ITYPE.  */
1314 omp_adjust_for_condition (loc
, &cond_code
, &n2
);
1315 n1
= fold_convert (itype
, n1
);
1316 n2
= fold_convert (itype
, n2
);
/* COND compares N1 with N2 using COND_CODE; it selects between T and zero
   further below.  */
1318 tree cond
= fold_build2 (cond_code
, boolean_type_node
, n1
, n2
);
1320 = omp_get_for_step_from_incr (loc
, gimple_omp_for_incr (inner_loop
, i
));
/* T = (STEP + ADJ + N2 - N1) / STEP, where ADJ is -1 for LT_EXPR and 1
   otherwise.  When ITYPE is unsigned and the condition is GT_EXPR both
   operands of the division are negated first.  */
1322 tree t
= build_int_cst (itype
, (cond_code
== LT_EXPR
? -1 : 1));
1323 t
= fold_build2 (PLUS_EXPR
, itype
, step
, t
);
1324 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2
);
1325 t
= fold_build2 (MINUS_EXPR
, itype
, t
, n1
);
1326 if (TYPE_UNSIGNED (itype
) && cond_code
== GT_EXPR
)
1327 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
1328 fold_build1 (NEGATE_EXPR
, itype
, t
),
1329 fold_build1 (NEGATE_EXPR
, itype
, step
));
1331 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, step
);
/* T is replaced by zero when COND does not hold.  */
1332 t
= fold_build3 (COND_EXPR
, itype
, cond
, t
, build_zero_cst (itype
));
/* STEP is negated for a GT_EXPR condition and T is multiplied by it.  */
1335 if (cond_code
== GT_EXPR
)
1336 step
= fold_build1 (NEGATE_EXPR
, itype
, step
);
1337 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
/* GS is T converted to uint32 and gimplified to a value; statements needed
   for that are inserted before the statement GSI points to.  */
1340 tree gs
= fold_convert (uint32_type_node
, t
);
1341 gimple_seq tmpseq
= NULL
;
1342 gimplify_expr (&gs
, &tmpseq
, NULL
, is_gimple_val
, fb_rvalue
);
1343 if (!gimple_seq_empty_p (tmpseq
))
1344 gsi_insert_seq_before (gsi
, tmpseq
, GSI_SAME_STMT
);
/* WS is the group size of dimension I converted to uint32 and gimplified the
   same way when GRID records one; the zero constant is presumably the
   fallback used when it does not.  */
1347 if (grid
.group_sizes
[i
])
1349 ws
= fold_convert (uint32_type_node
, grid
.group_sizes
[i
]);
1351 gimplify_expr (&ws
, &tmpseq
, NULL
, is_gimple_val
, fb_rvalue
);
1352 if (!gimple_seq_empty_p (tmpseq
))
1353 gsi_insert_seq_before (gsi
, tmpseq
, GSI_SAME_STMT
);
1356 ws
= build_zero_cst (uint32_type_node
);
/* Prepend a _GRIDDIM_ clause holding the dimension index, GS and WS to the
   clauses of TARGET.  */
1358 tree c
= build_omp_clause (UNKNOWN_LOCATION
, OMP_CLAUSE__GRIDDIM_
);
1359 OMP_CLAUSE__GRIDDIM__DIMENSION (c
) = i
;
1360 OMP_CLAUSE__GRIDDIM__SIZE (c
) = gs
;
1361 OMP_CLAUSE__GRIDDIM__GROUP (c
) = ws
;
1362 OMP_CLAUSE_CHAIN (c
) = gimple_omp_target_clauses (target
);
1363 gimple_omp_target_set_clauses (target
, c
);
/* Temporaries created while gimplifying are handed over to TGT_BIND.  */
1365 pop_gimplify_context (tgt_bind
);
1370 /* Walker function doing all the work for omp_grid_gridify_all_targets.
   Target statements are passed to grid_attempt_target_gridification together
   with the gbind found in INCOMING->info; the bodies of gbind statements are
   walked recursively with this same function.  */
1373 grid_gridify_all_targets_stmt (gimple_stmt_iterator
*gsi
,
1374 bool *handled_ops_p
,
1375 struct walk_stmt_info
*incoming
)
1377 *handled_ops_p
= false;
/* Targets: try to gridify them, using the bind recorded in the walk info as
   the place for new temporaries.  */
1379 gimple
*stmt
= gsi_stmt (*gsi
);
1380 gomp_target
*target
= dyn_cast
<gomp_target
*> (stmt
);
1383 gbind
*tgt_bind
= (gbind
*) incoming
->info
;
1384 gcc_checking_assert (tgt_bind
);
1385 grid_attempt_target_gridification (target
, gsi
, tgt_bind
);
/* Binds: report them as handled and walk their bodies here with a fresh,
   zeroed walk info.  */
1388 gbind
*bind
= dyn_cast
<gbind
*> (stmt
);
1391 *handled_ops_p
= true;
1392 struct walk_stmt_info wi
;
1393 memset (&wi
, 0, sizeof (wi
));
1395 walk_gimple_seq_mod (gimple_bind_body_ptr (bind
),
1396 grid_gridify_all_targets_stmt
, NULL
, &wi
);
1401 /* Attempt to gridify all target constructs in BODY_P. All such targets will
1402 have their bodies duplicated, with the new copy being put into a
1403 gimple_omp_grid_body statement. All kernel-related construct within the
1404 grid_body will be marked with phony flags or kernel kinds. Moreover, some
1405 re-structuring is often needed, such as copying pre-bodies before the target
1406 construct so that kernel grid sizes can be computed. */
1409 omp_grid_gridify_all_targets (gimple_seq
*body_p
)
/* The walk starts with a zeroed walk info, so its info field is NULL at the
   top level; all the work is done by grid_gridify_all_targets_stmt.  */
1411 struct walk_stmt_info wi
;
1412 memset (&wi
, 0, sizeof (wi
));
1413 walk_gimple_seq_mod (body_p
, grid_gridify_all_targets_stmt
, NULL
, &wi
);