/* Lowering and expansion of OpenMP directives for HSA GPU agents.

   Copyright (C) 2013-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "omp-general.h"
#include "omp-grid.h"
#include "gimple-pretty-print.h"
/* Return the lastprivate predicate for a given gridified loop described by
   FD.  */

tree
omp_grid_lastprivate_predicate (struct omp_for_data *fd)
{
  /* When dealing with a gridified loop, we need to check up to three collapsed
     iteration variables but they are not actually captured in this fd.
     Fortunately, we can easily rely on HSA builtins to get this
     information.  */
  tree id, size;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
      && gimple_omp_for_grid_intra_group (fd->for_stmt))
    {
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
      size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
    }
  else
    {
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
      size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
    }
  tree cond = NULL;
  for (int dim = 0; dim < fd->collapse; dim++)
    {
      tree dim_tree = build_int_cstu (unsigned_type_node, dim);
      tree u1 = build_int_cstu (unsigned_type_node, 1);
      tree c2
	= build2 (EQ_EXPR, boolean_type_node,
		  build2 (PLUS_EXPR, unsigned_type_node,
			  build_call_expr (id, 1, dim_tree), u1),
		  build_call_expr (size, 1, dim_tree));
      if (cond)
	cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
      else
	cond = c2;
    }
  return cond;
}
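
/* For illustration only: with the id/size builtin pair chosen above, the
   predicate built by the loop is, in effect,

     (id (0) + 1 == size (0)) && ... && (id (collapse-1) + 1 == size (collapse-1))

   i.e. it is true exactly for the work-item that executes the logically last
   iteration in every collapsed dimension, which is where lastprivate values
   must be written back.  */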
/* Structure describing the basic properties of the loop we are analyzing
   whether it can be gridified and when it is gridified.  */

struct grid_prop
{
  /* True when we are doing tiling gridification, i.e. when there is a distinct
     distribute loop over groups and a loop construct over work-items.  False
     when distribute and parallel for loops form a combined construct.  */
  bool tiling;
  /* Location of the target construct for optimization information
     messages.  */
  dump_user_location_t target_loc;
  /* The collapse clause of the involved loops.  Collapse value of all of them
     must be the same for gridification to take place.  */
  size_t collapse;
  /* Group sizes, if requested by the user or NULL if not requested.  */
  tree group_sizes[3];
};
#define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
  "gridified HSA kernel because "
/* Return true if STMT is an assignment of a register-type into a local
   VAR_DECL.  If GRID is non-NULL, the assignment additionally must not be to
   any of the trees specifying group sizes there.  */
static bool
grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
{
  gassign *assign = dyn_cast <gassign *> (stmt);
  if (!assign)
    return false;
  if (gimple_clobber_p (assign))
    return true;
  tree lhs = gimple_assign_lhs (assign);
  if (!VAR_P (lhs)
      || !is_gimple_reg_type (TREE_TYPE (lhs))
      || is_global_var (lhs))
    return false;
  if (grid)
    for (unsigned i = 0; i < grid->collapse; i++)
      if (lhs == grid->group_sizes[i])
	return false;
  return true;
}
/* Return true if all statements in SEQ are assignments to local register-type
   variables that do not hold group size information.  */
static bool
grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
{
  if (!seq)
    return true;

  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
      return false;
  return true;
}
/* Scan statements in SEQ and call itself recursively on any bind.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  If during the whole search only assignments to
   register-type local variables (that do not overwrite group size information)
   and a single OMP statement are encountered, return true, otherwise return
   false.  RET is where we store any OMP statement encountered.  */
static bool
grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
					  const char *name, gimple **ret)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (grid_safe_assignment_p (stmt, grid))
	continue;
      if (gbind *bind = dyn_cast <gbind *> (stmt))
	{
	  gimple_seq bind_body = gimple_bind_body (bind);
	  if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
							 ret))
	    return false;
	}
      else if (is_gimple_omp (stmt))
	{
	  if (*ret)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "%s construct "
				   "contains multiple OpenMP constructs\n",
				   name);
		  dump_printf_loc (MSG_NOTE, *ret,
				   "The first OpenMP construct within ");
		  dump_printf_loc (MSG_NOTE, stmt,
				   "The second OpenMP construct within ");
		}
	      return false;
	    }
	  *ret = stmt;
	}
      else
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "%s construct contains "
			       "a complex statement\n", name);
	      dump_printf_loc (MSG_NOTE, stmt,
			       "This statement cannot be analyzed for "
			       "gridification\n");
	    }
	  return false;
	}
    }
  return true;
}
/* Scan statements in SEQ and make sure that it and any binds in it contain
   only assignments to local register-type variables (that do not overwrite
   group size information) and one OMP construct.  If so, return that
   construct, otherwise return NULL.  GRID describes hitherto discovered
   properties of the loop that is evaluated for possible gridification.  If
   dumping is enabled and the function fails, use NAME to dump a note with the
   reason for failure.  */
static gimple *
grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
					const char *name)
{
  if (!seq)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			 GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
			 name);
      return NULL;
    }

  gimple *ret = NULL;
  if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
    {
      if (!ret && dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			 GRID_MISSED_MSG_PREFIX "%s construct does not contain"
			 " any other OpenMP construct\n", name);
      return ret;
    }
  else
    return NULL;
}
/* Walker function looking for statements that there is no point in gridifying
   (and for noreturn function calls which we cannot do).  Return non-NULL if
   such a statement is found.  */
static tree
grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
				   bool *handled_ops_p,
				   struct walk_stmt_info *wi)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
  switch (gimple_code (stmt))
    {
    case GIMPLE_CALL:
      if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
	{
	  *handled_ops_p = true;
	  wi->info = stmt;
	  return error_mark_node;
	}
      break;

    /* We may reduce the following list if we find a way to implement the
       clauses, but now there is no point trying further.  */
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_SECTIONS:
    case GIMPLE_OMP_SECTIONS_SWITCH:
    case GIMPLE_OMP_TARGET:
    case GIMPLE_OMP_ORDERED:
      *handled_ops_p = true;
      wi->info = stmt;
      return error_mark_node;

    default:
      break;
    }
  return NULL;
}
/* Examine clauses of omp parallel statement PAR and if any prevents
   gridification, issue a missed-optimization diagnostic and return false,
   otherwise return true.  TLOC is the location of the target construct used
   for optimization information messages.  */
static bool
grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc)
{
  tree clauses = gimple_omp_parallel_clauses (par);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
	{
	case OMP_CLAUSE_NUM_THREADS:
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			       GRID_MISSED_MSG_PREFIX "there is "
			       "a num_threads clause of the parallel "
			       "construct\n");
	      dump_printf_loc (MSG_NOTE, par,
			       "Parallel construct has a num_threads clause\n");
	    }
	  return false;

	case OMP_CLAUSE_REDUCTION:
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			       GRID_MISSED_MSG_PREFIX "a reduction clause "
			       "is present\n ");
	      dump_printf_loc (MSG_NOTE, par,
			       "Parallel construct has a reduction clause\n");
	    }
	  return false;

	default:
	  break;
	}
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }
  return true;
}
/* Examine clauses and the body of omp loop statement GFOR and if something
   prevents gridification, issue a missed-optimization diagnostic and return
   false, otherwise return true.  GRID describes hitherto discovered properties
   of the loop that is evaluated for possible gridification.  */
static bool
grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
{
  if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
						 grid))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			   GRID_MISSED_MSG_PREFIX "the inner loop "
			   "bounds computation contains a complex "
			   "statement\n");
	  dump_printf_loc (MSG_NOTE, gfor,
			   "Loop construct cannot be analyzed for "
			   "gridification\n");
	}
      return false;
    }

  tree clauses = gimple_omp_for_clauses (gfor);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
	{
	case OMP_CLAUSE_SCHEDULE:
	  if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "the inner loop "
				   "has a non-automatic schedule clause\n");
		  dump_printf_loc (MSG_NOTE, gfor,
				   "Loop construct has a non-automatic "
				   "schedule clause\n");
		}
	      return false;
	    }
	  break;

	case OMP_CLAUSE_REDUCTION:
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "a reduction "
			       "clause is present\n ");
	      dump_printf_loc (MSG_NOTE, gfor,
			       "Loop construct has a reduction schedule "
			       "clause\n");
	    }
	  return false;

	default:
	  break;
	}
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }

  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  if (walk_gimple_seq (gimple_omp_body (gfor),
		       grid_find_ungridifiable_statement,
		       NULL, &wi))
    {
      gimple *bad = (gimple *) wi.info;
      if (dump_enabled_p ())
	{
	  if (is_gimple_call (bad))
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			     GRID_MISSED_MSG_PREFIX "the inner loop contains "
			     "call to a noreturn function\n");
	  else
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			     GRID_MISSED_MSG_PREFIX "the inner loop contains "
			     "statement %s which cannot be transformed\n",
			     gimple_code_name[(int) gimple_code (bad)]);
	  dump_printf_loc (MSG_NOTE, bad,
			   "This statement cannot be analyzed for "
			   "gridification\n");
	}
      return false;
    }
  return true;
}
/* Given a distribute omp construct represented by DIST, which in the original
   source forms a compound construct with a looping construct, return true if it
   can be turned into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  */
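
/* For illustration only: the "simple" shape checked below corresponds roughly
   to a combined construct such as (identifiers are made up)

     #pragma omp target teams
     #pragma omp distribute parallel for
     for (int i = 0; i < n; i++)
       d[i] = a[i] * b[i];

   i.e. the distribute body contains nothing but a parallel, whose body
   contains nothing but a simple worksharing loop, apart from local
   register-type assignments.  */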
static bool
grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
{
  dump_user_location_t tloc = grid->target_loc;
  gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
							  grid, "distribute");
  gomp_parallel *par;
  if (!stmt
      || !(par = dyn_cast <gomp_parallel *> (stmt))
      || !grid_parallel_clauses_gridifiable (par, tloc))
    return false;

  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
						 "parallel");
  gomp_for *gfor;
  if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
    return false;

  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			 GRID_MISSED_MSG_PREFIX "the inner loop is not "
			 "a simple for loop\n");
      return false;
    }
  gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  return true;
}
/* Given an omp loop statement GFOR, return true if it can participate in
   tiling gridification, i.e. in one where the distribute and parallel for
   loops do not form a compound statement.  GRID describes hitherto discovered
   properties of the loop that is evaluated for possible gridification.  */
static bool
grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
{
  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			   GRID_MISSED_MSG_PREFIX "an inner loop is not "
			   "a simple for loop\n");
	  dump_printf_loc (MSG_NOTE, gfor,
			   "This statement is not a simple for loop\n");
	}
      return false;
    }

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  if (gimple_omp_for_collapse (gfor) != grid->collapse)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			   GRID_MISSED_MSG_PREFIX "an inner loop does not "
			   "use the same collapse clause\n");
	  dump_printf_loc (MSG_NOTE, gfor,
			   "Loop construct uses a different collapse clause\n");
	}
      return false;
    }

  struct omp_for_data fd;
  struct omp_for_data_loop *loops
    = (struct omp_for_data_loop *) alloca (grid->collapse
					   * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (gfor, &fd, loops);
  for (unsigned i = 0; i < grid->collapse; i++)
    {
      tree itype, type = TREE_TYPE (fd.loops[i].v);
      if (POINTER_TYPE_P (type))
	itype = signed_type_for (type);
      else
	itype = type;

      tree n1 = fold_convert (itype, fd.loops[i].n1);
      tree n2 = fold_convert (itype, fd.loops[i].n2);
      tree t = build_int_cst (itype,
			      (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);
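
      /* T is now the iteration count of this collapsed loop; for tiling
	 gridification it must match the tile (group) size recorded for this
	 dimension, which is what the check below verifies.  */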
      if (!operand_equal_p (grid->group_sizes[i], t, 0))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "the distribute and "
			       "an internal loop do not agree on tile size\n");
	      dump_printf_loc (MSG_NOTE, gfor,
			       "Loop construct does not seem to loop over "
			       "a tile size\n");
	    }
	  return false;
	}
    }
  return true;
}
/* Facing a call to FNDECL in the body of a distribute construct, return true
   if we can handle it or false if it precludes gridification.  */
static bool
grid_call_permissible_in_distribute_p (tree fndecl)
{
  if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
    return true;

  const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  if (strstr (name, "omp_") != name)
    return false;

  if ((strcmp (name, "omp_get_thread_num") == 0)
      || (strcmp (name, "omp_get_num_threads") == 0)
      || (strcmp (name, "omp_get_num_teams") == 0)
      || (strcmp (name, "omp_get_team_num") == 0)
      || (strcmp (name, "omp_get_level") == 0)
      || (strcmp (name, "omp_get_active_level") == 0)
      || (strcmp (name, "omp_in_parallel") == 0))
    return true;

  return false;
}
/* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
   of a distribute construct that is pointed at by GSI, modify it as necessary
   for gridification.  If the statement itself got removed, return true.  */
static bool
grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  tree fndecl = gimple_call_fndecl (stmt);
  gcc_checking_assert (fndecl);
  if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
    return false;

  const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  if ((strcmp (name, "omp_get_thread_num") == 0)
      || (strcmp (name, "omp_get_level") == 0)
      || (strcmp (name, "omp_get_active_level") == 0)
      || (strcmp (name, "omp_in_parallel") == 0))
    {
      /* These queries evaluate to zero here, so replace the call with an
	 assignment of zero to its LHS (if any) and remove the call itself.  */
      tree lhs = gimple_call_lhs (stmt);
      if (lhs)
	{
	  gassign *assign
	    = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
	  gsi_insert_before (gsi, assign, GSI_SAME_STMT);
	}
      gsi_remove (gsi, true);
      return true;
    }

  /* The rest of the omp functions can stay as they are, the HSA back end will
     handle them correctly.  */
  gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
		       || (strcmp (name, "omp_get_num_teams") == 0)
		       || (strcmp (name, "omp_get_team_num") == 0));
  return false;
}
/* Given a sequence of statements within a distribute omp construct or a
   parallel construct, which in the original source does not form a compound
   construct with a looping construct, return true if it does not prevent us
   from turning it into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  IN_PARALLEL must be true if SEQ is within a
   parallel construct and false if it is only within a distribute
   construct.  */
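
/* For illustration only: the tiled shape handled below corresponds roughly to
   source like (identifiers are made up)

     #pragma omp target teams
     #pragma omp distribute
     for (int i0 = 0; i0 < n; i0 += TILE)
       {
	 #pragma omp parallel for
	 for (int i = i0; i < i0 + TILE; i++)
	   d[i] = a[i] * b[i];
       }

   where the distribute step and the trip count of the inner loop must agree
   on the tile (work-group) size.  */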
static bool
grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
				  bool in_parallel)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (grid_safe_assignment_p (stmt, grid)
	  || gimple_code (stmt) == GIMPLE_GOTO
	  || gimple_code (stmt) == GIMPLE_LABEL
	  || gimple_code (stmt) == GIMPLE_COND)
	continue;
      else if (gbind *bind = dyn_cast <gbind *> (stmt))
	{
	  if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
						 grid, in_parallel))
	    return false;
	}
      else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
	{
	  if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "the distribute "
				   "construct contains a try..catch region\n");
		  dump_printf_loc (MSG_NOTE, try_stmt,
				   "This statement cannot be analyzed for "
				   "tiled gridification\n");
		}
	      return false;
	    }
	  if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
						 grid, in_parallel))
	    return false;
	  if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
						 grid, in_parallel))
	    return false;
	}
      else if (is_gimple_call (stmt))
	{
	  tree fndecl = gimple_call_fndecl (stmt);
	  if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
	    continue;

	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "the distribute "
			       "construct contains a call\n");
	      dump_printf_loc (MSG_NOTE, stmt,
			       "This statement cannot be analyzed for "
			       "tiled gridification\n");
	    }
	  return false;
	}
      else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
	{
	  if (in_parallel)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "a parallel "
				   "construct contains another parallel "
				   "construct\n");
		  dump_printf_loc (MSG_NOTE, stmt,
				   "This parallel construct is nested in "
				   "another parallel construct\n");
		}
	      return false;
	    }
	  if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
	      || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
						    grid, true))
	    return false;
	}
      else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
	{
	  if (!in_parallel)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "a loop "
				   "construct is not nested within a parallel "
				   "construct\n");
		  dump_printf_loc (MSG_NOTE, stmt,
				   "This loop construct is not nested in "
				   "a parallel construct\n");
		}
	      return false;
	    }
	  if (!grid_gfor_follows_tiling_pattern (gfor, grid))
	    return false;
	}
      else
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "the distribute "
			       "construct contains a complex statement\n");
	      dump_printf_loc (MSG_NOTE, stmt,
			       "This statement cannot be analyzed for "
			       "tiled gridification\n");
	    }
	  return false;
	}
    }
  return true;
}
/* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
   return true, otherwise return false.  In the case of success, also fill in
   GRID with information describing the kernel grid.  */
static bool
grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
{
  if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
    return false;

  dump_user_location_t tloc = target;
  grid->target_loc = tloc;
  gimple *stmt
    = grid_find_single_omp_among_assignments (gimple_omp_body (target),
					      grid, "target");
  if (!stmt)
    return false;
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  tree group_size = NULL;
  if (!teams)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			 GRID_MISSED_MSG_PREFIX "it does not have a sole "
			 "teams construct in it.\n");
      return false;
    }

  tree clauses = gimple_omp_teams_clauses (teams);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
	{
	case OMP_CLAUSE_NUM_TEAMS:
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			     GRID_MISSED_MSG_PREFIX "the teams construct "
			     "contains a num_teams clause\n ");
	  return false;

	case OMP_CLAUSE_REDUCTION:
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			     GRID_MISSED_MSG_PREFIX "a reduction "
			     "clause is present\n ");
	  return false;

	case OMP_CLAUSE_THREAD_LIMIT:
	  if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
	    group_size = OMP_CLAUSE_OPERAND (clauses, 0);
	  break;

	default:
	  break;
	}
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }

  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
						 "teams");
  if (!stmt)
    return false;
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  if (!dist)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			 GRID_MISSED_MSG_PREFIX "the teams construct does not "
			 "have a single distribute construct in it.\n");
      return false;
    }

  gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);

  grid->collapse = gimple_omp_for_collapse (dist);
  if (grid->collapse > 3)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			 GRID_MISSED_MSG_PREFIX "the distribute construct "
			 "contains collapse clause with parameter greater "
			 "than 3\n");
      return false;
    }

  struct omp_for_data fd;
  struct omp_for_data_loop *dist_loops
    = (struct omp_for_data_loop *) alloca (grid->collapse
					   * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (dist, &fd, dist_loops);
  if (fd.chunk_size)
    {
      if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			     GRID_MISSED_MSG_PREFIX "the teams "
			     "thread limit is different from distribute "
			     "schedule chunk\n");
	  return false;
	}
      group_size = fd.chunk_size;
    }
  if (group_size && grid->collapse > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			 GRID_MISSED_MSG_PREFIX "group size cannot be "
			 "set using thread_limit or schedule clauses "
			 "when also using a collapse clause greater than 1\n");
      return false;
    }

  if (gimple_omp_for_combined_p (dist))
    {
      grid->tiling = false;
      grid->group_sizes[0] = group_size;
      for (unsigned i = 1; i < grid->collapse; i++)
	grid->group_sizes[i] = NULL;
      return grid_dist_follows_simple_pattern (dist, grid);
    }
  else
    {
      grid->tiling = true;
      if (group_size)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			     GRID_MISSED_MSG_PREFIX "group size cannot be set "
			     "using thread_limit or schedule clauses when "
			     "distribute and loop constructs do not form "
			     "one combined construct\n");
	  return false;
	}
      for (unsigned i = 0; i < grid->collapse; i++)
	{
	  if (fd.loops[i].cond_code == GT_EXPR)
	    grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
						TREE_TYPE (fd.loops[i].step),
						fd.loops[i].step);
	  else
	    grid->group_sizes[i] = fd.loops[i].step;
	}
      return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
					       false);
    }
}
/* Operand walker, used to remap pre-body declarations according to a hash map
   provided in DATA.  */

static tree
grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
{
  tree t = *tp;

  if (DECL_P (t) || TYPE_P (t))
    *walk_subtrees = 0;
  else
    *walk_subtrees = 1;

  if (VAR_P (t))
    {
      struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
      hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
      tree *repl = declmap->get (t);
      if (repl)
	*tp = *repl;
    }
  return NULL_TREE;
}
/* Identifiers of segments into which a particular variable should be
   placed.  */

enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
		       GRID_SEGMENT_GLOBAL};
/* Mark VAR so that it is eventually placed into SEGMENT.  Place an artificial
   builtin call into SEQ that will make sure the variable is always considered
   addressable.  */

static void
grid_mark_variable_segment (tree var, enum grid_var_segment segment)
{
  /* Marking non-addressable variables would require that we re-gimplify all
     their uses.  Fortunately, we do not have to do this because if they are
     not addressable, it means they are not used in atomic or parallel
     statements and so relaxed GPU consistency rules mean we can just keep them
     private.  */
  if (!TREE_ADDRESSABLE (var))
    return;

  switch (segment)
    {
    case GRID_SEGMENT_GROUP:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
					 NULL, DECL_ATTRIBUTES (var));
      break;
    case GRID_SEGMENT_GLOBAL:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
					 NULL, DECL_ATTRIBUTES (var));
      break;
    default:
      gcc_unreachable ();
    }

  if (!TREE_STATIC (var))
    {
      TREE_STATIC (var) = 1;
      const char *prefix = IDENTIFIER_POINTER (DECL_NAME (var));
      SET_DECL_ASSEMBLER_NAME (var, create_tmp_var_name (prefix));
      varpool_node::finalize_decl (var);
    }
}
/* Copy leading register-type assignments to local variables in SRC to just
   before DST, creating temporaries, adjusting mapping of operands in WI and
   remapping operands as necessary.  Add any new temporaries to TGT_BIND.
   Return the first statement that does not conform to grid_safe_assignment_p
   or NULL.  If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
   variables in traversed bind statements so that they are put into the
   appropriate segment.  */
static gimple *
grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
				     gbind *tgt_bind,
				     enum grid_var_segment var_segment,
				     struct walk_stmt_info *wi)
{
  hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gbind *bind = dyn_cast <gbind *> (stmt))
	{
	  gimple *r = grid_copy_leading_local_assignments
	    (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);

	  if (var_segment != GRID_SEGMENT_PRIVATE)
	    for (tree var = gimple_bind_vars (bind);
		 var;
		 var = DECL_CHAIN (var))
	      grid_mark_variable_segment (var, var_segment);
	  if (r)
	    return r;
	  else
	    continue;
	}
      if (!grid_safe_assignment_p (stmt, NULL))
	return stmt;
      tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
      tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
				 TREE_TYPE (lhs));
      DECL_CONTEXT (repl) = current_function_decl;
      gimple_bind_append_vars (tgt_bind, repl);

      declmap->put (lhs, repl);
      gassign *copy = as_a <gassign *> (gimple_copy (stmt));
      walk_gimple_op (copy, grid_remap_prebody_decls, wi);
      gsi_insert_before (dst, copy, GSI_SAME_STMT);
    }
  return NULL;
}
/* Statement walker function to make adjustments to statements within the
   gridified kernel copy.  */
static tree
grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
			struct walk_stmt_info *)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
  if (gimple_code (stmt) == GIMPLE_OMP_FOR
      && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
    {
      gomp_for *loop = as_a <gomp_for *> (stmt);
      tree clauses = gimple_omp_for_clauses (loop);
      tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
      if (cl)
	OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
      else
	{
	  tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
	  OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
	  OMP_CLAUSE_CHAIN (c) = clauses;
	  gimple_omp_for_set_clauses (loop, c);
	}
    }
  return NULL;
}
/* Given a PARLOOP that is a normal for looping construct but also a part of a
   combined construct with a simd loop, eliminate the simd loop.  */
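
/* For illustration only: when the kernel body contains, e.g.,

     #pragma omp parallel for simd

   the simd part is dropped here; its body and iteration setup are moved onto
   the parallel for, its linear clauses become private clauses, and the
   remaining simd-only clauses (safelen, simdlen, aligned) are discarded.  */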
static void
grid_eliminate_combined_simd_part (gomp_for *parloop)
{
  struct walk_stmt_info wi;

  memset (&wi, 0, sizeof (wi));
  enum gf_mask msk = GF_OMP_FOR_KIND_SIMD;
  wi.info = (void *) &msk;
  walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
  gimple *stmt = (gimple *) wi.info;
  /* We expect that the SIMD is the only statement in the parallel loop.  */
  gcc_checking_assert (stmt
		       && gimple_code (stmt) == GIMPLE_OMP_FOR
		       && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
		       && gimple_omp_for_combined_into_p (stmt)
		       && !gimple_omp_for_combined_p (stmt));
  gomp_for *simd = as_a <gomp_for *> (stmt);

  /* Copy over the iteration properties because the body refers to the index in
     the bottom-most loop.  */
  unsigned i, collapse = gimple_omp_for_collapse (parloop);
  gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
  for (i = 0; i < collapse; i++)
    {
      gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
      gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
      gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
      gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
    }

  tree *tgt = gimple_omp_for_clauses_ptr (parloop);
  while (*tgt)
    tgt = &OMP_CLAUSE_CHAIN (*tgt);

  /* Copy over all clauses, except for linear clauses, which are turned into
     private clauses, and all other simd-specific clauses, which are
     ignored.  */
  tree *pc = gimple_omp_for_clauses_ptr (simd);
  while (*pc)
    {
      tree c = *pc;
      switch (TREE_CODE (c))
	{
	case OMP_CLAUSE_LINEAR:
	  {
	    tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
	    OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
	    OMP_CLAUSE_CHAIN (priv) = NULL;
	    *tgt = priv;
	    tgt = &OMP_CLAUSE_CHAIN (priv);
	    pc = &OMP_CLAUSE_CHAIN (c);
	    break;
	  }

	case OMP_CLAUSE_SAFELEN:
	case OMP_CLAUSE_SIMDLEN:
	case OMP_CLAUSE_ALIGNED:
	  pc = &OMP_CLAUSE_CHAIN (c);
	  break;

	default:
	  *pc = OMP_CLAUSE_CHAIN (c);
	  OMP_CLAUSE_CHAIN (c) = NULL;
	  *tgt = c;
	  tgt = &OMP_CLAUSE_CHAIN (c);
	  break;
	}
    }

  /* Finally, throw away the simd and mark the parallel loop as not
     combined.  */
  gimple_omp_set_body (parloop, gimple_omp_body (simd));
  gimple_omp_for_set_combined_p (parloop, false);
}
/* Statement walker function marking all parallels as grid_phony and loops as
   grid ones representing threads of a particular thread group.  */
static tree
grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
			struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
    {
      *handled_ops_p = true;
      gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_intra_group (loop, true);
      if (gimple_omp_for_combined_p (loop))
	grid_eliminate_combined_simd_part (loop);

      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
			   grid_process_grid_body, NULL, &body_wi);

      gbind *bind = (gbind *) wi_in->info;
      tree c;
      for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
	  {
	    push_gimplify_context ();
	    tree ov = OMP_CLAUSE_DECL (c);
	    tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
				     TREE_TYPE (ov));

	    grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
	    DECL_CONTEXT (gv) = current_function_decl;
	    gimple_bind_append_vars (bind, gv);
	    tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
	    gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
	    x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
	    gimple_seq l = NULL;
	    gimplify_and_add (x, &l);
	    gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
	    pop_gimplify_context (bind);
	  }
    }
  return NULL;
}
/* Statement walker function marking all parallels as grid_phony and loops as
   grid ones representing threads of a particular thread group.  */
static tree
grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
				      bool *handled_ops_p,
				      struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  wi_in->removed_stmt = false;
  gimple *stmt = gsi_stmt (*gsi);
  if (gbind *bind = dyn_cast <gbind *> (stmt))
    {
      for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
	grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
    }
  else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
    {
      *handled_ops_p = true;
      gimple_omp_parallel_set_grid_phony (parallel, true);

      gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
      gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
      gimple_seq s = NULL;
      gimple_seq_add_stmt (&s, new_bind);
      gimple_omp_set_body (parallel, s);

      struct walk_stmt_info wi_par;
      memset (&wi_par, 0, sizeof (wi_par));
      wi_par.info = new_bind;
      walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
			   grid_mark_tiling_loops, NULL, &wi_par);
    }
  else if (is_a <gcall *> (stmt))
    wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
  return NULL;
}
/* Given freshly copied top level kernel SEQ, identify the individual OMP
   components, mark them as part of the kernel, copy assignments leading to
   them just before DST, remapping them using WI and adding new temporaries to
   TGT_BIND, and return the loop that will be used for kernel dispatch.  */
static gomp_for *
grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
			       gimple_stmt_iterator *dst,
			       gbind *tgt_bind, struct walk_stmt_info *wi)
{
  gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
						      GRID_SEGMENT_GLOBAL, wi);
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  gcc_checking_assert (teams);
  gimple_omp_teams_set_grid_phony (teams, true);
  stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
					      tgt_bind, GRID_SEGMENT_GLOBAL,
					      wi);
  gcc_checking_assert (stmt);
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  gcc_checking_assert (dist);
  gimple_seq prebody = gimple_omp_for_pre_body (dist);
  if (prebody)
    grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
					 GRID_SEGMENT_GROUP, wi);

  if (grid->tiling)
    {
      gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_group_iter (dist, true);

      struct walk_stmt_info wi_tiled;
      memset (&wi_tiled, 0, sizeof (wi_tiled));
      walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
			   grid_mark_tiling_parallels_and_loops, NULL,
			   &wi_tiled);
      return dist;
    }
  else
    {
      gimple_omp_for_set_grid_phony (dist, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
						  tgt_bind,
						  GRID_SEGMENT_PRIVATE, wi);
      gcc_checking_assert (stmt);
      gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
      gimple_omp_parallel_set_grid_phony (parallel, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
						  dst, tgt_bind,
						  GRID_SEGMENT_PRIVATE, wi);
      gomp_for *inner_loop = as_a <gomp_for *> (stmt);
      gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
      prebody = gimple_omp_for_pre_body (inner_loop);
      if (prebody)
	grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
					     GRID_SEGMENT_PRIVATE, wi);

      if (gimple_omp_for_combined_p (inner_loop))
	grid_eliminate_combined_simd_part (inner_loop);
      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
			   grid_process_grid_body, NULL, &body_wi);
      return inner_loop;
    }
}
/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
   create a GPU kernel for it.  GSI must point to the same statement, TGT_BIND
   is the bind into which temporaries inserted before TARGET should be
   added.  */
static void
grid_attempt_target_gridification (gomp_target *target,
				   gimple_stmt_iterator *gsi,
				   gbind *tgt_bind)
{
  /* removed group_size */
  grid_prop grid = {};
  if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
    return;

  location_t loc = gimple_location (target);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target,
		     "Target construct will be turned into a gridified HSA "
		     "kernel\n");

  /* Copy target body to a GPUKERNEL construct:  */
  gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
    (gimple_omp_body (target));

  hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (struct walk_stmt_info));
  wi.info = declmap;

  /* Copy assignments in between OMP statements before target, mark OMP
     statements within copy appropriately.  */
  gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
							tgt_bind, &wi);

  gbind *old_bind
    = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
  gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
  tree new_block = gimple_bind_block (new_bind);
  tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
  BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
  BLOCK_SUBBLOCKS (enc_block) = new_block;
  BLOCK_SUPERCONTEXT (new_block) = enc_block;
  gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
  gimple_seq_add_stmt
    (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
     gpukernel);

  for (size_t i = 0; i < grid.collapse; i++)
    walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
  push_gimplify_context ();
  for (size_t i = 0; i < grid.collapse; i++)
    {
      tree index_var = gimple_omp_for_index (inner_loop, i);
      tree itype, type = TREE_TYPE (index_var);
      if (POINTER_TYPE_P (type))
	itype = signed_type_for (type);
      else
	itype = type;

      enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
      tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
      walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
      tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
      walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
      tree step
	= omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));
      omp_adjust_for_condition (loc, &cond_code, &n2, index_var, step);
      n1 = fold_convert (itype, n1);
      n2 = fold_convert (itype, n2);

      tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);

      tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype, step));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
      t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
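
      /* T now holds the iteration count of this collapsed dimension (zero when
	 the loop does not iterate at all); below it becomes the grid size in
	 work-items, with the tile count further multiplied by the tile size in
	 the tiled case.  */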
      if (grid.tiling)
	{
	  if (cond_code == GT_EXPR)
	    step = fold_build1 (NEGATE_EXPR, itype, step);
	  t = fold_build2 (MULT_EXPR, itype, t, step);
	}

      tree gs = fold_convert (uint32_type_node, t);
      gimple_seq tmpseq = NULL;
      gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
      if (!gimple_seq_empty_p (tmpseq))
	gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);

      tree ws;
      if (grid.group_sizes[i])
	{
	  ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
	  tmpseq = NULL;
	  gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
	  if (!gimple_seq_empty_p (tmpseq))
	    gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
	}
      else
	ws = build_zero_cst (uint32_type_node);

      tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
      OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
      OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
      OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
      OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
      gimple_omp_target_set_clauses (target, c);
    }
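
  /* At this point TARGET carries one _griddim_ clause per collapsed dimension,
     recording the dimension number, the grid size and the work-group size, for
     the later phases that lay out the gridified kernel launch.  */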
  pop_gimplify_context (tgt_bind);
  delete declmap;
}
/* Walker function doing all the work for create_target_kernels.  */
static tree
grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
			       bool *handled_ops_p,
			       struct walk_stmt_info *incoming)
{
  *handled_ops_p = false;

  gimple *stmt = gsi_stmt (*gsi);
  gomp_target *target = dyn_cast <gomp_target *> (stmt);
  if (target)
    {
      gbind *tgt_bind = (gbind *) incoming->info;
      gcc_checking_assert (tgt_bind);
      grid_attempt_target_gridification (target, gsi, tgt_bind);
      return NULL;
    }
  gbind *bind = dyn_cast <gbind *> (stmt);
  if (bind)
    {
      *handled_ops_p = true;
      struct walk_stmt_info wi;
      memset (&wi, 0, sizeof (wi));
      wi.info = bind;
      walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
			   grid_gridify_all_targets_stmt, NULL, &wi);
    }
  return NULL;
}
/* Attempt to gridify all target constructs in BODY_P.  All such targets will
   have their bodies duplicated, with the new copy being put into a
   gimple_omp_grid_body statement.  All kernel-related constructs within the
   grid_body will be marked with phony flags or kernel kinds.  Moreover, some
   re-structuring is often needed, such as copying pre-bodies before the target
   construct so that kernel grid sizes can be computed.  */
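
/* (This is the entry point of the gridification machinery; it is expected to
   be invoked from OpenMP lowering, before the target bodies themselves are
   lowered, and only when HSA offloading is enabled.)  */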
void
omp_grid_gridify_all_targets (gimple_seq *body_p)
{
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
}