/* Lowering and expansion of OpenMP directives for HSA GPU agents.

   Copyright (C) 2013-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "omp-general.h"
#include "omp-low.h"
#include "omp-grid.h"
#include "gimple-pretty-print.h"

/* Return the lastprivate predicate for a given gridified loop described by
   FD.  */

tree
omp_grid_lastprivate_predicate (struct omp_for_data *fd)
{
  /* When dealing with a gridified loop, we need to check up to three collapsed
     iteration variables but they are not actually captured in this fd.
     Fortunately, we can easily rely on HSA builtins to get this
     information.  */
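
  /* Conceptually, the predicate built below tests, for each collapsed
     dimension DIM,

       <work-item id in DIM> + 1 == <number of work-items in DIM>

     with the individual comparisons conjoined by TRUTH_AND_EXPR, so it holds
     exactly for the work-item that executes the last iteration in every
     dimension.  The id and count come from the intra-group or grid-wide HSA
     builtins selected below.  */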
  tree id, size;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
      && gimple_omp_for_grid_intra_group (fd->for_stmt))
    {
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
      size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
    }
  else
    {
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
      size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
    }
  tree cond = NULL;
  for (int dim = 0; dim < fd->collapse; dim++)
    {
      tree dim_tree = build_int_cstu (unsigned_type_node, dim);
      tree u1 = build_int_cstu (unsigned_type_node, 1);
      tree c2
        = build2 (EQ_EXPR, boolean_type_node,
                  build2 (PLUS_EXPR, unsigned_type_node,
                          build_call_expr (id, 1, dim_tree), u1),
                  build_call_expr (size, 1, dim_tree));
      if (cond)
        cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
      else
        cond = c2;
    }
  return cond;
}

/* Structure describing the basic properties of the loop we are analyzing
   for gridification and of the gridification itself.  */

class grid_prop
{
public:
  /* True when we are doing tiling gridification, i.e. when there is a distinct
     distribute loop over groups and a loop construct over work-items.  False
     when distribute and parallel for loops form a combined construct.  */
  bool tiling;
  /* Location of the target construct for optimization information
     messages.  */
  dump_user_location_t target_loc;
  /* The collapse clause of the involved loops.  Collapse value of all of them
     must be the same for gridification to take place.  */
  size_t collapse;
  /* Group sizes, if requested by the user, or NULL if not requested.  */
  tree group_sizes[3];
};

#define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
  "gridified HSA kernel because "

/* Return true if STMT is an assignment of a register-type value into a local
   VAR_DECL.  If GRID is non-NULL, the assignment additionally must not be to
   any of the trees specifying group sizes there.  */

static bool
grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
{
  gassign *assign = dyn_cast <gassign *> (stmt);
  if (!assign)
    return false;
  if (gimple_clobber_p (assign))
    return true;
  tree lhs = gimple_assign_lhs (assign);
  if (!VAR_P (lhs)
      || !is_gimple_reg_type (TREE_TYPE (lhs))
      || is_global_var (lhs))
    return false;
  if (grid)
    for (unsigned i = 0; i < grid->collapse; i++)
      if (lhs == grid->group_sizes[i])
        return false;
  return true;
}

/* Return true if all statements in SEQ are assignments to local register-type
   variables that do not hold group size information.  */

static bool
grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
{
  if (!seq)
    return true;

  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
      return false;
  return true;
}

/* Scan statements in SEQ and call itself recursively on any bind.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  If during the whole search only assignments to
   register-type local variables (that do not overwrite group size
   information) and a single OMP statement are encountered, return true,
   otherwise return false.  RET is where we store any OMP statement
   encountered.  */

static bool
grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
                                          const char *name, gimple **ret)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (grid_safe_assignment_p (stmt, grid))
        continue;
      if (gbind *bind = dyn_cast <gbind *> (stmt))
        {
          gimple_seq bind_body = gimple_bind_body (bind);
          if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
                                                         ret))
            return false;
        }
      else if (is_gimple_omp (stmt))
        {
          if (*ret)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "%s construct "
                                   "contains multiple OpenMP constructs\n",
                                   name);
                  dump_printf_loc (MSG_NOTE, *ret,
                                   "The first OpenMP construct within "
                                   "a parallel\n");
                  dump_printf_loc (MSG_NOTE, stmt,
                                   "The second OpenMP construct within "
                                   "a parallel\n");
                }
              return false;
            }
          *ret = stmt;
        }
      else
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "%s construct contains "
                               "a complex statement\n", name);
              dump_printf_loc (MSG_NOTE, stmt,
                               "This statement cannot be analyzed for "
                               "gridification\n");
            }
          return false;
        }
    }
  return true;
}

/* Scan statements in SEQ and make sure that it and any binds in it contain
   only assignments to local register-type variables (that do not overwrite
   group size information) and one OMP construct.  If so, return that
   construct, otherwise return NULL.  GRID describes hitherto discovered
   properties of the loop that is evaluated for possible gridification.  If
   dumping is enabled and the function fails, use NAME to dump a note with the
   reason for failure.  */

static gimple *
grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
                                        const char *name)
{
  if (!seq)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                         GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
                         name);
      return NULL;
    }

  gimple *ret = NULL;
  if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
    {
      if (!ret && dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                         GRID_MISSED_MSG_PREFIX "%s construct does not contain"
                         " any other OpenMP construct\n", name);
      return ret;
    }
  else
    return NULL;
}

/* Walker function looking for statements there is no point in gridifying (and
   for noreturn function calls which we cannot do).  Return non-NULL if such a
   statement is found.  */

static tree
grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
                                   bool *handled_ops_p,
                                   struct walk_stmt_info *wi)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
  switch (gimple_code (stmt))
    {
    case GIMPLE_CALL:
      if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
        {
          *handled_ops_p = true;
          wi->info = stmt;
          return error_mark_node;
        }
      break;

      /* We may reduce the following list if we find a way to implement the
         clauses, but for now there is no point in trying further.  */
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_SECTIONS:
    case GIMPLE_OMP_SECTIONS_SWITCH:
    case GIMPLE_OMP_TARGET:
    case GIMPLE_OMP_ORDERED:
      *handled_ops_p = true;
      wi->info = stmt;
      return error_mark_node;
    default:
      break;
    }
  return NULL;
}

/* Examine the clauses of the omp parallel statement PAR and if any of them
   prevents gridification, issue a missed-optimization diagnostic and return
   false, otherwise return true.  TLOC is the location of the target construct
   used for dump messages.  */

static bool
grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc)
{
  tree clauses = gimple_omp_parallel_clauses (par);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
        {
        case OMP_CLAUSE_NUM_THREADS:
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                               GRID_MISSED_MSG_PREFIX "there is "
                               "a num_threads clause of the parallel "
                               "construct\n");
              dump_printf_loc (MSG_NOTE, par,
                               "Parallel construct has a num_threads clause\n");
            }
          return false;

        case OMP_CLAUSE_REDUCTION:
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                               GRID_MISSED_MSG_PREFIX "a reduction clause "
                               "is present\n");
              dump_printf_loc (MSG_NOTE, par,
                               "Parallel construct has a reduction clause\n");
            }
          return false;

        default:
          break;
        }
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }
  return true;
}

/* Examine the clauses and the body of the omp loop statement GFOR and if
   something prevents gridification, issue a missed-optimization diagnostic
   and return false, otherwise return true.  GRID describes hitherto
   discovered properties of the loop that is evaluated for possible
   gridification.  */

static bool
grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
{
  if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
                                                 grid))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                           GRID_MISSED_MSG_PREFIX "the inner loop "
                           "bounds computation contains a complex "
                           "statement\n");
          dump_printf_loc (MSG_NOTE, gfor,
                           "Loop construct cannot be analyzed for "
                           "gridification\n");
        }
      return false;
    }

  tree clauses = gimple_omp_for_clauses (gfor);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
        {
        case OMP_CLAUSE_SCHEDULE:
          if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "the inner loop "
                                   "has a non-automatic schedule clause\n");
                  dump_printf_loc (MSG_NOTE, gfor,
                                   "Loop construct has a non-automatic "
                                   "schedule clause\n");
                }
              return false;
            }
          break;

        case OMP_CLAUSE_REDUCTION:
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "a reduction "
                               "clause is present\n");
              dump_printf_loc (MSG_NOTE, gfor,
                               "Loop construct has a reduction "
                               "clause\n");
            }
          return false;

        default:
          break;
        }
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  if (walk_gimple_seq (gimple_omp_body (gfor),
                       grid_find_ungridifiable_statement,
                       NULL, &wi))
    {
      gimple *bad = (gimple *) wi.info;
      if (dump_enabled_p ())
        {
          if (is_gimple_call (bad))
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                             GRID_MISSED_MSG_PREFIX "the inner loop contains "
                             "a call to a noreturn function\n");
          else
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                             GRID_MISSED_MSG_PREFIX "the inner loop contains "
                             "statement %s which cannot be transformed\n",
                             gimple_code_name[(int) gimple_code (bad)]);
          dump_printf_loc (MSG_NOTE, bad,
                           "This statement cannot be analyzed for "
                           "gridification\n");
        }
      return false;
    }
  return true;
}

/* Given a distribute omp construct represented by DIST, which in the original
   source forms a compound construct with a looping construct, return true if
   it can be turned into a gridified HSA kernel.  Otherwise return false.
   GRID describes hitherto discovered properties of the loop that is evaluated
   for possible gridification.  */
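
/* Roughly, the shape accepted by this function corresponds to source code
   such as the following (clauses and leading local assignments omitted):

     #pragma omp distribute
     #pragma omp parallel
     #pragma omp for
     for (...)
       ...

   with all three constructs sharing the same collapse factor.  */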

static bool
grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
{
  dump_user_location_t tloc = grid->target_loc;
  gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
                                                         grid, "distribute");
  gomp_parallel *par;
  if (!stmt
      || !(par = dyn_cast <gomp_parallel *> (stmt))
      || !grid_parallel_clauses_gridifiable (par, tloc))
    return false;

  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
                                                 "parallel");
  gomp_for *gfor;
  if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
    return false;

  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "the inner loop is not "
                         "a simple for loop\n");
      return false;
    }
  gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  return true;
}

/* Given an omp loop statement GFOR, return true if it can participate in
   tiling gridification, i.e. in one where the distribute and parallel for
   loops do not form a compound statement.  GRID describes hitherto discovered
   properties of the loop that is evaluated for possible gridification.  */

static bool
grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
{
  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                           GRID_MISSED_MSG_PREFIX "an inner loop is not "
                           "a simple for loop\n");
          dump_printf_loc (MSG_NOTE, gfor,
                           "This statement is not a simple for loop\n");
        }
      return false;
    }

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  if (gimple_omp_for_collapse (gfor) != grid->collapse)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                           GRID_MISSED_MSG_PREFIX "an inner loop does not "
                           "use the same collapse clause\n");
          dump_printf_loc (MSG_NOTE, gfor,
                           "Loop construct uses a different collapse clause\n");
        }
      return false;
    }

  struct omp_for_data fd;
  struct omp_for_data_loop *loops
    = (struct omp_for_data_loop *) alloca (grid->collapse
                                           * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (gfor, &fd, loops);
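
  /* For each dimension, compute the constant trip count of the inner loop,
     i.e. (N2 - N1 + STEP +/- 1) / STEP truncated towards zero, and verify
     that it matches the tile size recorded from the distribute construct.  */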
  for (unsigned i = 0; i < grid->collapse; i++)
    {
      tree itype, type = TREE_TYPE (fd.loops[i].v);
      if (POINTER_TYPE_P (type))
        itype = signed_type_for (type);
      else
        itype = type;

      tree n1 = fold_convert (itype, fd.loops[i].n1);
      tree n2 = fold_convert (itype, fd.loops[i].n2);
      tree t = build_int_cst (itype,
                              (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
        t = fold_build2 (TRUNC_DIV_EXPR, itype,
                         fold_build1 (NEGATE_EXPR, itype, t),
                         fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
      else
        t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);

      if (!operand_equal_p (grid->group_sizes[i], t, 0))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "the distribute and "
                               "an internal loop do not agree on tile size\n");
              dump_printf_loc (MSG_NOTE, gfor,
                               "Loop construct does not seem to loop over "
                               "a tile size\n");
            }
          return false;
        }
    }
  return true;
}

/* Facing a call to FNDECL in the body of a distribute construct, return true
   if we can handle it or false if it precludes gridification.  */

static bool
grid_call_permissible_in_distribute_p (tree fndecl)
{
  if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
    return true;

  const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  if (strstr (name, "omp_") != name)
    return false;

  if ((strcmp (name, "omp_get_thread_num") == 0)
      || (strcmp (name, "omp_get_num_threads") == 0)
      || (strcmp (name, "omp_get_num_teams") == 0)
      || (strcmp (name, "omp_get_team_num") == 0)
      || (strcmp (name, "omp_get_level") == 0)
      || (strcmp (name, "omp_get_active_level") == 0)
      || (strcmp (name, "omp_in_parallel") == 0))
    return true;

  return false;
}

/* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
   of a distribute construct that is pointed at by GSI, modify it as necessary
   for gridification.  If the statement itself got removed, return true.  */
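
/* For example, a statement such as

     i = omp_get_thread_num ();

   in a distribute body gets replaced below by assigning constant zero to I:
   outside of a parallel construct each team executes the distribute body
   with a single thread, so the result is always zero.  */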

static bool
grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  tree fndecl = gimple_call_fndecl (stmt);
  gcc_checking_assert (fndecl);
  if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
    return false;

  const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  if ((strcmp (name, "omp_get_thread_num") == 0)
      || (strcmp (name, "omp_get_level") == 0)
      || (strcmp (name, "omp_get_active_level") == 0)
      || (strcmp (name, "omp_in_parallel") == 0))
    {
      tree lhs = gimple_call_lhs (stmt);
      if (lhs)
        {
          gassign *assign
            = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
          gsi_insert_before (gsi, assign, GSI_SAME_STMT);
        }
      gsi_remove (gsi, true);
      return true;
    }

  /* The rest of the omp functions can stay as they are, the HSA back end will
     handle them correctly.  */
  gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
                       || (strcmp (name, "omp_get_num_teams") == 0)
                       || (strcmp (name, "omp_get_team_num") == 0));
  return false;
}

/* Given a sequence of statements within a distribute omp construct or a
   parallel construct, which in the original source does not form a compound
   construct with a looping construct, return true if it does not prevent us
   from turning it into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  IN_PARALLEL must be true if SEQ is within a
   parallel construct and false if it is only within a distribute
   construct.  */
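
/* Roughly, the tiling shape accepted here corresponds to source such as

     #pragma omp distribute
       ... local assignments, permissible calls ...
       #pragma omp parallel
         ...
         #pragma omp for
           ... body looping over one tile ...

   where the distribute and loop constructs do not form one combined
   construct.  */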

static bool
grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
                                  bool in_parallel)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (grid_safe_assignment_p (stmt, grid)
          || gimple_code (stmt) == GIMPLE_GOTO
          || gimple_code (stmt) == GIMPLE_LABEL
          || gimple_code (stmt) == GIMPLE_COND)
        continue;
      else if (gbind *bind = dyn_cast <gbind *> (stmt))
        {
          if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
                                                 grid, in_parallel))
            return false;
          continue;
        }
      else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
        {
          if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "the distribute "
                                   "construct contains a try..catch region\n");
                  dump_printf_loc (MSG_NOTE, try_stmt,
                                   "This statement cannot be analyzed for "
                                   "tiled gridification\n");
                }
              return false;
            }
          if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
                                                 grid, in_parallel))
            return false;
          if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
                                                 grid, in_parallel))
            return false;
          continue;
        }
      else if (is_gimple_call (stmt))
        {
          tree fndecl = gimple_call_fndecl (stmt);
          if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
            continue;

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "the distribute "
                               "construct contains a call\n");
              dump_printf_loc (MSG_NOTE, stmt,
                               "This statement cannot be analyzed for "
                               "tiled gridification\n");
            }
          return false;
        }
      else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
        {
          if (in_parallel)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "a parallel "
                                   "construct contains another parallel "
                                   "construct\n");
                  dump_printf_loc (MSG_NOTE, stmt,
                                   "This parallel construct is nested in "
                                   "another one\n");
                }
              return false;
            }
          if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
              || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
                                                    grid, true))
            return false;
        }
      else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
        {
          if (!in_parallel)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "a loop "
                                   "construct is not nested within a parallel "
                                   "construct\n");
                  dump_printf_loc (MSG_NOTE, stmt,
                                   "This loop construct is not nested in "
                                   "a parallel construct\n");
                }
              return false;
            }
          if (!grid_gfor_follows_tiling_pattern (gfor, grid))
            return false;
        }
      else
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "the distribute "
                               "construct contains a complex statement\n");
              dump_printf_loc (MSG_NOTE, stmt,
                               "This statement cannot be analyzed for "
                               "tiled gridification\n");
            }
          return false;
        }
    }
  return true;
}

/* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
   return true, otherwise return false.  In the case of success, also fill in
   GRID with information describing the kernel grid.  */
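
/* The overall shape expected of TARGET is therefore

     #pragma omp target
     #pragma omp teams [thread_limit (E)]
     #pragma omp distribute [collapse (C)]
     ...

   with at most local register-type assignments between the individual
   constructs, and with the loop nest below the distribute matching either
   the simple or the tiling pattern checked above.  */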

static bool
grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
{
  if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
    return false;

  dump_user_location_t tloc = target;
  grid->target_loc = tloc;
  gimple *stmt
    = grid_find_single_omp_among_assignments (gimple_omp_body (target),
                                              grid, "target");
  if (!stmt)
    return false;
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  tree group_size = NULL;
  if (!teams)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "it does not have a sole "
                         "teams construct in it\n");
      return false;
    }

  tree clauses = gimple_omp_teams_clauses (teams);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
        {
        case OMP_CLAUSE_NUM_TEAMS:
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "the teams construct "
                             "contains a num_teams clause\n");
          return false;

        case OMP_CLAUSE_REDUCTION:
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "a reduction "
                             "clause is present\n");
          return false;

        case OMP_CLAUSE_THREAD_LIMIT:
          if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
            group_size = OMP_CLAUSE_OPERAND (clauses, 0);
          break;

        default:
          break;
        }
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }

  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
                                                 "teams");
  if (!stmt)
    return false;
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  if (!dist)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "the teams construct does not "
                         "have a single distribute construct in it\n");
      return false;
    }

  gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);

  grid->collapse = gimple_omp_for_collapse (dist);
  if (grid->collapse > 3)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "the distribute construct "
                         "contains a collapse clause with a parameter greater "
                         "than 3\n");
      return false;
    }

  struct omp_for_data fd;
  struct omp_for_data_loop *dist_loops
    = (struct omp_for_data_loop *) alloca (grid->collapse
                                           * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (dist, &fd, dist_loops);
  if (fd.chunk_size)
    {
      if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "the teams "
                             "thread limit is different from the distribute "
                             "schedule chunk\n");
          return false;
        }
      group_size = fd.chunk_size;
    }
  if (group_size && grid->collapse > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "group size cannot be "
                         "set using thread_limit or schedule clauses "
                         "when also using a collapse clause greater than 1\n");
      return false;
    }

  if (gimple_omp_for_combined_p (dist))
    {
      grid->tiling = false;
      grid->group_sizes[0] = group_size;
      for (unsigned i = 1; i < grid->collapse; i++)
        grid->group_sizes[i] = NULL;
      return grid_dist_follows_simple_pattern (dist, grid);
    }
  else
    {
      grid->tiling = true;
      if (group_size)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "group size cannot be set "
                             "using thread_limit or schedule clauses when "
                             "distribute and loop constructs do not form "
                             "one combined construct\n");
          return false;
        }
      for (unsigned i = 0; i < grid->collapse; i++)
        {
          if (fd.loops[i].cond_code == GT_EXPR)
            grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
                                                TREE_TYPE (fd.loops[i].step),
                                                fd.loops[i].step);
          else
            grid->group_sizes[i] = fd.loops[i].step;
        }
      return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
                                               false);
    }
}

/* Operand walker, used to remap pre-body declarations according to a hash map
   provided in DATA.  */

static tree
grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
{
  tree t = *tp;

  if (DECL_P (t) || TYPE_P (t))
    *walk_subtrees = 0;
  else
    *walk_subtrees = 1;

  if (VAR_P (t))
    {
      struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
      hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
      tree *repl = declmap->get (t);
      if (repl)
        *tp = *repl;
    }
  return NULL_TREE;
}

/* Identifiers of segments into which a particular variable should be placed
   when gridifying.  */

enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
                       GRID_SEGMENT_GLOBAL};

/* Mark VAR so that it is eventually placed into SEGMENT.  */

static void
grid_mark_variable_segment (tree var, enum grid_var_segment segment)
{
  /* Making variables non-addressable would require that we re-gimplify all
     their uses.  Fortunately, we do not have to do this because if they are
     not addressable, it means they are not used in atomic or parallel
     statements and so relaxed GPU consistency rules mean we can just keep them
     private.  */
  if (!TREE_ADDRESSABLE (var))
    return;

  switch (segment)
    {
    case GRID_SEGMENT_GROUP:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
                                         NULL, DECL_ATTRIBUTES (var));
      break;
    case GRID_SEGMENT_GLOBAL:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
                                         NULL, DECL_ATTRIBUTES (var));
      break;
    default:
      gcc_unreachable ();
    }
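
  /* A segment-annotated variable must survive as a file-scope symbol, so
     promote it to a static variable with a fresh assembler name; the HSA
     back end can then allocate it in the requested segment when emitting
     the kernel.  */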
  if (!TREE_STATIC (var))
    {
      TREE_STATIC (var) = 1;
      const char *prefix = IDENTIFIER_POINTER (DECL_NAME (var));
      SET_DECL_ASSEMBLER_NAME (var, create_tmp_var_name (prefix));
      varpool_node::finalize_decl (var);
    }
}

/* Copy leading register-type assignments to local variables in SRC to just
   before DST, creating temporaries, adjusting the mapping of operands in WI
   and remapping operands as necessary.  Add any new temporaries to TGT_BIND.
   Return the first statement that does not conform to grid_safe_assignment_p
   or NULL.  If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
   variables in traversed bind statements so that they are put into the
   appropriate segment.  */

static gimple *
grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
                                     gbind *tgt_bind,
                                     enum grid_var_segment var_segment,
                                     struct walk_stmt_info *wi)
{
  hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gbind *bind = dyn_cast <gbind *> (stmt))
        {
          gimple *r = grid_copy_leading_local_assignments
            (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);

          if (var_segment != GRID_SEGMENT_PRIVATE)
            for (tree var = gimple_bind_vars (bind);
                 var;
                 var = DECL_CHAIN (var))
              grid_mark_variable_segment (var, var_segment);
          if (r)
            return r;
          else
            continue;
        }
      if (!grid_safe_assignment_p (stmt, NULL))
        return stmt;
      tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
      tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
                                 TREE_TYPE (lhs));
      DECL_CONTEXT (repl) = current_function_decl;
      gimple_bind_append_vars (tgt_bind, repl);

      declmap->put (lhs, repl);
      gassign *copy = as_a <gassign *> (gimple_copy (stmt));
      walk_gimple_op (copy, grid_remap_prebody_decls, wi);
      gsi_insert_before (dst, copy, GSI_SAME_STMT);
    }
  return NULL;
}

/* Statement walker function to make adjustments to statements within the
   gridified kernel copy.  */

static tree
grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
                        struct walk_stmt_info *)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
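  /* Gridification maps loop iterations to individual work-items, so force
     safelen (1) on any SIMD construct in the kernel body to prevent it from
     being treated as vectorizable within a single work-item.  */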
  if (gimple_code (stmt) == GIMPLE_OMP_FOR
      && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
    {
      gomp_for *loop = as_a <gomp_for *> (stmt);
      tree clauses = gimple_omp_for_clauses (loop);
      tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
      if (cl)
        OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
      else
        {
          tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
          OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
          OMP_CLAUSE_CHAIN (c) = clauses;
          gimple_omp_for_set_clauses (loop, c);
        }
    }
  return NULL_TREE;
}

/* Given a PARLOOP that is a normal for looping construct but also a part of a
   combined construct with a simd loop, eliminate the simd loop.  */

static void
grid_eliminate_combined_simd_part (gomp_for *parloop)
{
  struct walk_stmt_info wi;

  memset (&wi, 0, sizeof (wi));
  wi.val_only = true;
  enum gf_mask msk = GF_OMP_FOR_KIND_SIMD;
  wi.info = (void *) &msk;
  walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
  gimple *stmt = (gimple *) wi.info;
  /* We expect that the SIMD construct is the only statement in the parallel
     loop.  */
  gcc_assert (stmt
              && gimple_code (stmt) == GIMPLE_OMP_FOR
              && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
              && gimple_omp_for_combined_into_p (stmt)
              && !gimple_omp_for_combined_p (stmt));
  gomp_for *simd = as_a <gomp_for *> (stmt);

  /* Copy over the iteration properties because the body refers to the index in
     the bottom-most loop.  */
  unsigned i, collapse = gimple_omp_for_collapse (parloop);
  gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
  for (i = 0; i < collapse; i++)
    {
      gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
      gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
      gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
      gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
    }

  tree *tgt = gimple_omp_for_clauses_ptr (parloop);
  while (*tgt)
    tgt = &OMP_CLAUSE_CHAIN (*tgt);

  /* Copy over all clauses, except for linear clauses, which are turned into
     private clauses, and all other simd-specific clauses, which are
     ignored.  */
  tree *pc = gimple_omp_for_clauses_ptr (simd);
  while (*pc)
    {
      tree c = *pc;
      switch (OMP_CLAUSE_CODE (c))
        {
        case OMP_CLAUSE_LINEAR:
          {
            tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
            OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
            OMP_CLAUSE_CHAIN (priv) = NULL;
            *tgt = priv;
            tgt = &OMP_CLAUSE_CHAIN (priv);
            pc = &OMP_CLAUSE_CHAIN (c);
            break;
          }

        case OMP_CLAUSE_SAFELEN:
        case OMP_CLAUSE_SIMDLEN:
        case OMP_CLAUSE_ALIGNED:
          pc = &OMP_CLAUSE_CHAIN (c);
          break;

        default:
          *pc = OMP_CLAUSE_CHAIN (c);
          OMP_CLAUSE_CHAIN (c) = NULL;
          *tgt = c;
          tgt = &OMP_CLAUSE_CHAIN (c);
          break;
        }
    }

  /* Finally, throw away the simd and mark the parallel loop as not
     combined.  */
  gimple_omp_set_body (parloop, gimple_omp_body (simd));
  gimple_omp_for_set_combined_p (parloop, false);
}

/* Statement walker function marking all loops as grid loops representing
   threads of a particular thread group.  */

static tree
grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
                        struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
    {
      *handled_ops_p = true;
      gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_intra_group (loop, true);
      if (gimple_omp_for_combined_p (loop))
        grid_eliminate_combined_simd_part (loop);

      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
                           grid_process_grid_body, NULL, &body_wi);

      gbind *bind = (gbind *) wi_in->info;
      tree c;
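      /* For each lastprivate clause, the value assigned in the last iteration
         is additionally stored into a new group-segment copy GV of the
         original variable OV, and after the loop it is copied from GV back
         into OV, making the lastprivate value visible to every work-item in
         the group.  */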
      for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
        if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
          {
            push_gimplify_context ();
            tree ov = OMP_CLAUSE_DECL (c);
            tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
                                     TREE_TYPE (ov));

            grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
            DECL_CONTEXT (gv) = current_function_decl;
            gimple_bind_append_vars (bind, gv);
            tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
            gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
            x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
            gimple_seq l = NULL;
            gimplify_and_add (x, &l);
            gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
            pop_gimplify_context (bind);
          }
    }
  return NULL_TREE;
}

/* Statement walker function marking all parallels as grid_phony and loops as
   grid ones representing threads of a particular thread group.  */

static tree
grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
                                      bool *handled_ops_p,
                                      struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  wi_in->removed_stmt = false;
  gimple *stmt = gsi_stmt (*gsi);
  if (gbind *bind = dyn_cast <gbind *> (stmt))
    {
      for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
        grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
    }
  else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
    {
      *handled_ops_p = true;
      gimple_omp_parallel_set_grid_phony (parallel, true);

      gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
      gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
      gimple_seq s = NULL;
      gimple_seq_add_stmt (&s, new_bind);
      gimple_omp_set_body (parallel, s);

      struct walk_stmt_info wi_par;
      memset (&wi_par, 0, sizeof (wi_par));
      wi_par.info = new_bind;
      walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
                           grid_mark_tiling_loops, NULL, &wi_par);
    }
  else if (is_a <gcall *> (stmt))
    wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
  return NULL_TREE;
}

/* Given a freshly copied top level kernel SEQ, identify the individual OMP
   components, mark them as part of the kernel, copy assignments leading to
   them just before DST, remapping them using WI and adding new temporaries to
   TGT_BIND, and return the loop that will be used for kernel dispatch.  */

static gomp_for *
grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
                               gimple_stmt_iterator *dst,
                               gbind *tgt_bind, struct walk_stmt_info *wi)
{
  gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
                                                      GRID_SEGMENT_GLOBAL, wi);
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  gcc_assert (teams);
  gimple_omp_teams_set_grid_phony (teams, true);
  stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
                                              tgt_bind, GRID_SEGMENT_GLOBAL,
                                              wi);
  gcc_checking_assert (stmt);
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  gcc_assert (dist);
  gimple_seq prebody = gimple_omp_for_pre_body (dist);
  if (prebody)
    grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
                                         GRID_SEGMENT_GROUP, wi);

  if (grid->tiling)
    {
      gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_group_iter (dist, true);

      struct walk_stmt_info wi_tiled;
      memset (&wi_tiled, 0, sizeof (wi_tiled));
      walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
                           grid_mark_tiling_parallels_and_loops, NULL,
                           &wi_tiled);
      return dist;
    }
  else
    {
      gimple_omp_for_set_grid_phony (dist, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
                                                  tgt_bind,
                                                  GRID_SEGMENT_PRIVATE, wi);
      gcc_checking_assert (stmt);
      gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
      gimple_omp_parallel_set_grid_phony (parallel, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
                                                  dst, tgt_bind,
                                                  GRID_SEGMENT_PRIVATE, wi);
      gomp_for *inner_loop = as_a <gomp_for *> (stmt);
      gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
      prebody = gimple_omp_for_pre_body (inner_loop);
      if (prebody)
        grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
                                             GRID_SEGMENT_PRIVATE, wi);

      if (gimple_omp_for_combined_p (inner_loop))
        grid_eliminate_combined_simd_part (inner_loop);
      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
                           grid_process_grid_body, NULL, &body_wi);

      return inner_loop;
    }
}

/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
   create a GPU kernel for it.  GSI must point to the same statement, TGT_BIND
   is the bind into which temporaries inserted before TARGET should be
   added.  */

static void
grid_attempt_target_gridification (gomp_target *target,
                                   gimple_stmt_iterator *gsi,
                                   gbind *tgt_bind)
{
  grid_prop grid = {};
  if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
    return;

  location_t loc = gimple_location (target);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target,
                     "Target construct will be turned into a gridified HSA "
                     "kernel\n");

  /* Copy target body to a GPUKERNEL construct:  */
  gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
    (gimple_omp_body (target));

  hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (struct walk_stmt_info));
  wi.info = declmap;

  /* Copy assignments in between OMP statements before target, mark OMP
     statements within copy appropriately.  */
  gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
                                                        tgt_bind, &wi);

  gbind *old_bind
    = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
  gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
  tree new_block = gimple_bind_block (new_bind);
  tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
  BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
  BLOCK_SUBBLOCKS (enc_block) = new_block;
  BLOCK_SUPERCONTEXT (new_block) = enc_block;
  gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
  gimple_seq_add_stmt
    (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
     gpukernel);

  for (size_t i = 0; i < grid.collapse; i++)
    walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
  push_gimplify_context ();
  for (size_t i = 0; i < grid.collapse; i++)
    {
      tree index_var = gimple_omp_for_index (inner_loop, i);
      tree itype, type = TREE_TYPE (index_var);
      if (POINTER_TYPE_P (type))
        itype = signed_type_for (type);
      else
        itype = type;

      enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
      tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
      walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
      tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
      walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
      tree step
        = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));
      omp_adjust_for_condition (loc, &cond_code, &n2, index_var, step);
      n1 = fold_convert (itype, n1);
      n2 = fold_convert (itype, n2);
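
      /* Compute the grid size of dimension I as the trip count of the inner
         loop, (N2 - N1 + STEP +/- 1) / STEP, selecting zero via COND built
         below when the loop would not iterate at all.  */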
      tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);

      tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
        t = fold_build2 (TRUNC_DIV_EXPR, itype,
                         fold_build1 (NEGATE_EXPR, itype, t),
                         fold_build1 (NEGATE_EXPR, itype, step));
      else
        t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
      t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
      if (grid.tiling)
        {
          if (cond_code == GT_EXPR)
            step = fold_build1 (NEGATE_EXPR, itype, step);
          t = fold_build2 (MULT_EXPR, itype, t, step);
        }

      tree gs = fold_convert (uint32_type_node, t);
      gimple_seq tmpseq = NULL;
      gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
      if (!gimple_seq_empty_p (tmpseq))
        gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);

      tree ws;
      if (grid.group_sizes[i])
        {
          ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
          tmpseq = NULL;
          gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
          if (!gimple_seq_empty_p (tmpseq))
            gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
        }
      else
        ws = build_zero_cst (uint32_type_node);
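
      /* Record the computed sizes in an artificial _griddim_ clause chained
         onto the target construct: GS is the grid size and WS the work-group
         size of dimension I (zero when no group size was requested).  */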
      tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
      OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
      OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
      OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
      OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
      gimple_omp_target_set_clauses (target, c);
    }
  pop_gimplify_context (tgt_bind);
  delete declmap;
  return;
}

/* Walker function doing all the work for omp_grid_gridify_all_targets.  */

static tree
grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
                               bool *handled_ops_p,
                               struct walk_stmt_info *incoming)
{
  *handled_ops_p = false;

  gimple *stmt = gsi_stmt (*gsi);
  gomp_target *target = dyn_cast <gomp_target *> (stmt);
  if (target)
    {
      gbind *tgt_bind = (gbind *) incoming->info;
      gcc_checking_assert (tgt_bind);
      grid_attempt_target_gridification (target, gsi, tgt_bind);
      return NULL_TREE;
    }
  gbind *bind = dyn_cast <gbind *> (stmt);
  if (bind)
    {
      *handled_ops_p = true;
      struct walk_stmt_info wi;
      memset (&wi, 0, sizeof (wi));
      wi.info = bind;
      walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
                           grid_gridify_all_targets_stmt, NULL, &wi);
    }
  return NULL_TREE;
}

/* Attempt to gridify all target constructs in BODY_P.  All such targets will
   have their bodies duplicated, with the new copy being put into a
   gimple_omp_grid_body statement.  All kernel-related constructs within the
   grid_body will be marked with phony flags or kernel kinds.  Moreover, some
   re-structuring is often needed, such as copying pre-bodies before the target
   construct so that kernel grid sizes can be computed.  */

void
omp_grid_gridify_all_targets (gimple_seq *body_p)
{
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
}