PR libstdc++/86603 Move __cpp_lib_list_remove_return_type macro
[official-gcc.git] / gcc / omp-grid.c
blob6edc92fd51980396482220d959324f25d73c396a
/* Lowering and expansion of OpenMP directives for HSA GPU agents.

   Copyright (C) 2013-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "cgraph.h"
30 #include "pretty-print.h"
31 #include "fold-const.h"
32 #include "gimplify.h"
33 #include "gimple-iterator.h"
34 #include "gimple-walk.h"
35 #include "tree-inline.h"
36 #include "langhooks.h"
37 #include "omp-general.h"
38 #include "omp-low.h"
39 #include "omp-grid.h"
40 #include "gimple-pretty-print.h"
42 /* Return the lastprivate predicate for a given gridified loop described by
43 FD). */
45 tree
46 omp_grid_lastprivate_predicate (struct omp_for_data *fd)
48 /* When dealing with a gridified loop, we need to check up to three collapsed
49 iteration variables but they are not actually captured in this fd.
50 Fortunately, we can easily rely on HSA builtins to get this
51 information. */
53 tree id, size;
54 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
55 && gimple_omp_for_grid_intra_group (fd->for_stmt))
57 id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
58 size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
60 else
62 id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
63 size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
65 tree cond = NULL;
66 for (int dim = 0; dim < fd->collapse; dim++)
68 tree dim_tree = build_int_cstu (unsigned_type_node, dim);
69 tree u1 = build_int_cstu (unsigned_type_node, 1);
70 tree c2
71 = build2 (EQ_EXPR, boolean_type_node,
72 build2 (PLUS_EXPR, unsigned_type_node,
73 build_call_expr (id, 1, dim_tree), u1),
74 build_call_expr (size, 1, dim_tree));
75 if (cond)
76 cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
77 else
78 cond = c2;
80 return cond;
83 /* Structure describing the basic properties of the loop we ara analyzing
84 whether it can be gridified and when it is gridified. */
86 struct grid_prop
88 /* True when we are doing tiling gridification, i.e. when there is a distinct
89 distribute loop over groups and a loop construct over work-items. False
90 when distribute and parallel for loops form a combined construct. */
91 bool tiling;
92 /* Location of the target construct for optimization information
93 messages. */
94 dump_user_location_t target_loc;
95 /* The collapse clause of the involved loops. Collapse value of all of them
96 must be the same for gridification to take place. */
97 size_t collapse;
98 /* Group sizes, if requested by the user or NULL if not requested. */
99 tree group_sizes[3];
/* Common beginning of all missed-optimization messages emitted while
   deciding whether a target construct can be gridified.  The specific reason
   is appended directly after it.  */

#define GRID_MISSED_MSG_PREFIX \
  "Will not turn target construct into a gridified HSA kernel because "
105 /* Return true if STMT is an assignment of a register-type into a local
106 VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to
107 any of the trees specifying group sizes there. */
109 static bool
110 grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
112 gassign *assign = dyn_cast <gassign *> (stmt);
113 if (!assign)
114 return false;
115 if (gimple_clobber_p (assign))
116 return true;
117 tree lhs = gimple_assign_lhs (assign);
118 if (!VAR_P (lhs)
119 || !is_gimple_reg_type (TREE_TYPE (lhs))
120 || is_global_var (lhs))
121 return false;
122 if (grid)
123 for (unsigned i = 0; i < grid->collapse; i++)
124 if (lhs == grid->group_sizes[i])
125 return false;
126 return true;
129 /* Return true if all statements in SEQ are assignments to local register-type
130 variables that do not hold group size information. */
132 static bool
133 grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
135 if (!seq)
136 return true;
138 gimple_stmt_iterator gsi;
139 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
140 if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
141 return false;
142 return true;
145 /* Scan statements in SEQ and call itself recursively on any bind. GRID
146 describes hitherto discovered properties of the loop that is evaluated for
147 possible gridification. If during whole search only assignments to
148 register-type local variables (that do not overwrite group size information)
149 and one single OMP statement is encountered, return true, otherwise return
150 false. RET is where we store any OMP statement encountered. */
152 static bool
153 grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
154 const char *name, gimple **ret)
156 gimple_stmt_iterator gsi;
157 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
159 gimple *stmt = gsi_stmt (gsi);
161 if (grid_safe_assignment_p (stmt, grid))
162 continue;
163 if (gbind *bind = dyn_cast <gbind *> (stmt))
165 gimple_seq bind_body = gimple_bind_body (bind);
166 if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
167 ret))
168 return false;
170 else if (is_gimple_omp (stmt))
172 if (*ret)
174 if (dump_enabled_p ())
176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
177 GRID_MISSED_MSG_PREFIX "%s construct "
178 "contains multiple OpenMP constructs\n",
179 name);
180 dump_printf_loc (MSG_NOTE, *ret,
181 "The first OpenMP construct within "
182 "a parallel\n");
183 dump_printf_loc (MSG_NOTE, stmt,
184 "The second OpenMP construct within "
185 "a parallel\n");
187 return false;
189 *ret = stmt;
191 else
193 if (dump_enabled_p ())
195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
196 GRID_MISSED_MSG_PREFIX "%s construct contains "
197 "a complex statement\n", name);
198 dump_printf_loc (MSG_NOTE, stmt,
199 "This statement cannot be analyzed for "
200 "gridification\n");
202 return false;
205 return true;
208 /* Scan statements in SEQ and make sure that it and any binds in it contain
209 only assignments to local register-type variables (that do not overwrite
210 group size information) and one OMP construct. If so, return that
211 construct, otherwise return NULL. GRID describes hitherto discovered
212 properties of the loop that is evaluated for possible gridification. If
213 dumping is enabled and function fails, use NAME to dump a note with the
214 reason for failure. */
216 static gimple *
217 grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
218 const char *name)
220 if (!seq)
222 if (dump_enabled_p ())
223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
224 GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
225 name);
226 return NULL;
229 gimple *ret = NULL;
230 if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
232 if (!ret && dump_enabled_p ())
233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
234 GRID_MISSED_MSG_PREFIX "%s construct does not contain"
235 " any other OpenMP construct\n", name);
236 return ret;
238 else
239 return NULL;
242 /* Walker function looking for statements there is no point gridifying (and for
243 noreturn function calls which we cannot do). Return non-NULL if such a
244 function is found. */
246 static tree
247 grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
248 bool *handled_ops_p,
249 struct walk_stmt_info *wi)
251 *handled_ops_p = false;
252 gimple *stmt = gsi_stmt (*gsi);
253 switch (gimple_code (stmt))
255 case GIMPLE_CALL:
256 if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
258 *handled_ops_p = true;
259 wi->info = stmt;
260 return error_mark_node;
262 break;
264 /* We may reduce the following list if we find a way to implement the
265 clauses, but now there is no point trying further. */
266 case GIMPLE_OMP_CRITICAL:
267 case GIMPLE_OMP_TASKGROUP:
268 case GIMPLE_OMP_TASK:
269 case GIMPLE_OMP_SECTION:
270 case GIMPLE_OMP_SECTIONS:
271 case GIMPLE_OMP_SECTIONS_SWITCH:
272 case GIMPLE_OMP_TARGET:
273 case GIMPLE_OMP_ORDERED:
274 *handled_ops_p = true;
275 wi->info = stmt;
276 return error_mark_node;
277 default:
278 break;
280 return NULL;
283 /* Examine clauses of omp parallel statement PAR and if any prevents
284 gridification, issue a missed-optimization diagnostics and return false,
285 otherwise return true. GRID describes hitherto discovered properties of the
286 loop that is evaluated for possible gridification. */
288 static bool
289 grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc)
291 tree clauses = gimple_omp_parallel_clauses (par);
292 while (clauses)
294 switch (OMP_CLAUSE_CODE (clauses))
296 case OMP_CLAUSE_NUM_THREADS:
297 if (dump_enabled_p ())
299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
300 GRID_MISSED_MSG_PREFIX "because there is "
301 "a num_threads clause of the parallel "
302 "construct\n");
303 dump_printf_loc (MSG_NOTE, par,
304 "Parallel construct has a num_threads clause\n");
306 return false;
308 case OMP_CLAUSE_REDUCTION:
309 if (dump_enabled_p ())
311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
312 GRID_MISSED_MSG_PREFIX "a reduction clause "
313 "is present\n ");
314 dump_printf_loc (MSG_NOTE, par,
315 "Parallel construct has a reduction clause\n");
317 return false;
319 default:
320 break;
322 clauses = OMP_CLAUSE_CHAIN (clauses);
324 return true;
327 /* Examine clauses and the body of omp loop statement GFOR and if something
328 prevents gridification, issue a missed-optimization diagnostics and return
329 false, otherwise return true. GRID describes hitherto discovered properties
330 of the loop that is evaluated for possible gridification. */
332 static bool
333 grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
335 if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
336 grid))
338 if (dump_enabled_p ())
340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
341 GRID_MISSED_MSG_PREFIX "the inner loop "
342 "loop bounds computation contains a complex "
343 "statement\n");
344 dump_printf_loc (MSG_NOTE, gfor,
345 "Loop construct cannot be analyzed for "
346 "gridification\n");
348 return false;
351 tree clauses = gimple_omp_for_clauses (gfor);
352 while (clauses)
354 switch (OMP_CLAUSE_CODE (clauses))
356 case OMP_CLAUSE_SCHEDULE:
357 if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
359 if (dump_enabled_p ())
361 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
362 GRID_MISSED_MSG_PREFIX "the inner loop "
363 "has a non-automatic schedule clause\n");
364 dump_printf_loc (MSG_NOTE, gfor,
365 "Loop construct has a non automatic "
366 "schedule clause\n");
368 return false;
370 break;
372 case OMP_CLAUSE_REDUCTION:
373 if (dump_enabled_p ())
375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
376 GRID_MISSED_MSG_PREFIX "a reduction "
377 "clause is present\n ");
378 dump_printf_loc (MSG_NOTE, gfor,
379 "Loop construct has a reduction schedule "
380 "clause\n");
382 return false;
384 default:
385 break;
387 clauses = OMP_CLAUSE_CHAIN (clauses);
389 struct walk_stmt_info wi;
390 memset (&wi, 0, sizeof (wi));
391 if (walk_gimple_seq (gimple_omp_body (gfor),
392 grid_find_ungridifiable_statement,
393 NULL, &wi))
395 gimple *bad = (gimple *) wi.info;
396 if (dump_enabled_p ())
398 if (is_gimple_call (bad))
399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
400 GRID_MISSED_MSG_PREFIX "the inner loop contains "
401 "call to a noreturn function\n");
402 else
403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
404 GRID_MISSED_MSG_PREFIX "the inner loop contains "
405 "statement %s which cannot be transformed\n",
406 gimple_code_name[(int) gimple_code (bad)]);
407 dump_printf_loc (MSG_NOTE, bad,
408 "This statement cannot be analyzed for "
409 "gridification\n");
411 return false;
413 return true;
416 /* Given distribute omp construct represented by DIST, which in the original
417 source forms a compound construct with a looping construct, return true if it
418 can be turned into a gridified HSA kernel. Otherwise return false. GRID
419 describes hitherto discovered properties of the loop that is evaluated for
420 possible gridification. */
422 static bool
423 grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
425 dump_user_location_t tloc = grid->target_loc;
426 gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
427 grid, "distribute");
428 gomp_parallel *par;
429 if (!stmt
430 || !(par = dyn_cast <gomp_parallel *> (stmt))
431 || !grid_parallel_clauses_gridifiable (par, tloc))
432 return false;
434 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
435 "parallel");
436 gomp_for *gfor;
437 if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
438 return false;
440 if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
442 if (dump_enabled_p ())
443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
444 GRID_MISSED_MSG_PREFIX "the inner loop is not "
445 "a simple for loop\n");
446 return false;
448 gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);
450 if (!grid_inner_loop_gridifiable_p (gfor, grid))
451 return false;
453 return true;
456 /* Given an omp loop statement GFOR, return true if it can participate in
457 tiling gridification, i.e. in one where the distribute and parallel for
458 loops do not form a compound statement. GRID describes hitherto discovered
459 properties of the loop that is evaluated for possible gridification. */
461 static bool
462 grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
464 if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
466 if (dump_enabled_p ())
468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
469 GRID_MISSED_MSG_PREFIX "an inner loop is not "
470 "a simple for loop\n");
471 dump_printf_loc (MSG_NOTE, gfor,
472 "This statement is not a simple for loop\n");
474 return false;
477 if (!grid_inner_loop_gridifiable_p (gfor, grid))
478 return false;
480 if (gimple_omp_for_collapse (gfor) != grid->collapse)
482 if (dump_enabled_p ())
484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
485 GRID_MISSED_MSG_PREFIX "an inner loop does not "
486 "have use the same collapse clause\n");
487 dump_printf_loc (MSG_NOTE, gfor,
488 "Loop construct uses a different collapse clause\n");
490 return false;
493 struct omp_for_data fd;
494 struct omp_for_data_loop *loops
495 = (struct omp_for_data_loop *)alloca (grid->collapse
496 * sizeof (struct omp_for_data_loop));
497 omp_extract_for_data (gfor, &fd, loops);
498 for (unsigned i = 0; i < grid->collapse; i++)
500 tree itype, type = TREE_TYPE (fd.loops[i].v);
501 if (POINTER_TYPE_P (type))
502 itype = signed_type_for (type);
503 else
504 itype = type;
506 tree n1 = fold_convert (itype, fd.loops[i].n1);
507 tree n2 = fold_convert (itype, fd.loops[i].n2);
508 tree t = build_int_cst (itype,
509 (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
510 t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
511 t = fold_build2 (PLUS_EXPR, itype, t, n2);
512 t = fold_build2 (MINUS_EXPR, itype, t, n1);
513 if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
514 t = fold_build2 (TRUNC_DIV_EXPR, itype,
515 fold_build1 (NEGATE_EXPR, itype, t),
516 fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
517 else
518 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);
520 if (!operand_equal_p (grid->group_sizes[i], t, 0))
522 if (dump_enabled_p ())
524 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
525 GRID_MISSED_MSG_PREFIX "the distribute and "
526 "an internal loop do not agree on tile size\n");
527 dump_printf_loc (MSG_NOTE, gfor,
528 "Loop construct does not seem to loop over "
529 "a tile size\n");
531 return false;
534 return true;
537 /* Facing a call to FNDECL in the body of a distribute construct, return true
538 if we can handle it or false if it precludes gridification. */
540 static bool
541 grid_call_permissible_in_distribute_p (tree fndecl)
543 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
544 return true;
546 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
547 if (strstr (name, "omp_") != name)
548 return false;
550 if ((strcmp (name, "omp_get_thread_num") == 0)
551 || (strcmp (name, "omp_get_num_threads") == 0)
552 || (strcmp (name, "omp_get_num_teams") == 0)
553 || (strcmp (name, "omp_get_team_num") == 0)
554 || (strcmp (name, "omp_get_level") == 0)
555 || (strcmp (name, "omp_get_active_level") == 0)
556 || (strcmp (name, "omp_in_parallel") == 0))
557 return true;
559 return false;
562 /* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
563 of a distribute construct that is pointed at by GSI, modify it as necessary
564 for gridification. If the statement itself got removed, return true. */
566 static bool
567 grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
569 gimple *stmt = gsi_stmt (*gsi);
570 tree fndecl = gimple_call_fndecl (stmt);
571 gcc_checking_assert (stmt);
572 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
573 return false;
575 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
576 if ((strcmp (name, "omp_get_thread_num") == 0)
577 || (strcmp (name, "omp_get_level") == 0)
578 || (strcmp (name, "omp_get_active_level") == 0)
579 || (strcmp (name, "omp_in_parallel") == 0))
581 tree lhs = gimple_call_lhs (stmt);
582 if (lhs)
584 gassign *assign
585 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
586 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
588 gsi_remove (gsi, true);
589 return true;
592 /* The rest of the omp functions can stay as they are, HSA back-end will
593 handle them correctly. */
594 gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
595 || (strcmp (name, "omp_get_num_teams") == 0)
596 || (strcmp (name, "omp_get_team_num") == 0));
597 return false;
600 /* Given a sequence of statements within a distribute omp construct or a
601 parallel construct, which in the original source does not form a compound
602 construct with a looping construct, return true if it does not prevent us
603 from turning it into a gridified HSA kernel. Otherwise return false. GRID
604 describes hitherto discovered properties of the loop that is evaluated for
605 possible gridification. IN_PARALLEL must be true if seq is within a
606 parallel construct and flase if it is only within a distribute
607 construct. */
609 static bool
610 grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
611 bool in_parallel)
613 gimple_stmt_iterator gsi;
614 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
616 gimple *stmt = gsi_stmt (gsi);
618 if (grid_safe_assignment_p (stmt, grid)
619 || gimple_code (stmt) == GIMPLE_GOTO
620 || gimple_code (stmt) == GIMPLE_LABEL
621 || gimple_code (stmt) == GIMPLE_COND)
622 continue;
623 else if (gbind *bind = dyn_cast <gbind *> (stmt))
625 if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
626 grid, in_parallel))
627 return false;
628 continue;
630 else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
632 if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
634 if (dump_enabled_p ())
636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
637 GRID_MISSED_MSG_PREFIX "the distribute "
638 "construct contains a try..catch region\n");
639 dump_printf_loc (MSG_NOTE, try_stmt,
640 "This statement cannot be analyzed for "
641 "tiled gridification\n");
643 return false;
645 if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
646 grid, in_parallel))
647 return false;
648 if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
649 grid, in_parallel))
650 return false;
651 continue;
653 else if (is_gimple_call (stmt))
655 tree fndecl = gimple_call_fndecl (stmt);
656 if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
657 continue;
659 if (dump_enabled_p ())
661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
662 GRID_MISSED_MSG_PREFIX "the distribute "
663 "construct contains a call\n");
664 dump_printf_loc (MSG_NOTE, stmt,
665 "This statement cannot be analyzed for "
666 "tiled gridification\n");
668 return false;
670 else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
672 if (in_parallel)
674 if (dump_enabled_p ())
676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
677 GRID_MISSED_MSG_PREFIX "a parallel "
678 "construct contains another parallel "
679 "construct\n");
680 dump_printf_loc (MSG_NOTE, stmt,
681 "This parallel construct is nested in "
682 "another one\n");
684 return false;
686 if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
687 || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
688 grid, true))
689 return false;
691 else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
693 if (!in_parallel)
695 if (dump_enabled_p ())
697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
698 GRID_MISSED_MSG_PREFIX "a loop "
699 "construct is not nested within a parallel "
700 "construct\n");
701 dump_printf_loc (MSG_NOTE, stmt,
702 "This loop construct is not nested in "
703 "a parallel construct\n");
705 return false;
707 if (!grid_gfor_follows_tiling_pattern (gfor, grid))
708 return false;
710 else
712 if (dump_enabled_p ())
714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
715 GRID_MISSED_MSG_PREFIX "the distribute "
716 "construct contains a complex statement\n");
717 dump_printf_loc (MSG_NOTE, stmt,
718 "This statement cannot be analyzed for "
719 "tiled gridification\n");
721 return false;
724 return true;
727 /* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
728 return true, otherwise return false. In the case of success, also fill in
729 GRID with information describing the kernel grid. */
731 static bool
732 grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
734 if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
735 return false;
737 dump_user_location_t tloc = target;
738 grid->target_loc = tloc;
739 gimple *stmt
740 = grid_find_single_omp_among_assignments (gimple_omp_body (target),
741 grid, "target");
742 if (!stmt)
743 return false;
744 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
745 tree group_size = NULL;
746 if (!teams)
748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
749 GRID_MISSED_MSG_PREFIX "it does not have a sole teams "
750 "construct in it.\n");
751 return false;
754 tree clauses = gimple_omp_teams_clauses (teams);
755 while (clauses)
757 switch (OMP_CLAUSE_CODE (clauses))
759 case OMP_CLAUSE_NUM_TEAMS:
760 if (dump_enabled_p ())
761 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
762 GRID_MISSED_MSG_PREFIX "the teams construct "
763 "contains a num_teams clause\n ");
764 return false;
766 case OMP_CLAUSE_REDUCTION:
767 if (dump_enabled_p ())
768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
769 GRID_MISSED_MSG_PREFIX "a reduction "
770 "clause is present\n ");
771 return false;
773 case OMP_CLAUSE_THREAD_LIMIT:
774 if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
775 group_size = OMP_CLAUSE_OPERAND (clauses, 0);
776 break;
778 default:
779 break;
781 clauses = OMP_CLAUSE_CHAIN (clauses);
784 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
785 "teams");
786 if (!stmt)
787 return false;
788 gomp_for *dist = dyn_cast <gomp_for *> (stmt);
789 if (!dist)
791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
792 GRID_MISSED_MSG_PREFIX "the teams construct does not "
793 "have a single distribute construct in it.\n");
794 return false;
797 gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);
799 grid->collapse = gimple_omp_for_collapse (dist);
800 if (grid->collapse > 3)
802 if (dump_enabled_p ())
803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
804 GRID_MISSED_MSG_PREFIX "the distribute construct "
805 "contains collapse clause with parameter greater "
806 "than 3\n");
807 return false;
810 struct omp_for_data fd;
811 struct omp_for_data_loop *dist_loops
812 = (struct omp_for_data_loop *)alloca (grid->collapse
813 * sizeof (struct omp_for_data_loop));
814 omp_extract_for_data (dist, &fd, dist_loops);
815 if (fd.chunk_size)
817 if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
819 if (dump_enabled_p ())
820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
821 GRID_MISSED_MSG_PREFIX "the teams "
822 "thread limit is different from distribute "
823 "schedule chunk\n");
824 return false;
826 group_size = fd.chunk_size;
828 if (group_size && grid->collapse > 1)
830 if (dump_enabled_p ())
831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
832 GRID_MISSED_MSG_PREFIX "group size cannot be "
833 "set using thread_limit or schedule clauses "
834 "when also using a collapse clause greater than 1\n");
835 return false;
838 if (gimple_omp_for_combined_p (dist))
840 grid->tiling = false;
841 grid->group_sizes[0] = group_size;
842 for (unsigned i = 1; i < grid->collapse; i++)
843 grid->group_sizes[i] = NULL;
844 return grid_dist_follows_simple_pattern (dist, grid);
846 else
848 grid->tiling = true;
849 if (group_size)
851 if (dump_enabled_p ())
852 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
853 GRID_MISSED_MSG_PREFIX "group size cannot be set "
854 "using thread_limit or schedule clauses when "
855 "distribute and loop constructs do not form "
856 "one combined construct\n");
857 return false;
859 for (unsigned i = 0; i < grid->collapse; i++)
861 if (fd.loops[i].cond_code == GT_EXPR)
862 grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
863 TREE_TYPE (fd.loops[i].step),
864 fd.loops[i].step);
865 else
866 grid->group_sizes[i] = fd.loops[i].step;
868 return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
869 false);
873 /* Operand walker, used to remap pre-body declarations according to a hash map
874 provided in DATA. */
876 static tree
877 grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
879 tree t = *tp;
881 if (DECL_P (t) || TYPE_P (t))
882 *walk_subtrees = 0;
883 else
884 *walk_subtrees = 1;
886 if (VAR_P (t))
888 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
889 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
890 tree *repl = declmap->get (t);
891 if (repl)
892 *tp = *repl;
894 return NULL_TREE;
/* Identifiers of the segments into which a particular variable can be
   placed when gridifying.  */

enum grid_var_segment
{
  GRID_SEGMENT_PRIVATE,
  GRID_SEGMENT_GROUP,
  GRID_SEGMENT_GLOBAL
};
903 /* Mark VAR so that it is eventually placed into SEGMENT. Place an artificial
904 builtin call into SEQ that will make sure the variable is always considered
905 address taken. */
907 static void
908 grid_mark_variable_segment (tree var, enum grid_var_segment segment)
910 /* Making a non-addressable variables would require that we re-gimplify all
911 their uses. Fortunately, we do not have to do this because if they are
912 not addressable, it means they are not used in atomic or parallel
913 statements and so relaxed GPU consistency rules mean we can just keep them
914 private. */
915 if (!TREE_ADDRESSABLE (var))
916 return;
918 switch (segment)
920 case GRID_SEGMENT_GROUP:
921 DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
922 NULL, DECL_ATTRIBUTES (var));
923 break;
924 case GRID_SEGMENT_GLOBAL:
925 DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
926 NULL, DECL_ATTRIBUTES (var));
927 break;
928 default:
929 gcc_unreachable ();
932 if (!TREE_STATIC (var))
934 TREE_STATIC (var) = 1;
935 varpool_node::finalize_decl (var);
940 /* Copy leading register-type assignments to local variables in SRC to just
941 before DST, Creating temporaries, adjusting mapping of operands in WI and
942 remapping operands as necessary. Add any new temporaries to TGT_BIND.
943 Return the first statement that does not conform to grid_safe_assignment_p
944 or NULL. If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
945 variables in traversed bind statements so that they are put into the
946 appropriate segment. */
948 static gimple *
949 grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
950 gbind *tgt_bind,
951 enum grid_var_segment var_segment,
952 struct walk_stmt_info *wi)
954 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
955 gimple_stmt_iterator gsi;
956 for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
958 gimple *stmt = gsi_stmt (gsi);
959 if (gbind *bind = dyn_cast <gbind *> (stmt))
961 gimple *r = grid_copy_leading_local_assignments
962 (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);
964 if (var_segment != GRID_SEGMENT_PRIVATE)
965 for (tree var = gimple_bind_vars (bind);
966 var;
967 var = DECL_CHAIN (var))
968 grid_mark_variable_segment (var, var_segment);
969 if (r)
970 return r;
971 else
972 continue;
974 if (!grid_safe_assignment_p (stmt, NULL))
975 return stmt;
976 tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
977 tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
978 TREE_TYPE (lhs));
979 DECL_CONTEXT (repl) = current_function_decl;
980 gimple_bind_append_vars (tgt_bind, repl);
982 declmap->put (lhs, repl);
983 gassign *copy = as_a <gassign *> (gimple_copy (stmt));
984 walk_gimple_op (copy, grid_remap_prebody_decls, wi);
985 gsi_insert_before (dst, copy, GSI_SAME_STMT);
987 return NULL;
990 /* Statement walker function to make adjustments to statements within the
991 gridifed kernel copy. */
993 static tree
994 grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
995 struct walk_stmt_info *)
997 *handled_ops_p = false;
998 gimple *stmt = gsi_stmt (*gsi);
999 if (gimple_code (stmt) == GIMPLE_OMP_FOR
1000 && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD))
1002 gomp_for *loop = as_a <gomp_for *> (stmt);
1003 tree clauses = gimple_omp_for_clauses (loop);
1004 tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
1005 if (cl)
1006 OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
1007 else
1009 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
1010 OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
1011 OMP_CLAUSE_CHAIN (c) = clauses;
1012 gimple_omp_for_set_clauses (loop, c);
1015 return NULL_TREE;
1018 /* Given a PARLOOP that is a normal for looping construct but also a part of a
1019 combined construct with a simd loop, eliminate the simd loop. */
1021 static void
1022 grid_eliminate_combined_simd_part (gomp_for *parloop)
1024 struct walk_stmt_info wi;
1026 memset (&wi, 0, sizeof (wi));
1027 wi.val_only = true;
1028 enum gf_mask msk = GF_OMP_FOR_SIMD;
1029 wi.info = (void *) &msk;
1030 walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
1031 gimple *stmt = (gimple *) wi.info;
1032 /* We expect that the SIMD id the only statement in the parallel loop. */
1033 gcc_assert (stmt
1034 && gimple_code (stmt) == GIMPLE_OMP_FOR
1035 && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD)
1036 && gimple_omp_for_combined_into_p (stmt)
1037 && !gimple_omp_for_combined_p (stmt));
1038 gomp_for *simd = as_a <gomp_for *> (stmt);
1040 /* Copy over the iteration properties because the body refers to the index in
1041 the bottmom-most loop. */
1042 unsigned i, collapse = gimple_omp_for_collapse (parloop);
1043 gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
1044 for (i = 0; i < collapse; i++)
1046 gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
1047 gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
1048 gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
1049 gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
1052 tree *tgt= gimple_omp_for_clauses_ptr (parloop);
1053 while (*tgt)
1054 tgt = &OMP_CLAUSE_CHAIN (*tgt);
1056 /* Copy over all clauses, except for linaer clauses, which are turned into
1057 private clauses, and all other simd-specificl clauses, which are
1058 ignored. */
1059 tree *pc = gimple_omp_for_clauses_ptr (simd);
1060 while (*pc)
1062 tree c = *pc;
1063 switch (TREE_CODE (c))
1065 case OMP_CLAUSE_LINEAR:
1067 tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
1068 OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
1069 OMP_CLAUSE_CHAIN (priv) = NULL;
1070 *tgt = priv;
1071 tgt = &OMP_CLAUSE_CHAIN (priv);
1072 pc = &OMP_CLAUSE_CHAIN (c);
1073 break;
1076 case OMP_CLAUSE_SAFELEN:
1077 case OMP_CLAUSE_SIMDLEN:
1078 case OMP_CLAUSE_ALIGNED:
1079 pc = &OMP_CLAUSE_CHAIN (c);
1080 break;
1082 default:
1083 *pc = OMP_CLAUSE_CHAIN (c);
1084 OMP_CLAUSE_CHAIN (c) = NULL;
1085 *tgt = c;
1086 tgt = &OMP_CLAUSE_CHAIN(c);
1087 break;
1091 /* Finally, throw away the simd and mark the parallel loop as not
1092 combined. */
1093 gimple_omp_set_body (parloop, gimple_omp_body (simd));
1094 gimple_omp_for_set_combined_p (parloop, false);
1097 /* Statement walker function marking all parallels as grid_phony and loops as
1098 grid ones representing threads of a particular thread group. */
1100 static tree
1101 grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
1102 struct walk_stmt_info *wi_in)
1104 *handled_ops_p = false;
1105 if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
1107 *handled_ops_p = true;
1108 gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
1109 gimple_omp_for_set_grid_intra_group (loop, true);
1110 if (gimple_omp_for_combined_p (loop))
1111 grid_eliminate_combined_simd_part (loop);
1113 struct walk_stmt_info body_wi;
1114 memset (&body_wi, 0, sizeof (body_wi));
1115 walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
1116 grid_process_grid_body, NULL, &body_wi);
1118 gbind *bind = (gbind *) wi_in->info;
1119 tree c;
1120 for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
1121 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
1123 push_gimplify_context ();
1124 tree ov = OMP_CLAUSE_DECL (c);
1125 tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
1126 TREE_TYPE (ov));
1128 grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
1129 DECL_CONTEXT (gv) = current_function_decl;
1130 gimple_bind_append_vars (bind, gv);
1131 tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
1132 gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
1133 x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
1134 gimple_seq l = NULL;
1135 gimplify_and_add (x, &l);
1136 gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
1137 pop_gimplify_context (bind);
1140 return NULL_TREE;
1143 /* Statement walker function marking all parallels as grid_phony and loops as
1144 grid ones representing threads of a particular thread group. */
1146 static tree
1147 grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
1148 bool *handled_ops_p,
1149 struct walk_stmt_info *wi_in)
1151 *handled_ops_p = false;
1152 wi_in->removed_stmt = false;
1153 gimple *stmt = gsi_stmt (*gsi);
1154 if (gbind *bind = dyn_cast <gbind *> (stmt))
1156 for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
1157 grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
1159 else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
1161 *handled_ops_p = true;
1162 gimple_omp_parallel_set_grid_phony (parallel, true);
1164 gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
1165 gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
1166 gimple_seq s = NULL;
1167 gimple_seq_add_stmt (&s, new_bind);
1168 gimple_omp_set_body (parallel, s);
1170 struct walk_stmt_info wi_par;
1171 memset (&wi_par, 0, sizeof (wi_par));
1172 wi_par.info = new_bind;
1173 walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
1174 grid_mark_tiling_loops, NULL, &wi_par);
1176 else if (is_a <gcall *> (stmt))
1177 wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
1178 return NULL_TREE;
1181 /* Given freshly copied top level kernel SEQ, identify the individual OMP
1182 components, mark them as part of kernel, copy assignment leading to them
1183 just before DST, remapping them using WI and adding new temporaries to
1184 TGT_BIND, and and return the loop that will be used for kernel dispatch. */
1186 static gomp_for *
1187 grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
1188 gimple_stmt_iterator *dst,
1189 gbind *tgt_bind, struct walk_stmt_info *wi)
1191 gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
1192 GRID_SEGMENT_GLOBAL, wi);
1193 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
1194 gcc_assert (teams);
1195 gimple_omp_teams_set_grid_phony (teams, true);
1196 stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
1197 tgt_bind, GRID_SEGMENT_GLOBAL,
1198 wi);
1199 gcc_checking_assert (stmt);
1200 gomp_for *dist = dyn_cast <gomp_for *> (stmt);
1201 gcc_assert (dist);
1202 gimple_seq prebody = gimple_omp_for_pre_body (dist);
1203 if (prebody)
1204 grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
1205 GRID_SEGMENT_GROUP, wi);
1207 if (grid->tiling)
1209 gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
1210 gimple_omp_for_set_grid_group_iter (dist, true);
1212 struct walk_stmt_info wi_tiled;
1213 memset (&wi_tiled, 0, sizeof (wi_tiled));
1214 walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
1215 grid_mark_tiling_parallels_and_loops, NULL,
1216 &wi_tiled);
1217 return dist;
1219 else
1221 gimple_omp_for_set_grid_phony (dist, true);
1222 stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
1223 tgt_bind,
1224 GRID_SEGMENT_PRIVATE, wi);
1225 gcc_checking_assert (stmt);
1226 gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
1227 gimple_omp_parallel_set_grid_phony (parallel, true);
1228 stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
1229 dst, tgt_bind,
1230 GRID_SEGMENT_PRIVATE, wi);
1231 gomp_for *inner_loop = as_a <gomp_for *> (stmt);
1232 gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
1233 prebody = gimple_omp_for_pre_body (inner_loop);
1234 if (prebody)
1235 grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
1236 GRID_SEGMENT_PRIVATE, wi);
1238 if (gimple_omp_for_combined_p (inner_loop))
1239 grid_eliminate_combined_simd_part (inner_loop);
1240 struct walk_stmt_info body_wi;
1241 memset (&body_wi, 0, sizeof (body_wi));
1242 walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
1243 grid_process_grid_body, NULL, &body_wi);
1245 return inner_loop;
1249 /* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
1250 create a GPU kernel for it. GSI must point to the same statement, TGT_BIND
1251 is the bind into which temporaries inserted before TARGET should be
1252 added. */
1254 static void
1255 grid_attempt_target_gridification (gomp_target *target,
1256 gimple_stmt_iterator *gsi,
1257 gbind *tgt_bind)
1259 /* removed group_size */
1260 grid_prop grid = {};
1261 if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
1262 return;
1264 location_t loc = gimple_location (target);
1265 if (dump_enabled_p ())
1266 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target,
1267 "Target construct will be turned into a gridified HSA "
1268 "kernel\n");
1270 /* Copy target body to a GPUKERNEL construct: */
1271 gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
1272 (gimple_omp_body (target));
1274 hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
1275 struct walk_stmt_info wi;
1276 memset (&wi, 0, sizeof (struct walk_stmt_info));
1277 wi.info = declmap;
1279 /* Copy assignments in between OMP statements before target, mark OMP
1280 statements within copy appropriately. */
1281 gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
1282 tgt_bind, &wi);
1284 gbind *old_bind
1285 = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
1286 gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
1287 tree new_block = gimple_bind_block (new_bind);
1288 tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
1289 BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
1290 BLOCK_SUBBLOCKS (enc_block) = new_block;
1291 BLOCK_SUPERCONTEXT (new_block) = enc_block;
1292 gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
1293 gimple_seq_add_stmt
1294 (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
1295 gpukernel);
1297 for (size_t i = 0; i < grid.collapse; i++)
1298 walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
1299 push_gimplify_context ();
1300 for (size_t i = 0; i < grid.collapse; i++)
1302 tree itype, type = TREE_TYPE (gimple_omp_for_index (inner_loop, i));
1303 if (POINTER_TYPE_P (type))
1304 itype = signed_type_for (type);
1305 else
1306 itype = type;
1308 enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
1309 tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
1310 walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
1311 tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
1312 walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
1313 omp_adjust_for_condition (loc, &cond_code, &n2);
1314 n1 = fold_convert (itype, n1);
1315 n2 = fold_convert (itype, n2);
1317 tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);
1318 tree step
1319 = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));
1321 tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
1322 t = fold_build2 (PLUS_EXPR, itype, step, t);
1323 t = fold_build2 (PLUS_EXPR, itype, t, n2);
1324 t = fold_build2 (MINUS_EXPR, itype, t, n1);
1325 if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
1326 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1327 fold_build1 (NEGATE_EXPR, itype, t),
1328 fold_build1 (NEGATE_EXPR, itype, step));
1329 else
1330 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
1331 t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
1332 if (grid.tiling)
1334 if (cond_code == GT_EXPR)
1335 step = fold_build1 (NEGATE_EXPR, itype, step);
1336 t = fold_build2 (MULT_EXPR, itype, t, step);
1339 tree gs = fold_convert (uint32_type_node, t);
1340 gimple_seq tmpseq = NULL;
1341 gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
1342 if (!gimple_seq_empty_p (tmpseq))
1343 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
1345 tree ws;
1346 if (grid.group_sizes[i])
1348 ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
1349 tmpseq = NULL;
1350 gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
1351 if (!gimple_seq_empty_p (tmpseq))
1352 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
1354 else
1355 ws = build_zero_cst (uint32_type_node);
1357 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
1358 OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
1359 OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
1360 OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
1361 OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
1362 gimple_omp_target_set_clauses (target, c);
1364 pop_gimplify_context (tgt_bind);
1365 delete declmap;
1366 return;
1369 /* Walker function doing all the work for create_target_kernels. */
1371 static tree
1372 grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
1373 bool *handled_ops_p,
1374 struct walk_stmt_info *incoming)
1376 *handled_ops_p = false;
1378 gimple *stmt = gsi_stmt (*gsi);
1379 gomp_target *target = dyn_cast <gomp_target *> (stmt);
1380 if (target)
1382 gbind *tgt_bind = (gbind *) incoming->info;
1383 gcc_checking_assert (tgt_bind);
1384 grid_attempt_target_gridification (target, gsi, tgt_bind);
1385 return NULL_TREE;
1387 gbind *bind = dyn_cast <gbind *> (stmt);
1388 if (bind)
1390 *handled_ops_p = true;
1391 struct walk_stmt_info wi;
1392 memset (&wi, 0, sizeof (wi));
1393 wi.info = bind;
1394 walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
1395 grid_gridify_all_targets_stmt, NULL, &wi);
1397 return NULL_TREE;
1400 /* Attempt to gridify all target constructs in BODY_P. All such targets will
1401 have their bodies duplicated, with the new copy being put into a
1402 gimple_omp_grid_body statement. All kernel-related construct within the
1403 grid_body will be marked with phony flags or kernel kinds. Moreover, some
1404 re-structuring is often needed, such as copying pre-bodies before the target
1405 construct so that kernel grid sizes can be computed. */
1407 void
1408 omp_grid_gridify_all_targets (gimple_seq *body_p)
1410 struct walk_stmt_info wi;
1411 memset (&wi, 0, sizeof (wi));
1412 walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);