gcc/omp-grid.c

   1 /* Lowering and expansion of OpenMP directives for HSA GPU agents.
   2
   3    Copyright (C) 2013-2018 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "backend.h"
  25 #include "tree.h"
  26 #include "gimple.h"
  27 #include "tree-pass.h"
  28 #include "ssa.h"
  29 #include "cgraph.h"
  30 #include "pretty-print.h"
  31 #include "fold-const.h"
  32 #include "gimplify.h"
  33 #include "gimple-iterator.h"
  34 #include "gimple-walk.h"
  35 #include "tree-inline.h"
  36 #include "langhooks.h"
  37 #include "omp-general.h"
  38 #include "omp-low.h"
  39 #include "omp-grid.h"
  40 #include "gimple-pretty-print.h"
  41
  42 /* Return the lastprivate predicate for a given gridified loop described by
  43    FD).  */
  44
  45 tree
  46 omp_grid_lastprivate_predicate (struct omp_for_data *fd)
  47 {
  48   /* When dealing with a gridified loop, we need to check up to three collapsed
  49      iteration variables but they are not actually captured in this fd.
  50      Fortunately, we can easily rely on HSA builtins to get this
  51      information.  */
  52
  53   tree id, size;
  54   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
  55       && gimple_omp_for_grid_intra_group (fd->for_stmt))
  56     {
  57       id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
  58       size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
  59     }
  60   else
  61     {
  62       id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
  63       size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
  64     }
  65   tree cond = NULL;
  66   for (int dim = 0; dim < fd->collapse; dim++)
  67     {
  68       tree dim_tree = build_int_cstu (unsigned_type_node, dim);
  69       tree u1 = build_int_cstu (unsigned_type_node, 1);
  70       tree c2
  71         = build2 (EQ_EXPR, boolean_type_node,
  72                   build2 (PLUS_EXPR, unsigned_type_node,
  73                           build_call_expr (id, 1, dim_tree), u1),
  74                   build_call_expr (size, 1, dim_tree));
  75       if (cond)
  76         cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
  77       else
  78         cond = c2;
  79     }
  80   return cond;
  81 }
  82
  83 /* Structure describing the basic properties of the loop we ara analyzing
  84    whether it can be gridified and when it is gridified.  */
  85
  86 struct grid_prop
  87 {
  88   /* True when we are doing tiling gridification, i.e. when there is a distinct
  89      distribute loop over groups and a loop construct over work-items.  False
  90      when distribute and parallel for loops form a combined construct.  */
  91   bool tiling;
  92   /* Location of the target construct for optimization information
  93      messages.  */
  94   dump_user_location_t target_loc;
  95   /* The collapse clause of the involved loops.  Collapse value of all of them
  96      must be the same for gridification to take place.  */
  97   size_t collapse;
  98   /* Group sizes, if requested by the user or NULL if not requested.  */
  99   tree group_sizes[3];
 100 };
 101
 102 #define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
 103   "gridified HSA kernel because "
 104
 105 /* Return true if STMT is an assignment of a register-type into a local
 106    VAR_DECL.  If GRID is non-NULL, the assignment additionally must not be to
 107    any of the trees specifying group sizes there.  */
 108
 109 static bool
 110 grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
 111 {
 112   gassign *assign = dyn_cast <gassign *> (stmt);
 113   if (!assign)
 114     return false;
 115   if (gimple_clobber_p (assign))
 116     return true;
 117   tree lhs = gimple_assign_lhs (assign);
 118   if (!VAR_P (lhs)
 119       || !is_gimple_reg_type (TREE_TYPE (lhs))
 120       || is_global_var (lhs))
 121     return false;
 122   if (grid)
 123     for (unsigned i = 0; i < grid->collapse; i++)
 124       if (lhs == grid->group_sizes[i])
 125         return false;
 126   return true;
 127 }
 128
 129 /* Return true if all statements in SEQ are assignments to local register-type
 130    variables that do not hold group size information.  */
 131
 132 static bool
 133 grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
 134 {
 135   if (!seq)
 136     return true;
 137
 138   gimple_stmt_iterator gsi;
 139   for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
 140     if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
 141       return false;
 142   return true;
 143 }
 144
 145 /* Scan statements in SEQ and call itself recursively on any bind.  GRID
 146    describes hitherto discovered properties of the loop that is evaluated for
 147    possible gridification.  If during whole search only assignments to
 148    register-type local variables (that do not overwrite group size information)
 149    and one single OMP statement is encountered, return true, otherwise return
 150    false.  RET is where we store any OMP statement encountered.  */
 151
 152 static bool
 153 grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
 154                                           const char *name, gimple **ret)
 155 {
 156   gimple_stmt_iterator gsi;
 157   for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
 158     {
 159       gimple *stmt = gsi_stmt (gsi);
 160
 161       if (grid_safe_assignment_p (stmt, grid))
 162         continue;
 163       if (gbind *bind = dyn_cast <gbind *> (stmt))
 164         {
 165           gimple_seq bind_body = gimple_bind_body (bind);
 166           if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
 167                                                          ret))
 168               return false;
 169         }
 170       else if (is_gimple_omp (stmt))
 171         {
 172           if (*ret)
 173             {
 174               if (dump_enabled_p ())
 175                 {
 176                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 177                                    GRID_MISSED_MSG_PREFIX "%s construct "
 178                                    "contains multiple OpenMP constructs\n",
 179                                    name);
 180                   dump_printf_loc (MSG_NOTE, *ret,
 181                                    "The first OpenMP construct within "
 182                                    "a parallel\n");
 183                   dump_printf_loc (MSG_NOTE, stmt,
 184                                    "The second OpenMP construct within "
 185                                    "a parallel\n");
 186                 }
 187               return false;
 188             }
 189           *ret = stmt;
 190         }
 191       else
 192         {
 193           if (dump_enabled_p ())
 194             {
 195               dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 196                                GRID_MISSED_MSG_PREFIX "%s construct contains "
 197                                "a complex statement\n", name);
 198               dump_printf_loc (MSG_NOTE, stmt,
 199                                "This statement cannot be analyzed for "
 200                                "gridification\n");
 201             }
 202           return false;
 203         }
 204     }
 205   return true;
 206 }
 207
 208 /* Scan statements in SEQ and make sure that it and any binds in it contain
 209    only assignments to local register-type variables (that do not overwrite
 210    group size information) and one OMP construct.  If so, return that
 211    construct, otherwise return NULL.  GRID describes hitherto discovered
 212    properties of the loop that is evaluated for possible gridification.  If
 213    dumping is enabled and function fails, use NAME to dump a note with the
 214    reason for failure.  */
 215
 216 static gimple *
 217 grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
 218                                         const char *name)
 219 {
 220   if (!seq)
 221     {
 222       if (dump_enabled_p ())
 223         dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 224                          GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
 225                          name);
 226       return NULL;
 227     }
 228
 229   gimple *ret = NULL;
 230   if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
 231     {
 232       if (!ret && dump_enabled_p ())
 233         dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 234                          GRID_MISSED_MSG_PREFIX "%s construct does not contain"
 235                          " any other OpenMP construct\n", name);
 236       return ret;
 237     }
 238   else
 239     return NULL;
 240 }
 241
 242 /* Walker function looking for statements there is no point gridifying (and for
 243    noreturn function calls which we cannot do).  Return non-NULL if such a
 244    function is found.  */
 245
 246 static tree
 247 grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
 248                                    bool *handled_ops_p,
 249                                    struct walk_stmt_info *wi)
 250 {
 251   *handled_ops_p = false;
 252   gimple *stmt = gsi_stmt (*gsi);
 253   switch (gimple_code (stmt))
 254     {
 255     case GIMPLE_CALL:
 256       if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
 257         {
 258           *handled_ops_p = true;
 259           wi->info = stmt;
 260           return error_mark_node;
 261         }
 262       break;
 263
 264     /* We may reduce the following list if we find a way to implement the
 265        clauses, but now there is no point trying further.  */
 266     case GIMPLE_OMP_CRITICAL:
 267     case GIMPLE_OMP_TASKGROUP:
 268     case GIMPLE_OMP_TASK:
 269     case GIMPLE_OMP_SECTION:
 270     case GIMPLE_OMP_SECTIONS:
 271     case GIMPLE_OMP_SECTIONS_SWITCH:
 272     case GIMPLE_OMP_TARGET:
 273     case GIMPLE_OMP_ORDERED:
 274       *handled_ops_p = true;
 275       wi->info = stmt;
 276       return error_mark_node;
 277     default:
 278       break;
 279     }
 280   return NULL;
 281 }
 282
 283 /* Examine clauses of omp parallel statement PAR and if any prevents
 284    gridification, issue a missed-optimization diagnostics and return false,
 285    otherwise return true.  GRID describes hitherto discovered properties of the
 286    loop that is evaluated for possible gridification.  */
 287
 288 static bool
 289 grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc)
 290 {
 291   tree clauses = gimple_omp_parallel_clauses (par);
 292   while (clauses)
 293     {
 294       switch (OMP_CLAUSE_CODE (clauses))
 295         {
 296         case OMP_CLAUSE_NUM_THREADS:
 297           if (dump_enabled_p ())
 298             {
 299               dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
 300                                GRID_MISSED_MSG_PREFIX "because there is "
 301                                "a num_threads clause of the parallel "
 302                                "construct\n");
 303               dump_printf_loc (MSG_NOTE, par,
 304                                "Parallel construct has a num_threads clause\n");
 305             }
 306           return false;
 307
 308         case OMP_CLAUSE_REDUCTION:
 309           if (dump_enabled_p ())
 310             {
 311               dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
 312                                GRID_MISSED_MSG_PREFIX "a reduction clause "
 313                                "is present\n ");
 314               dump_printf_loc (MSG_NOTE, par,
 315                                "Parallel construct has a reduction clause\n");
 316             }
 317           return false;
 318
 319         default:
 320           break;
 321         }
 322       clauses = OMP_CLAUSE_CHAIN (clauses);
 323     }
 324   return true;
 325 }
 326
 327 /* Examine clauses and the body of omp loop statement GFOR and if something
 328    prevents gridification, issue a missed-optimization diagnostics and return
 329    false, otherwise return true.  GRID describes hitherto discovered properties
 330    of the loop that is evaluated for possible gridification.  */
 331
 332 static bool
 333 grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
 334 {
 335   if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
 336                                                  grid))
 337     {
 338       if (dump_enabled_p ())
 339         {
 340           dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 341                            GRID_MISSED_MSG_PREFIX "the inner loop "
 342                            "loop bounds computation contains a complex "
 343                            "statement\n");
 344           dump_printf_loc (MSG_NOTE, gfor,
 345                            "Loop construct cannot be analyzed for "
 346                            "gridification\n");
 347         }
 348       return false;
 349     }
 350
 351   tree clauses = gimple_omp_for_clauses (gfor);
 352   while (clauses)
 353     {
 354       switch (OMP_CLAUSE_CODE (clauses))
 355         {
 356         case OMP_CLAUSE_SCHEDULE:
 357           if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
 358             {
 359               if (dump_enabled_p ())
 360                 {
 361                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 362                                    GRID_MISSED_MSG_PREFIX "the inner loop "
 363                                    "has a non-automatic schedule clause\n");
 364                   dump_printf_loc (MSG_NOTE, gfor,
 365                                    "Loop construct has a non automatic "
 366                                    "schedule clause\n");
 367                 }
 368               return false;
 369             }
 370           break;
 371
 372         case OMP_CLAUSE_REDUCTION:
 373           if (dump_enabled_p ())
 374             {
 375               dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 376                                GRID_MISSED_MSG_PREFIX "a reduction "
 377                                "clause is present\n ");
 378               dump_printf_loc (MSG_NOTE, gfor,
 379                                "Loop construct has a reduction schedule "
 380                                "clause\n");
 381             }
 382           return false;
 383
 384         default:
 385           break;
 386         }
 387       clauses = OMP_CLAUSE_CHAIN (clauses);
 388     }
 389   struct walk_stmt_info wi;
 390   memset (&wi, 0, sizeof (wi));
 391   if (walk_gimple_seq (gimple_omp_body (gfor),
 392                        grid_find_ungridifiable_statement,
 393                        NULL, &wi))
 394     {
 395       gimple *bad = (gimple *) wi.info;
 396       if (dump_enabled_p ())
 397         {
 398           if (is_gimple_call (bad))
 399             dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 400                                GRID_MISSED_MSG_PREFIX "the inner loop contains "
 401                                "call to a noreturn function\n");
 402           else
 403             dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 404                              GRID_MISSED_MSG_PREFIX "the inner loop contains "
 405                              "statement %s which cannot be transformed\n",
 406                              gimple_code_name[(int) gimple_code (bad)]);
 407           dump_printf_loc (MSG_NOTE, bad,
 408                            "This statement cannot be analyzed for "
 409                            "gridification\n");
 410         }
 411       return false;
 412     }
 413   return true;
 414 }
 415
 416 /* Given distribute omp construct represented by DIST, which in the original
 417    source forms a compound construct with a looping construct, return true if it
 418    can be turned into a gridified HSA kernel.  Otherwise return false.  GRID
 419    describes hitherto discovered properties of the loop that is evaluated for
 420    possible gridification.  */
 421
 422 static bool
 423 grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
 424 {
 425   dump_user_location_t tloc = grid->target_loc;
 426   gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
 427                                                          grid, "distribute");
 428   gomp_parallel *par;
 429   if (!stmt
 430       || !(par = dyn_cast <gomp_parallel *> (stmt))
 431       || !grid_parallel_clauses_gridifiable (par, tloc))
 432     return false;
 433
 434   stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
 435                                                  "parallel");
 436   gomp_for *gfor;
 437   if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
 438     return false;
 439
 440   if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
 441     {
 442       if (dump_enabled_p ())
 443         dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
 444                          GRID_MISSED_MSG_PREFIX "the inner loop is not "
 445                          "a simple for loop\n");
 446       return false;
 447     }
 448   gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);
 449
 450   if (!grid_inner_loop_gridifiable_p (gfor, grid))
 451     return false;
 452
 453   return true;
 454 }
 455
 456 /* Given an omp loop statement GFOR, return true if it can participate in
 457    tiling gridification, i.e. in one where the distribute and parallel for
 458    loops do not form a compound statement.  GRID describes hitherto discovered
 459    properties of the loop that is evaluated for possible gridification.  */
 460
 461 static bool
 462 grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
 463 {
 464   if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
 465     {
 466       if (dump_enabled_p ())
 467         {
 468           dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 469                            GRID_MISSED_MSG_PREFIX "an inner loop is not "
 470                            "a simple for loop\n");
 471           dump_printf_loc (MSG_NOTE, gfor,
 472                            "This statement is not a simple for loop\n");
 473         }
 474       return false;
 475     }
 476
 477   if (!grid_inner_loop_gridifiable_p (gfor, grid))
 478     return false;
 479
 480   if (gimple_omp_for_collapse (gfor) != grid->collapse)
 481     {
 482       if (dump_enabled_p ())
 483         {
 484           dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 485                            GRID_MISSED_MSG_PREFIX "an inner loop does not "
 486                            "have use the same collapse clause\n");
 487           dump_printf_loc (MSG_NOTE, gfor,
 488                            "Loop construct uses a different collapse clause\n");
 489         }
 490       return false;
 491     }
 492
 493   struct omp_for_data fd;
 494   struct omp_for_data_loop *loops
 495     = (struct omp_for_data_loop *)alloca (grid->collapse
 496                                           * sizeof (struct omp_for_data_loop));
 497   omp_extract_for_data (gfor, &fd, loops);
 498   for (unsigned i = 0; i < grid->collapse; i++)
 499     {
 500       tree itype, type = TREE_TYPE (fd.loops[i].v);
 501       if (POINTER_TYPE_P (type))
 502         itype = signed_type_for (type);
 503       else
 504         itype = type;
 505
 506       tree n1 = fold_convert (itype, fd.loops[i].n1);
 507       tree n2 = fold_convert (itype, fd.loops[i].n2);
 508       tree t = build_int_cst (itype,
 509                               (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
 510       t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
 511       t = fold_build2 (PLUS_EXPR, itype, t, n2);
 512       t = fold_build2 (MINUS_EXPR, itype, t, n1);
 513       if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
 514         t = fold_build2 (TRUNC_DIV_EXPR, itype,
 515                          fold_build1 (NEGATE_EXPR, itype, t),
 516                          fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
 517       else
 518         t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);
 519
 520       if (!operand_equal_p (grid->group_sizes[i], t, 0))
 521         {
 522           if (dump_enabled_p ())
 523             {
 524               dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 525                                GRID_MISSED_MSG_PREFIX "the distribute and "
 526                                "an internal loop do not agree on tile size\n");
 527               dump_printf_loc (MSG_NOTE, gfor,
 528                                "Loop construct does not seem to loop over "
 529                                "a tile size\n");
 530             }
 531           return false;
 532         }
 533     }
 534   return true;
 535 }
 536
 537 /* Facing a call to FNDECL in the body of a distribute construct, return true
 538    if we can handle it or false if it precludes gridification.  */
 539
 540 static bool
 541 grid_call_permissible_in_distribute_p (tree fndecl)
 542 {
 543   if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
 544     return true;
 545
 546   const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
 547   if (strstr (name, "omp_") != name)
 548     return false;
 549
 550   if ((strcmp (name, "omp_get_thread_num") == 0)
 551       || (strcmp (name, "omp_get_num_threads") == 0)
 552       || (strcmp (name, "omp_get_num_teams") == 0)
 553       || (strcmp (name, "omp_get_team_num") == 0)
 554       || (strcmp (name, "omp_get_level") == 0)
 555       || (strcmp (name, "omp_get_active_level") == 0)
 556       || (strcmp (name, "omp_in_parallel") == 0))
 557     return true;
 558
 559   return false;
 560 }
 561
 562 /* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
 563    of a distribute construct that is pointed at by GSI, modify it as necessary
 564    for gridification.  If the statement itself got removed, return true.  */
 565
 566 static bool
 567 grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
 568 {
 569   gimple *stmt = gsi_stmt (*gsi);
 570   tree fndecl = gimple_call_fndecl (stmt);
 571   gcc_checking_assert (stmt);
 572   if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
 573     return false;
 574
 575   const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
 576   if ((strcmp (name, "omp_get_thread_num") == 0)
 577       || (strcmp (name, "omp_get_level") == 0)
 578       || (strcmp (name, "omp_get_active_level") == 0)
 579       || (strcmp (name, "omp_in_parallel") == 0))
 580     {
 581       tree lhs = gimple_call_lhs (stmt);
 582       if (lhs)
 583         {
 584           gassign *assign
 585             = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
 586           gsi_insert_before (gsi, assign, GSI_SAME_STMT);
 587         }
 588       gsi_remove (gsi, true);
 589       return true;
 590     }
 591
 592   /* The rest of the omp functions can stay as they are, HSA back-end will
 593      handle them correctly.  */
 594   gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
 595                        || (strcmp (name, "omp_get_num_teams") == 0)
 596                        || (strcmp (name, "omp_get_team_num") == 0));
 597   return false;
 598 }
 599
 600 /* Given a sequence of statements within a distribute omp construct or a
 601    parallel construct, which in the original source does not form a compound
 602    construct with a looping construct, return true if it does not prevent us
 603    from turning it into a gridified HSA kernel.  Otherwise return false.  GRID
 604    describes hitherto discovered properties of the loop that is evaluated for
 605    possible gridification.  IN_PARALLEL must be true if seq is within a
 606    parallel construct and flase if it is only within a distribute
 607    construct.  */
 608
 609 static bool
 610 grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
 611                                   bool in_parallel)
 612 {
 613   gimple_stmt_iterator gsi;
 614   for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
 615     {
 616       gimple *stmt = gsi_stmt (gsi);
 617
 618       if (grid_safe_assignment_p (stmt, grid)
 619           || gimple_code (stmt) == GIMPLE_GOTO
 620           || gimple_code (stmt) == GIMPLE_LABEL
 621           || gimple_code (stmt) == GIMPLE_COND)
 622         continue;
 623       else if (gbind *bind = dyn_cast <gbind *> (stmt))
 624         {
 625           if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
 626                                                  grid, in_parallel))
 627             return false;
 628           continue;
 629         }
 630       else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
 631         {
 632           if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
 633             {
 634               if (dump_enabled_p ())
 635                 {
 636                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 637                                    GRID_MISSED_MSG_PREFIX "the distribute "
 638                                    "construct contains a try..catch region\n");
 639                   dump_printf_loc (MSG_NOTE, try_stmt,
 640                                    "This statement cannot be analyzed for "
 641                                    "tiled gridification\n");
 642                 }
 643               return false;
 644             }
 645           if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
 646                                                  grid, in_parallel))
 647             return false;
 648           if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
 649                                                  grid, in_parallel))
 650             return false;
 651           continue;
 652         }
 653       else if (is_gimple_call (stmt))
 654         {
 655           tree fndecl = gimple_call_fndecl (stmt);
 656           if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
 657             continue;
 658
 659           if (dump_enabled_p ())
 660             {
 661               dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 662                                GRID_MISSED_MSG_PREFIX "the distribute "
 663                                "construct contains a call\n");
 664               dump_printf_loc (MSG_NOTE, stmt,
 665                                "This statement cannot be analyzed for "
 666                                "tiled gridification\n");
 667             }
 668           return false;
 669         }
 670       else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
 671         {
 672           if (in_parallel)
 673             {
 674               if (dump_enabled_p ())
 675                 {
 676                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 677                                    GRID_MISSED_MSG_PREFIX "a parallel "
 678                                    "construct contains another parallel "
 679                                    "construct\n");
 680                   dump_printf_loc (MSG_NOTE, stmt,
 681                                    "This parallel construct is nested in "
 682                                    "another one\n");
 683                 }
 684               return false;
 685             }
 686           if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
 687               || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
 688                                                     grid, true))
 689             return false;
 690         }
 691       else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
 692         {
 693           if (!in_parallel)
 694             {
 695               if (dump_enabled_p ())
 696                 {
 697                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 698                                    GRID_MISSED_MSG_PREFIX "a loop "
 699                                    "construct is not nested within a parallel "
 700                                    "construct\n");
 701                   dump_printf_loc (MSG_NOTE, stmt,
 702                                    "This loop construct is not nested in "
 703                                    "a parallel construct\n");
 704                 }
 705               return false;
 706             }
 707           if (!grid_gfor_follows_tiling_pattern (gfor, grid))
 708             return false;
 709         }
 710       else
 711         {
 712           if (dump_enabled_p ())
 713             {
 714               dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
 715                                GRID_MISSED_MSG_PREFIX "the distribute "
 716                                "construct contains a complex statement\n");
 717               dump_printf_loc (MSG_NOTE, stmt,
 718                                "This statement cannot be analyzed for "
 719                                "tiled gridification\n");
 720             }
 721           return false;
 722         }
 723     }
 724     return true;
 725 }
 726
 727 /* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
 728    return true, otherwise return false.  In the case of success, also fill in
 729    GRID with information describing the kernel grid.  */
 730
 731 static bool
 732 grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
 733 {
 734   if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
 735     return false;
 736
 737   dump_user_location_t tloc = target;
 738   grid->target_loc = tloc;
 739   gimple *stmt
 740     = grid_find_single_omp_among_assignments (gimple_omp_body (target),
 741                                               grid, "target");
 742   if (!stmt)
 743     return false;
 744   gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
 745   tree group_size = NULL;
 746   if (!teams)
 747     {
 748       dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
 749                        GRID_MISSED_MSG_PREFIX "it does not have a sole teams "
 750                        "construct in it.\n");
 751       return false;
 752     }
 753
 754   tree clauses = gimple_omp_teams_clauses (teams);
 755   while (clauses)
 756     {
 757       switch (OMP_CLAUSE_CODE (clauses))
 758         {
 759         case OMP_CLAUSE_NUM_TEAMS:
 760           if (dump_enabled_p ())
 761             dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
 762                              GRID_MISSED_MSG_PREFIX "the teams construct "
 763                              "contains a num_teams clause\n ");
 764           return false;
 765
 766         case OMP_CLAUSE_REDUCTION:
 767           if (dump_enabled_p ())
 768             dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
 769                              GRID_MISSED_MSG_PREFIX "a reduction "
 770                              "clause is present\n ");
 771           return false;
 772
 773         case OMP_CLAUSE_THREAD_LIMIT:
 774           if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
 775             group_size = OMP_CLAUSE_OPERAND (clauses, 0);
 776           break;
 777
 778         default:
 779           break;
 780         }
 781       clauses = OMP_CLAUSE_CHAIN (clauses);
 782     }
 783
 784   stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
 785                                                  "teams");
 786   if (!stmt)
 787     return false;
 788   gomp_for *dist = dyn_cast <gomp_for *> (stmt);
 789   if (!dist)
 790     {
 791       dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
 792                        GRID_MISSED_MSG_PREFIX "the teams construct does not "
 793                        "have a single distribute construct in it.\n");
 794       return false;
 795     }
 796
 797   gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);
 798
 799   grid->collapse = gimple_omp_for_collapse (dist);
 800   if (grid->collapse > 3)
 801     {
 802       if (dump_enabled_p ())
 803         dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
 804                          GRID_MISSED_MSG_PREFIX "the distribute construct "
 805                          "contains collapse clause with parameter greater "
 806                          "than 3\n");
 807       return false;
 808     }
 809
 810   struct omp_for_data fd;
 811   struct omp_for_data_loop *dist_loops
 812     = (struct omp_for_data_loop *)alloca (grid->collapse
 813                                           * sizeof (struct omp_for_data_loop));
 814   omp_extract_for_data (dist, &fd, dist_loops);
 815   if (fd.chunk_size)
 816     {
 817       if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
 818         {
 819           if (dump_enabled_p ())
 820             dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
 821                              GRID_MISSED_MSG_PREFIX "the teams "
 822                              "thread limit is different from distribute "
 823                              "schedule chunk\n");
 824           return false;
 825         }
 826       group_size = fd.chunk_size;
 827     }
 828   if (group_size && grid->collapse > 1)
 829     {
 830       if (dump_enabled_p ())
 831         dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
 832                          GRID_MISSED_MSG_PREFIX "group size cannot be "
 833                          "set using thread_limit or schedule clauses "
 834                          "when also using a collapse clause greater than 1\n");
 835       return false;
 836     }
 837
 838   if (gimple_omp_for_combined_p (dist))
 839     {
 840       grid->tiling = false;
 841       grid->group_sizes[0] = group_size;
 842       for (unsigned i = 1; i < grid->collapse; i++)
 843         grid->group_sizes[i] = NULL;
 844       return grid_dist_follows_simple_pattern (dist, grid);
 845     }
 846   else
 847     {
 848       grid->tiling = true;
 849       if (group_size)
 850         {
 851           if (dump_enabled_p ())
 852             dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
 853                              GRID_MISSED_MSG_PREFIX "group size cannot be set "
 854                              "using thread_limit or schedule clauses when "
 855                              "distribute and loop constructs do not form "
 856                              "one combined construct\n");
 857           return false;
 858         }
 859       for (unsigned i = 0; i < grid->collapse; i++)
 860         {
 861           if (fd.loops[i].cond_code == GT_EXPR)
 862             grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
 863                                                 TREE_TYPE (fd.loops[i].step),
 864                                                 fd.loops[i].step);
 865           else
 866             grid->group_sizes[i] = fd.loops[i].step;
 867         }
 868       return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
 869                                                false);
 870     }
 871 }
 872
 873 /* Operand walker, used to remap pre-body declarations according to a hash map
 874    provided in DATA.  */
 875
 876 static tree
 877 grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
 878 {
 879   tree t = *tp;
 880
 881   if (DECL_P (t) || TYPE_P (t))
 882     *walk_subtrees = 0;
 883   else
 884     *walk_subtrees = 1;
 885
 886   if (VAR_P (t))
 887     {
 888       struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
 889       hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
 890       tree *repl = declmap->get (t);
 891       if (repl)
 892         *tp = *repl;
 893     }
 894   return NULL_TREE;
 895 }
 896
 897 /* Identifiers of segments into which a particular variable should be places
 898    when gridifying.  */
 899
 900 enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
 901                        GRID_SEGMENT_GLOBAL};
 902
 903 /* Mark VAR so that it is eventually placed into SEGMENT.  Place an artificial
 904    builtin call into SEQ that will make sure the variable is always considered
 905    address taken.  */
 906
 907 static void
 908 grid_mark_variable_segment (tree var, enum grid_var_segment segment)
 909 {
 910   /* Making a non-addressable variables would require that we re-gimplify all
 911      their uses.  Fortunately, we do not have to do this because if they are
 912      not addressable, it means they are not used in atomic or parallel
 913      statements and so relaxed GPU consistency rules mean we can just keep them
 914      private.  */
 915   if (!TREE_ADDRESSABLE (var))
 916     return;
 917
 918   switch (segment)
 919     {
 920     case GRID_SEGMENT_GROUP:
 921       DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
 922                                          NULL, DECL_ATTRIBUTES (var));
 923       break;
 924     case GRID_SEGMENT_GLOBAL:
 925       DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
 926                                          NULL, DECL_ATTRIBUTES (var));
 927       break;
 928     default:
 929       gcc_unreachable ();
 930     }
 931
 932   if (!TREE_STATIC (var))
 933     {
 934       TREE_STATIC (var) = 1;
 935       varpool_node::finalize_decl (var);
 936     }
 937
 938 }
 939
 940 /* Copy leading register-type assignments to local variables in SRC to just
 941    before DST, Creating temporaries, adjusting mapping of operands in WI and
 942    remapping operands as necessary.  Add any new temporaries to TGT_BIND.
 943    Return the first statement that does not conform to grid_safe_assignment_p
 944    or NULL.  If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
 945    variables in traversed bind statements so that they are put into the
 946    appropriate segment.  */
 947
 948 static gimple *
 949 grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
 950                                      gbind *tgt_bind,
 951                                      enum grid_var_segment var_segment,
 952                                      struct walk_stmt_info *wi)
 953 {
 954   hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
 955   gimple_stmt_iterator gsi;
 956   for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
 957     {
 958       gimple *stmt = gsi_stmt (gsi);
 959       if (gbind *bind = dyn_cast <gbind *> (stmt))
 960         {
 961           gimple *r = grid_copy_leading_local_assignments
 962             (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);
 963
 964           if (var_segment != GRID_SEGMENT_PRIVATE)
 965             for (tree var = gimple_bind_vars (bind);
 966                  var;
 967                  var = DECL_CHAIN (var))
 968               grid_mark_variable_segment (var, var_segment);
 969           if (r)
 970             return r;
 971           else
 972             continue;
 973         }
 974       if (!grid_safe_assignment_p (stmt, NULL))
 975         return stmt;
 976       tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
 977       tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
 978                                  TREE_TYPE (lhs));
 979       DECL_CONTEXT (repl) = current_function_decl;
 980       gimple_bind_append_vars (tgt_bind, repl);
 981
 982       declmap->put (lhs, repl);
 983       gassign *copy = as_a <gassign *> (gimple_copy (stmt));
 984       walk_gimple_op (copy, grid_remap_prebody_decls, wi);
 985       gsi_insert_before (dst, copy, GSI_SAME_STMT);
 986     }
 987   return NULL;
 988 }
 989
 990 /* Statement walker function to make adjustments to statements within the
 991    gridifed kernel copy.  */
 992
 993 static tree
 994 grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
 995                         struct walk_stmt_info *)
 996 {
 997   *handled_ops_p = false;
 998   gimple *stmt = gsi_stmt (*gsi);
 999   if (gimple_code (stmt) == GIMPLE_OMP_FOR
1000       && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD))
1001   {
1002     gomp_for *loop = as_a <gomp_for *> (stmt);
1003     tree clauses = gimple_omp_for_clauses (loop);
1004     tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
1005     if (cl)
1006       OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
1007     else
1008       {
1009         tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
1010         OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
1011         OMP_CLAUSE_CHAIN (c) = clauses;
1012         gimple_omp_for_set_clauses (loop, c);
1013       }
1014   }
1015   return NULL_TREE;
1016 }
1017
1018 /* Given a PARLOOP that is a normal for looping construct but also a part of a
1019    combined construct with a simd loop, eliminate the simd loop.  */
1020
1021 static void
1022 grid_eliminate_combined_simd_part (gomp_for *parloop)
1023 {
1024   struct walk_stmt_info wi;
1025
1026   memset (&wi, 0, sizeof (wi));
1027   wi.val_only = true;
1028   enum gf_mask msk = GF_OMP_FOR_SIMD;
1029   wi.info = (void *) &msk;
1030   walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
1031   gimple *stmt = (gimple *) wi.info;
1032   /* We expect that the SIMD id the only statement in the parallel loop.  */
1033   gcc_assert (stmt
1034               && gimple_code (stmt) == GIMPLE_OMP_FOR
1035               && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD)
1036               && gimple_omp_for_combined_into_p (stmt)
1037               && !gimple_omp_for_combined_p (stmt));
1038   gomp_for *simd = as_a <gomp_for *> (stmt);
1039
1040   /* Copy over the iteration properties because the body refers to the index in
1041      the bottmom-most loop.  */
1042   unsigned i, collapse = gimple_omp_for_collapse (parloop);
1043   gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
1044   for (i = 0; i < collapse; i++)
1045     {
1046       gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
1047       gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
1048       gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
1049       gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
1050     }
1051
1052   tree *tgt= gimple_omp_for_clauses_ptr (parloop);
1053   while (*tgt)
1054     tgt = &OMP_CLAUSE_CHAIN (*tgt);
1055
1056   /* Copy over all clauses, except for linaer clauses, which are turned into
1057      private clauses, and all other simd-specificl clauses, which are
1058      ignored.  */
1059   tree *pc = gimple_omp_for_clauses_ptr (simd);
1060   while (*pc)
1061     {
1062       tree c = *pc;
1063       switch (TREE_CODE (c))
1064         {
1065         case OMP_CLAUSE_LINEAR:
1066           {
1067             tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
1068             OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
1069             OMP_CLAUSE_CHAIN (priv) = NULL;
1070             *tgt = priv;
1071             tgt = &OMP_CLAUSE_CHAIN (priv);
1072             pc = &OMP_CLAUSE_CHAIN (c);
1073             break;
1074           }
1075
1076         case OMP_CLAUSE_SAFELEN:
1077         case OMP_CLAUSE_SIMDLEN:
1078         case OMP_CLAUSE_ALIGNED:
1079           pc = &OMP_CLAUSE_CHAIN (c);
1080           break;
1081
1082         default:
1083           *pc = OMP_CLAUSE_CHAIN (c);
1084           OMP_CLAUSE_CHAIN (c) = NULL;
1085           *tgt = c;
1086           tgt = &OMP_CLAUSE_CHAIN(c);
1087           break;
1088         }
1089     }
1090
1091   /* Finally, throw away the simd and mark the parallel loop as not
1092      combined.  */
1093   gimple_omp_set_body (parloop, gimple_omp_body (simd));
1094   gimple_omp_for_set_combined_p (parloop, false);
1095 }
1096
1097 /* Statement walker function marking all parallels as grid_phony and loops as
1098    grid ones representing threads of a particular thread group.  */
1099
1100 static tree
1101 grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
1102                         struct walk_stmt_info *wi_in)
1103 {
1104   *handled_ops_p = false;
1105   if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
1106     {
1107       *handled_ops_p = true;
1108       gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
1109       gimple_omp_for_set_grid_intra_group (loop, true);
1110       if (gimple_omp_for_combined_p (loop))
1111         grid_eliminate_combined_simd_part (loop);
1112
1113       struct walk_stmt_info body_wi;
1114       memset (&body_wi, 0, sizeof (body_wi));
1115       walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
1116                            grid_process_grid_body, NULL, &body_wi);
1117
1118       gbind *bind = (gbind *) wi_in->info;
1119       tree c;
1120       for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
1121         if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
1122           {
1123             push_gimplify_context ();
1124             tree ov = OMP_CLAUSE_DECL (c);
1125             tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
1126                                     TREE_TYPE (ov));
1127
1128             grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
1129             DECL_CONTEXT (gv) = current_function_decl;
1130             gimple_bind_append_vars (bind, gv);
1131             tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
1132             gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
1133             x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
1134             gimple_seq l = NULL;
1135             gimplify_and_add (x, &l);
1136             gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
1137             pop_gimplify_context (bind);
1138           }
1139     }
1140   return NULL_TREE;
1141 }
1142
1143 /* Statement walker function marking all parallels as grid_phony and loops as
1144    grid ones representing threads of a particular thread group.  */
1145
1146 static tree
1147 grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
1148                                       bool *handled_ops_p,
1149                                       struct walk_stmt_info *wi_in)
1150 {
1151   *handled_ops_p = false;
1152   wi_in->removed_stmt = false;
1153   gimple *stmt = gsi_stmt (*gsi);
1154   if (gbind *bind = dyn_cast <gbind *> (stmt))
1155     {
1156       for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
1157         grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
1158     }
1159   else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
1160     {
1161       *handled_ops_p = true;
1162       gimple_omp_parallel_set_grid_phony (parallel, true);
1163
1164       gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
1165       gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
1166       gimple_seq s = NULL;
1167       gimple_seq_add_stmt (&s, new_bind);
1168       gimple_omp_set_body (parallel, s);
1169
1170       struct walk_stmt_info wi_par;
1171       memset (&wi_par, 0, sizeof (wi_par));
1172       wi_par.info = new_bind;
1173       walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
1174                            grid_mark_tiling_loops, NULL, &wi_par);
1175     }
1176   else if (is_a <gcall *> (stmt))
1177     wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
1178   return NULL_TREE;
1179 }
1180
1181 /* Given freshly copied top level kernel SEQ, identify the individual OMP
1182    components, mark them as part of kernel, copy assignment leading to them
1183    just before DST, remapping them using WI and adding new temporaries to
1184    TGT_BIND, and and return the loop that will be used for kernel dispatch.  */
1185
1186 static gomp_for *
1187 grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
1188                                gimple_stmt_iterator *dst,
1189                                gbind *tgt_bind, struct walk_stmt_info *wi)
1190 {
1191   gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
1192                                                       GRID_SEGMENT_GLOBAL, wi);
1193   gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
1194   gcc_assert (teams);
1195   gimple_omp_teams_set_grid_phony (teams, true);
1196   stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
1197                                               tgt_bind, GRID_SEGMENT_GLOBAL,
1198                                               wi);
1199   gcc_checking_assert (stmt);
1200   gomp_for *dist = dyn_cast <gomp_for *> (stmt);
1201   gcc_assert (dist);
1202   gimple_seq prebody = gimple_omp_for_pre_body (dist);
1203   if (prebody)
1204     grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
1205                                          GRID_SEGMENT_GROUP, wi);
1206
1207   if (grid->tiling)
1208     {
1209       gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
1210       gimple_omp_for_set_grid_group_iter (dist, true);
1211
1212       struct walk_stmt_info wi_tiled;
1213       memset (&wi_tiled, 0, sizeof (wi_tiled));
1214       walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
1215                            grid_mark_tiling_parallels_and_loops, NULL,
1216                            &wi_tiled);
1217       return dist;
1218     }
1219   else
1220     {
1221       gimple_omp_for_set_grid_phony (dist, true);
1222       stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
1223                                                   tgt_bind,
1224                                                   GRID_SEGMENT_PRIVATE, wi);
1225       gcc_checking_assert (stmt);
1226       gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
1227       gimple_omp_parallel_set_grid_phony (parallel, true);
1228       stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
1229                                                   dst, tgt_bind,
1230                                                   GRID_SEGMENT_PRIVATE, wi);
1231       gomp_for *inner_loop = as_a <gomp_for *> (stmt);
1232       gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
1233       prebody = gimple_omp_for_pre_body (inner_loop);
1234       if (prebody)
1235         grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
1236                                              GRID_SEGMENT_PRIVATE, wi);
1237
1238       if (gimple_omp_for_combined_p (inner_loop))
1239         grid_eliminate_combined_simd_part (inner_loop);
1240       struct walk_stmt_info body_wi;
1241       memset (&body_wi, 0, sizeof (body_wi));
1242       walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
1243                            grid_process_grid_body, NULL, &body_wi);
1244
1245       return inner_loop;
1246     }
1247 }
1248
1249 /* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
1250    create a GPU kernel for it.  GSI must point to the same statement, TGT_BIND
1251    is the bind into which temporaries inserted before TARGET should be
1252    added.  */
1253
1254 static void
1255 grid_attempt_target_gridification (gomp_target *target,
1256                                    gimple_stmt_iterator *gsi,
1257                                    gbind *tgt_bind)
1258 {
1259   /* removed group_size */
1260   grid_prop grid = {};
1261   if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
1262     return;
1263
1264   location_t loc = gimple_location (target);
1265   if (dump_enabled_p ())
1266     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target,
1267                      "Target construct will be turned into a gridified HSA "
1268                      "kernel\n");
1269
1270   /* Copy target body to a GPUKERNEL construct:  */
1271   gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
1272     (gimple_omp_body (target));
1273
1274   hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
1275   struct walk_stmt_info wi;
1276   memset (&wi, 0, sizeof (struct walk_stmt_info));
1277   wi.info = declmap;
1278
1279   /* Copy assignments in between OMP statements before target, mark OMP
1280      statements within copy appropriately.  */
1281   gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
1282                                                         tgt_bind, &wi);
1283
1284   gbind *old_bind
1285     = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
1286   gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
1287   tree new_block = gimple_bind_block (new_bind);
1288   tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
1289   BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
1290   BLOCK_SUBBLOCKS (enc_block) = new_block;
1291   BLOCK_SUPERCONTEXT (new_block) = enc_block;
1292   gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
1293   gimple_seq_add_stmt
1294     (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
1295      gpukernel);
1296
1297   for (size_t i = 0; i < grid.collapse; i++)
1298     walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
1299   push_gimplify_context ();
1300   for (size_t i = 0; i < grid.collapse; i++)
1301     {
1302       tree itype, type = TREE_TYPE (gimple_omp_for_index (inner_loop, i));
1303       if (POINTER_TYPE_P (type))
1304         itype = signed_type_for (type);
1305       else
1306         itype = type;
1307
1308       enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
1309       tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
1310       walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
1311       tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
1312       walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
1313       omp_adjust_for_condition (loc, &cond_code, &n2);
1314       n1 = fold_convert (itype, n1);
1315       n2 = fold_convert (itype, n2);
1316
1317       tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);
1318       tree step
1319         = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));
1320
1321       tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
1322       t = fold_build2 (PLUS_EXPR, itype, step, t);
1323       t = fold_build2 (PLUS_EXPR, itype, t, n2);
1324       t = fold_build2 (MINUS_EXPR, itype, t, n1);
1325       if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
1326         t = fold_build2 (TRUNC_DIV_EXPR, itype,
1327                          fold_build1 (NEGATE_EXPR, itype, t),
1328                          fold_build1 (NEGATE_EXPR, itype, step));
1329       else
1330         t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
1331       t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
1332       if (grid.tiling)
1333         {
1334           if (cond_code == GT_EXPR)
1335             step = fold_build1 (NEGATE_EXPR, itype, step);
1336           t = fold_build2 (MULT_EXPR, itype, t, step);
1337         }
1338
1339       tree gs = fold_convert (uint32_type_node, t);
1340       gimple_seq tmpseq = NULL;
1341       gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
1342       if (!gimple_seq_empty_p (tmpseq))
1343         gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
1344
1345       tree ws;
1346       if (grid.group_sizes[i])
1347         {
1348           ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
1349           tmpseq = NULL;
1350           gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
1351           if (!gimple_seq_empty_p (tmpseq))
1352             gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
1353         }
1354       else
1355         ws = build_zero_cst (uint32_type_node);
1356
1357       tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
1358       OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
1359       OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
1360       OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
1361       OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
1362       gimple_omp_target_set_clauses (target, c);
1363     }
1364   pop_gimplify_context (tgt_bind);
1365   delete declmap;
1366   return;
1367 }
1368
1369 /* Walker function doing all the work for create_target_kernels.  */
1370
1371 static tree
1372 grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
1373                                    bool *handled_ops_p,
1374                                    struct walk_stmt_info *incoming)
1375 {
1376   *handled_ops_p = false;
1377
1378   gimple *stmt = gsi_stmt (*gsi);
1379   gomp_target *target = dyn_cast <gomp_target *> (stmt);
1380   if (target)
1381     {
1382       gbind *tgt_bind = (gbind *) incoming->info;
1383       gcc_checking_assert (tgt_bind);
1384       grid_attempt_target_gridification (target, gsi, tgt_bind);
1385       return NULL_TREE;
1386     }
1387   gbind *bind = dyn_cast <gbind *> (stmt);
1388   if (bind)
1389     {
1390       *handled_ops_p = true;
1391       struct walk_stmt_info wi;
1392       memset (&wi, 0, sizeof (wi));
1393       wi.info = bind;
1394       walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
1395                            grid_gridify_all_targets_stmt, NULL, &wi);
1396     }
1397   return NULL_TREE;
1398 }
1399
1400 /* Attempt to gridify all target constructs in BODY_P.  All such targets will
1401    have their bodies duplicated, with the new copy being put into a
1402    gimple_omp_grid_body statement.  All kernel-related construct within the
1403    grid_body will be marked with phony flags or kernel kinds.  Moreover, some
1404    re-structuring is often needed, such as copying pre-bodies before the target
1405    construct so that kernel grid sizes can be computed.  */
1406
1407 void
1408 omp_grid_gridify_all_targets (gimple_seq *body_p)
1409 {
1410   struct walk_stmt_info wi;
1411   memset (&wi, 0, sizeof (wi));
1412   walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
1413 }