/* OpenACC worker partitioning via middle end neutering/broadcasting scheme

   Copyright (C) 2015-2021 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "omp-general.h"
#include "omp-low.h"
#include "gimple-pretty-print.h"
#include "cfghooks.h"
#include "insn-config.h"
#include "recog.h"
#include "internal-fn.h"
#include "bitmap.h"
#include "tree-nested.h"
#include "stor-layout.h"
#include "tree-ssa-threadupdate.h"
#include "tree-into-ssa.h"
#include "splay-tree.h"
#include "target.h"
#include "cfgloop.h"
#include "tree-cfg.h"
#include "omp-offload.h"
#include "attribs.h"
/* Loop structure of the function.  The entire function is described as
   a NULL loop.  */
/* Adapted from 'gcc/config/nvptx/nvptx.c:struct parallel'.  */

struct parallel_g
{
  /* Parent parallel.  */
  parallel_g *parent;

  /* Next sibling parallel.  */
  parallel_g *next;

  /* First child parallel.  */
  parallel_g *inner;

  /* Partitioning mask of the parallel.  */
  unsigned mask;

  /* Partitioning used within inner parallels.  */
  unsigned inner_mask;

  /* Location of parallel forked and join.  The forked is the first
     block in the parallel and the join is the first block after the
     partition.  */
  basic_block forked_block;
  basic_block join_block;

  gimple *forked_stmt;
  gimple *join_stmt;

  gimple *fork_stmt;
  gimple *joining_stmt;

  /* Basic blocks in this parallel, but not in child parallels.  The
     FORKED and JOINING blocks are in the partition.  The FORK and JOIN
     blocks are not.  */
  auto_vec<basic_block> blocks;

  tree record_type;
  tree sender_decl;
  tree receiver_decl;

public:
  parallel_g (parallel_g *parent, unsigned mode);
  ~parallel_g ();
};
/* Constructor links the new parallel into its parent's chain of
   children.  */

parallel_g::parallel_g (parallel_g *parent_, unsigned mask_)
  : parent (parent_), next (0), inner (0), mask (mask_), inner_mask (0)
{
  forked_block = join_block = 0;
  forked_stmt = join_stmt = NULL;
  fork_stmt = joining_stmt = NULL;

  record_type = NULL_TREE;
  sender_decl = NULL_TREE;
  receiver_decl = NULL_TREE;

  if (parent)
    {
      next = parent->inner;
      parent->inner = this;
    }
}

parallel_g::~parallel_g ()
{
  delete inner;
  delete next;
}
static bool
local_var_based_p (tree decl)
{
  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      return !is_global_var (decl);

    case COMPONENT_REF:
    case BIT_FIELD_REF:
    case ARRAY_REF:
      return local_var_based_p (TREE_OPERAND (decl, 0));

    default:
      return false;
    }
}

/* Map of basic blocks to gimple stmts.  */

typedef hash_map<basic_block, gimple *> bb_stmt_map_t;
/* Calls to OpenACC routines are made by all workers/wavefronts/warps, since
   the routine likely contains partitioned loops (else it will do its own
   neutering and variable propagation).  Return TRUE if a function call CALL
   should be made in (worker) single mode instead, rather than redundant
   mode.  */

static bool
omp_sese_active_worker_call (gcall *call)
{
#define GOMP_DIM_SEQ GOMP_DIM_MAX
  tree fndecl = gimple_call_fndecl (call);

  if (!fndecl)
    return true;

  tree attrs = oacc_get_fn_attrib (fndecl);

  if (!attrs)
    return true;

  int level = oacc_fn_attrib_level (attrs);

  /* Neither regular functions nor "seq" routines should be run by all threads
     in worker-single mode.  */
  return level == -1 || level == GOMP_DIM_SEQ;
#undef GOMP_DIM_SEQ
}
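/* For example (an illustrative sketch): a function declared

     #pragma acc routine worker
     void rtn (int *);

   carries an 'oacc function' attribute with level GOMP_DIM_WORKER, so the
   check above returns FALSE and calls to RTN stay in redundant mode; a
   'seq' routine (level GOMP_DIM_SEQ) or a function lacking the attribute
   altogether instead yields TRUE and is called in worker-single mode.  */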
/* Split basic blocks so that each forked and join unspec is at the
   start of its basic block.  Thus afterwards each block will have a
   single partitioning mode.  We do the same for returns, as they are
   executed by every thread.  Populate MAP with head and tail blocks.
   We also clear the BB visited flag, which is used when finding
   partitions.  */
/* Adapted from 'gcc/config/nvptx/nvptx.c:nvptx_split_blocks'.  */

static void
omp_sese_split_blocks (bb_stmt_map_t *map)
{
  auto_vec<gimple *> worklist;
  basic_block block;

  /* Locate all the reorg instructions of interest.  */
  FOR_ALL_BB_FN (block, cfun)
    {
      /* Clear visited flag, for use by parallel locator.  */
      block->flags &= ~BB_VISITED;

      for (gimple_stmt_iterator gsi = gsi_start_bb (block);
	   !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	    {
	      enum ifn_unique_kind k = ((enum ifn_unique_kind)
		TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));

	      if (k == IFN_UNIQUE_OACC_JOIN)
		worklist.safe_push (stmt);
	      else if (k == IFN_UNIQUE_OACC_FORK)
		{
		  gcc_assert (gsi_one_before_end_p (gsi));
		  basic_block forked_block = single_succ (block);
		  gimple_stmt_iterator gsi2 = gsi_start_bb (forked_block);

		  /* We push a NOP as a placeholder for the "forked" stmt.
		     This is then recognized in omp_sese_find_par.  */
		  gimple *nop = gimple_build_nop ();
		  gsi_insert_before (&gsi2, nop, GSI_SAME_STMT);

		  worklist.safe_push (nop);
		}
	    }
	  else if (gimple_code (stmt) == GIMPLE_RETURN
		   || gimple_code (stmt) == GIMPLE_COND
		   || gimple_code (stmt) == GIMPLE_SWITCH
		   || (gimple_code (stmt) == GIMPLE_CALL
		       && !gimple_call_internal_p (stmt)
		       && !omp_sese_active_worker_call (as_a <gcall *> (stmt))))
	    worklist.safe_push (stmt);
	  else if (is_gimple_assign (stmt))
	    {
	      tree lhs = gimple_assign_lhs (stmt);

	      /* Force assignments to components/fields/elements of local
		 aggregates into fully-partitioned (redundant) mode.  This
		 avoids having to broadcast the whole aggregate.  The RHS of
		 the assignment will be propagated using the normal
		 mechanism.  */

	      switch (TREE_CODE (lhs))
		{
		case COMPONENT_REF:
		case BIT_FIELD_REF:
		case ARRAY_REF:
		  {
		    tree aggr = TREE_OPERAND (lhs, 0);

		    if (local_var_based_p (aggr))
		      worklist.safe_push (stmt);
		  }
		  break;

		default:
		  ;
		}
	    }
	}
    }

  /* Split blocks on the worklist.  */
  unsigned ix;
  gimple *stmt;

  for (ix = 0; worklist.iterate (ix, &stmt); ix++)
    {
      basic_block block = gimple_bb (stmt);

      if (gimple_code (stmt) == GIMPLE_COND)
	{
	  gcond *orig_cond = as_a <gcond *> (stmt);
	  tree_code code = gimple_expr_code (orig_cond);
	  tree pred = make_ssa_name (boolean_type_node);
	  gimple *asgn = gimple_build_assign (pred, code,
					      gimple_cond_lhs (orig_cond),
					      gimple_cond_rhs (orig_cond));
	  gcond *new_cond
	    = gimple_build_cond (NE_EXPR, pred, boolean_false_node,
				 gimple_cond_true_label (orig_cond),
				 gimple_cond_false_label (orig_cond));

	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  gsi_insert_before (&gsi, asgn, GSI_SAME_STMT);
	  gsi_replace (&gsi, new_cond, true);

	  edge e = split_block (block, asgn);
	  block = e->dest;
	  map->get_or_insert (block) = new_cond;
	}
      else if ((gimple_code (stmt) == GIMPLE_CALL
		&& !gimple_call_internal_p (stmt))
	       || is_gimple_assign (stmt))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  gsi_prev (&gsi);

	  edge call = split_block (block, gsi_stmt (gsi));

	  gimple *call_stmt = gsi_stmt (gsi_start_bb (call->dest));

	  edge call_to_ret = split_block (call->dest, call_stmt);

	  map->get_or_insert (call_to_ret->src) = call_stmt;
	}
      else
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  gsi_prev (&gsi);

	  if (gsi_end_p (gsi))
	    map->get_or_insert (block) = stmt;
	  else
	    {
	      /* Split block before insn.  The insn is in the new block.  */
	      edge e = split_block (block, gsi_stmt (gsi));

	      block = e->dest;
	      map->get_or_insert (block) = stmt;
	    }
	}
    }
}
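/* As an example of the GIMPLE_COND handling above (invented SSA names): a
   block ending in

     if (a_1 < b_2) goto <L1>; else goto <L2>;

   is rewritten, roughly, as

     pred_3 = a_1 < b_2;
     <block split here>
     if (pred_3 != 0) goto <L1>; else goto <L2>;

   so the comparison result becomes an ordinary SSA definition that the
   broadcasting machinery below can propagate to the other workers.  */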
static const char *
mask_name (unsigned mask)
{
  switch (mask)
    {
    case 0: return "gang redundant";
    case 1: return "gang partitioned";
    case 2: return "worker partitioned";
    case 3: return "gang+worker partitioned";
    case 4: return "vector partitioned";
    case 5: return "gang+vector partitioned";
    case 6: return "worker+vector partitioned";
    case 7: return "fully partitioned";
    default: return "<illegal>";
    }
}
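/* The case values above are combinations of GOMP_DIM_MASK (GOMP_DIM_GANG)
   == 1, GOMP_DIM_MASK (GOMP_DIM_WORKER) == 2 and GOMP_DIM_MASK
   (GOMP_DIM_VECTOR) == 4 (see gomp-constants.h), so e.g. mask == 6 means
   worker- and vector-partitioned execution.  */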
/* Dump this parallel and all its inner parallels.  */
/* Adapted from 'gcc/config/nvptx/nvptx.c:nvptx_dump_pars'.  */

static void
omp_sese_dump_pars (parallel_g *par, unsigned depth)
{
  fprintf (dump_file, "%u: mask %d (%s) head=%d, tail=%d\n",
	   depth, par->mask, mask_name (par->mask),
	   par->forked_block ? par->forked_block->index : -1,
	   par->join_block ? par->join_block->index : -1);

  fprintf (dump_file, "    blocks:");

  basic_block block;
  for (unsigned ix = 0; par->blocks.iterate (ix, &block); ix++)
    fprintf (dump_file, " %d", block->index);
  fprintf (dump_file, "\n");
  if (par->inner)
    omp_sese_dump_pars (par->inner, depth + 1);

  if (par->next)
    omp_sese_dump_pars (par->next, depth);
}
/* If BLOCK contains a fork/join marker, process it to create or
   terminate a loop structure.  Add this block to the current loop,
   and then walk successor blocks.  */
/* Adapted from 'gcc/config/nvptx/nvptx.c:nvptx_find_par'.  */

static parallel_g *
omp_sese_find_par (bb_stmt_map_t *map, parallel_g *par, basic_block block)
{
  if (block->flags & BB_VISITED)
    return par;
  block->flags |= BB_VISITED;

  if (gimple **stmtp = map->get (block))
    {
      gimple *stmt = *stmtp;

      if (gimple_code (stmt) == GIMPLE_COND
	  || gimple_code (stmt) == GIMPLE_SWITCH
	  || gimple_code (stmt) == GIMPLE_RETURN
	  || (gimple_code (stmt) == GIMPLE_CALL
	      && !gimple_call_internal_p (stmt))
	  || is_gimple_assign (stmt))
	{
	  /* A single block that is forced to be at the maximum partition
	     level.  Make a singleton par for it.  */
	  par = new parallel_g (par, GOMP_DIM_MASK (GOMP_DIM_GANG)
				     | GOMP_DIM_MASK (GOMP_DIM_WORKER)
				     | GOMP_DIM_MASK (GOMP_DIM_VECTOR));
	  par->forked_block = block;
	  par->forked_stmt = stmt;
	  par->blocks.safe_push (block);
	  par = par->parent;
	  goto walk_successors;
	}
      else if (gimple_nop_p (stmt))
	{
	  basic_block pred = single_pred (block);
	  gcc_assert (pred);
	  gimple_stmt_iterator gsi = gsi_last_bb (pred);
	  gimple *final_stmt = gsi_stmt (gsi);

	  if (gimple_call_internal_p (final_stmt, IFN_UNIQUE))
	    {
	      gcall *call = as_a <gcall *> (final_stmt);
	      enum ifn_unique_kind k = ((enum ifn_unique_kind)
		TREE_INT_CST_LOW (gimple_call_arg (call, 0)));

	      if (k == IFN_UNIQUE_OACC_FORK)
		{
		  HOST_WIDE_INT dim
		    = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
		  unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0;

		  par = new parallel_g (par, mask);
		  par->forked_block = block;
		  par->forked_stmt = final_stmt;
		  par->fork_stmt = stmt;
		}
	      else
		gcc_unreachable ();
	    }
	  else
	    gcc_unreachable ();
	}
      else if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  gcall *call = as_a <gcall *> (stmt);
	  enum ifn_unique_kind k = ((enum ifn_unique_kind)
	    TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
	  if (k == IFN_UNIQUE_OACC_JOIN)
	    {
	      HOST_WIDE_INT dim = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
	      unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0;

	      gcc_assert (par->mask == mask);
	      par->join_block = block;
	      par->join_stmt = stmt;
	      par = par->parent;
	    }
	  else
	    gcc_unreachable ();
	}
      else
	gcc_unreachable ();
    }

  if (par)
    /* Add this block onto the current loop's list of blocks.  */
    par->blocks.safe_push (block);
  else
    /* This must be the entry block.  Create a NULL parallel.  */
    par = new parallel_g (0, 0);

walk_successors:
  /* Walk successor blocks.  */
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, block->succs)
    omp_sese_find_par (map, par, e->dest);

  return par;
}
/* DFS walk the CFG looking for fork & join markers.  Construct
   loop structures as we go.  MAP is a mapping of basic blocks
   to head & tail markers, discovered when splitting blocks.  This
   speeds up the discovery.  We rely on the BB visited flag having
   been cleared when splitting blocks.  */
/* Adapted from 'gcc/config/nvptx/nvptx.c:nvptx_discover_pars'.  */

static parallel_g *
omp_sese_discover_pars (bb_stmt_map_t *map)
{
  basic_block block;

  /* Mark exit blocks as visited.  */
  block = EXIT_BLOCK_PTR_FOR_FN (cfun);
  block->flags |= BB_VISITED;

  /* And entry block as not.  */
  block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
  block->flags &= ~BB_VISITED;

  parallel_g *par = omp_sese_find_par (map, 0, block);

  if (dump_file)
    {
      fprintf (dump_file, "\nLoops\n");
      omp_sese_dump_pars (par, 0);
      fprintf (dump_file, "\n");
    }

  return par;
}
static void
populate_single_mode_bitmaps (parallel_g *par, bitmap worker_single,
			      bitmap vector_single, unsigned outer_mask,
			      int depth)
{
  unsigned mask = outer_mask | par->mask;

  basic_block block;

  for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
    {
      if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
	bitmap_set_bit (worker_single, block->index);

      if ((mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)) == 0)
	bitmap_set_bit (vector_single, block->index);
    }

  if (par->inner)
    populate_single_mode_bitmaps (par->inner, worker_single, vector_single,
				  mask, depth + 1);
  if (par->next)
    populate_single_mode_bitmaps (par->next, worker_single, vector_single,
				  outer_mask, depth);
}
/* A map from SSA names or var decls to record fields.  */

typedef hash_map<tree, tree> field_map_t;

/* For each propagation record type, this is a map from SSA names or var decls
   to propagate, to the field in the record type that should be used for
   transmission and reception.  */

typedef hash_map<tree, field_map_t *> record_field_map_t;

static void
install_var_field (tree var, tree record_type, field_map_t *fields)
{
  tree name;
  char tmp[20];

  if (TREE_CODE (var) == SSA_NAME)
    {
      name = SSA_NAME_IDENTIFIER (var);
      if (!name)
	{
	  sprintf (tmp, "_%u", (unsigned) SSA_NAME_VERSION (var));
	  name = get_identifier (tmp);
	}
    }
  else if (TREE_CODE (var) == VAR_DECL)
    {
      name = DECL_NAME (var);
      if (!name)
	{
	  sprintf (tmp, "D_%u", (unsigned) DECL_UID (var));
	  name = get_identifier (tmp);
	}
    }
  else
    gcc_unreachable ();

  gcc_assert (!fields->get (var));

  tree type = TREE_TYPE (var);

  if (POINTER_TYPE_P (type)
      && TYPE_RESTRICT (type))
    type = build_qualified_type (type, TYPE_QUALS (type) & ~TYPE_QUAL_RESTRICT);

  tree field = build_decl (BUILTINS_LOCATION, FIELD_DECL, name, type);

  if (TREE_CODE (var) == VAR_DECL && type == TREE_TYPE (var))
    {
      SET_DECL_ALIGN (field, DECL_ALIGN (var));
      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
    }
  else
    SET_DECL_ALIGN (field, TYPE_ALIGN (type));

  fields->put (var, field);

  insert_field_into_struct (record_type, field);
}
/* Sets of SSA_NAMES or VAR_DECLs to propagate.  */

typedef hash_set<tree> propagation_set;

static void
find_ssa_names_to_propagate (parallel_g *par, unsigned outer_mask,
			     bitmap worker_single, bitmap vector_single,
			     vec<propagation_set *> *prop_set)
{
  unsigned mask = outer_mask | par->mask;

  if (par->inner)
    find_ssa_names_to_propagate (par->inner, mask, worker_single,
				 vector_single, prop_set);
  if (par->next)
    find_ssa_names_to_propagate (par->next, outer_mask, worker_single,
				 vector_single, prop_set);

  if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
    {
      basic_block block;
      int ix;

      for (ix = 0; par->blocks.iterate (ix, &block); ix++)
	{
	  for (gphi_iterator psi = gsi_start_phis (block);
	       !gsi_end_p (psi); gsi_next (&psi))
	    {
	      gphi *phi = psi.phi ();
	      use_operand_p use;
	      ssa_op_iter iter;

	      FOR_EACH_PHI_ARG (use, phi, iter, SSA_OP_USE)
		{
		  tree var = USE_FROM_PTR (use);

		  if (TREE_CODE (var) != SSA_NAME)
		    continue;

		  gimple *def_stmt = SSA_NAME_DEF_STMT (var);

		  if (gimple_nop_p (def_stmt))
		    continue;

		  basic_block def_bb = gimple_bb (def_stmt);

		  if (bitmap_bit_p (worker_single, def_bb->index))
		    {
		      if (!(*prop_set)[def_bb->index])
			(*prop_set)[def_bb->index] = new propagation_set;

		      propagation_set *ws_prop = (*prop_set)[def_bb->index];

		      ws_prop->add (var);
		    }
		}
	    }

	  for (gimple_stmt_iterator gsi = gsi_start_bb (block);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      use_operand_p use;
	      ssa_op_iter iter;
	      gimple *stmt = gsi_stmt (gsi);

	      FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
		{
		  tree var = USE_FROM_PTR (use);

		  gimple *def_stmt = SSA_NAME_DEF_STMT (var);

		  if (gimple_nop_p (def_stmt))
		    continue;

		  basic_block def_bb = gimple_bb (def_stmt);

		  if (bitmap_bit_p (worker_single, def_bb->index))
		    {
		      if (!(*prop_set)[def_bb->index])
			(*prop_set)[def_bb->index] = new propagation_set;

		      propagation_set *ws_prop = (*prop_set)[def_bb->index];

		      ws_prop->add (var);
		    }
		}
	    }
	}
    }
}
/* Callback for walk_gimple_stmt to find RHS VAR_DECLs (uses) in a
   statement.  */

static tree
find_partitioned_var_uses_1 (tree *node, int *, void *data)
{
  walk_stmt_info *wi = (walk_stmt_info *) data;
  hash_set<tree> *partitioned_var_uses = (hash_set<tree> *) wi->info;

  if (!wi->is_lhs && VAR_P (*node))
    partitioned_var_uses->add (*node);

  return NULL_TREE;
}

static void
find_partitioned_var_uses (parallel_g *par, unsigned outer_mask,
			   hash_set<tree> *partitioned_var_uses)
{
  unsigned mask = outer_mask | par->mask;

  if (par->inner)
    find_partitioned_var_uses (par->inner, mask, partitioned_var_uses);
  if (par->next)
    find_partitioned_var_uses (par->next, outer_mask, partitioned_var_uses);

  if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
    {
      basic_block block;
      int ix;

      for (ix = 0; par->blocks.iterate (ix, &block); ix++)
	for (gimple_stmt_iterator gsi = gsi_start_bb (block);
	     !gsi_end_p (gsi); gsi_next (&gsi))
	  {
	    walk_stmt_info wi;
	    memset (&wi, 0, sizeof (wi));
	    wi.info = (void *) partitioned_var_uses;
	    walk_gimple_stmt (&gsi, NULL, find_partitioned_var_uses_1, &wi);
	  }
    }
}
/* Gang-private variables (typically placed in a GPU's shared memory) do not
   need to be processed by the worker-propagation mechanism.  Populate the
   GANG_PRIVATE_VARS set with any such variables found in the current
   function.  */

static void
find_gang_private_vars (hash_set<tree> *gang_private_vars)
{
  basic_block block;

  FOR_EACH_BB_FN (block, cfun)
    {
      for (gimple_stmt_iterator gsi = gsi_start_bb (block);
	   !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	    {
	      enum ifn_unique_kind k = ((enum ifn_unique_kind)
		TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
	      if (k == IFN_UNIQUE_OACC_PRIVATE)
		{
		  HOST_WIDE_INT level
		    = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
		  if (level != GOMP_DIM_GANG)
		    continue;
		  for (unsigned i = 3; i < gimple_call_num_args (stmt); i++)
		    {
		      tree arg = gimple_call_arg (stmt, i);
		      gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
		      tree decl = TREE_OPERAND (arg, 0);
		      gang_private_vars->add (decl);
		    }
		}
	    }
	}
    }
}
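/* As consumed above, the IFN_UNIQUE_OACC_PRIVATE markers have the shape:
   argument 0 is the IFN_UNIQUE kind, argument 2 the partitioning level,
   and arguments 3 onwards are ADDR_EXPRs of the privatized decls.  */

/* Find local variables that may be clobbered in worker-single blocks while
   also being used in worker-partitioned contexts (as recorded in
   PARTITIONED_VAR_USES), and add them to the per-block PROP_SET so their
   values get broadcast; gang-private variables are exempt.  */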
static void
find_local_vars_to_propagate (parallel_g *par, unsigned outer_mask,
			      hash_set<tree> *partitioned_var_uses,
			      hash_set<tree> *gang_private_vars,
			      vec<propagation_set *> *prop_set)
{
  unsigned mask = outer_mask | par->mask;

  if (par->inner)
    find_local_vars_to_propagate (par->inner, mask, partitioned_var_uses,
				  gang_private_vars, prop_set);
  if (par->next)
    find_local_vars_to_propagate (par->next, outer_mask, partitioned_var_uses,
				  gang_private_vars, prop_set);

  if (!(mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)))
    {
      basic_block block;
      int ix;

      for (ix = 0; par->blocks.iterate (ix, &block); ix++)
	{
	  for (gimple_stmt_iterator gsi = gsi_start_bb (block);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);
	      tree var;
	      unsigned i;

	      FOR_EACH_LOCAL_DECL (cfun, i, var)
		{
		  if (!VAR_P (var)
		      || is_global_var (var)
		      || AGGREGATE_TYPE_P (TREE_TYPE (var))
		      || !partitioned_var_uses->contains (var)
		      || gang_private_vars->contains (var))
		    continue;

		  if (stmt_may_clobber_ref_p (stmt, var))
		    {
		      if (dump_file)
			{
			  fprintf (dump_file, "bb %u: local variable may be "
				   "clobbered in %s mode: ", block->index,
				   mask_name (mask));
			  print_generic_expr (dump_file, var, TDF_SLIM);
			  fprintf (dump_file, "\n");
			}

		      if (!(*prop_set)[block->index])
			(*prop_set)[block->index] = new propagation_set;

		      propagation_set *ws_prop
			= (*prop_set)[block->index];

		      ws_prop->add (var);
		    }
		}
	    }
	}
    }
}
/* Transform basic blocks FROM, TO (which may be the same block) into:
   if (GOACC_single_start ())
     BLOCK;
   GOACC_barrier ();
				\  |  /
				+----+
				|    |        (new) predicate block
				+----+--
   \  |  /   \  |  /		  |t \
   +----+    +----+		+----+  |
   |    |    |    |	===>	|    |  | f   (old) from block
   +----+    +----+		+----+  |
     |	      t/ \f		  |  /
				+----+/
  (split  (split before		|    |        skip block
  at end)   condition)		+----+
				t/  \f
*/

static void
worker_single_simple (basic_block from, basic_block to,
		      hash_set<tree> *def_escapes_block)
{
  gimple *call, *cond;
  tree lhs, decl;
  basic_block skip_block;

  gimple_stmt_iterator gsi = gsi_last_bb (to);
  if (EDGE_COUNT (to->succs) > 1)
    {
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND);
      gsi_prev (&gsi);
    }
  edge e = split_block (to, gsi_stmt (gsi));
  skip_block = e->dest;

  gimple_stmt_iterator start = gsi_after_labels (from);

  decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_START);
  lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl)));
  call = gimple_build_call (decl, 0);
  gimple_call_set_lhs (call, lhs);
  gsi_insert_before (&start, call, GSI_NEW_STMT);
  update_stmt (call);

  cond = gimple_build_cond (EQ_EXPR, lhs,
			    fold_convert_loc (UNKNOWN_LOCATION,
					      TREE_TYPE (lhs),
					      boolean_true_node),
			    NULL_TREE, NULL_TREE);
  gsi_insert_after (&start, cond, GSI_NEW_STMT);
  update_stmt (cond);

  edge et = split_block (from, cond);
  et->flags &= ~EDGE_FALLTHRU;
  et->flags |= EDGE_TRUE_VALUE;
  /* Make the active worker the more probable path so we prefer fallthrough
     (letting the idle workers jump around more).  */
  et->probability = profile_probability::likely ();

  edge ef = make_edge (from, skip_block, EDGE_FALSE_VALUE);
  ef->probability = et->probability.invert ();

  basic_block neutered = split_edge (ef);
  gimple_stmt_iterator neut_gsi = gsi_last_bb (neutered);

  for (gsi = gsi_start_bb (et->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      ssa_op_iter iter;
      tree var;

      FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_DEF)
	{
	  if (def_escapes_block->contains (var))
	    {
	      gphi *join_phi = create_phi_node (NULL_TREE, skip_block);
	      create_new_def_for (var, join_phi,
				  gimple_phi_result_ptr (join_phi));
	      add_phi_arg (join_phi, var, e, UNKNOWN_LOCATION);

	      tree neutered_def = copy_ssa_name (var, NULL);
	      /* We really want "don't care" or some value representing
		 undefined here, but optimizers will probably get rid of the
		 zero-assignments anyway.  */
	      gassign *zero = gimple_build_assign (neutered_def,
				build_zero_cst (TREE_TYPE (neutered_def)));

	      gsi_insert_after (&neut_gsi, zero, GSI_CONTINUE_LINKING);
	      update_stmt (zero);

	      add_phi_arg (join_phi, neutered_def, single_succ_edge (neutered),
			   UNKNOWN_LOCATION);
	      update_stmt (join_phi);
	    }
	}
    }

  gsi = gsi_start_bb (skip_block);

  decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
  gimple *acc_bar = gimple_build_call (decl, 0);

  gsi_insert_before (&gsi, acc_bar, GSI_SAME_STMT);
  update_stmt (acc_bar);
}
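/* Sketch of the effect of the above (invented SSA names): a worker-single
   block computing 'x_1 = ...' becomes, roughly,

     _2 = GOACC_single_start ();
     if (_2 == 1) goto <active>; else goto <neutered>;
     <active>:   x_1 = ...;
     <neutered>: x_3 = 0;
     <skip>:     x_4 = PHI <x_1 (active), x_3 (neutered)>;
		 GOACC_barrier ();

   with one such phi per definition that escapes the block.  */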
/* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
   as appropriate.  */
/* Adapted from 'gcc/omp-low.c:omp_build_component_ref'.  */

static tree
oacc_build_component_ref (tree obj, tree field)
{
  tree field_type = TREE_TYPE (field);
  tree obj_type = TREE_TYPE (obj);
  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type)))
    field_type = build_qualified_type
			(field_type,
			 KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type)));

  tree ret = build3 (COMPONENT_REF, field_type, obj, field, NULL);
  if (TREE_THIS_VOLATILE (field))
    TREE_THIS_VOLATILE (ret) |= 1;
  if (TREE_READONLY (field))
    TREE_READONLY (ret) |= 1;
  return ret;
}

static tree
build_receiver_ref (tree var, tree receiver_decl, field_map_t *fields)
{
  tree x = build_simple_mem_ref (receiver_decl);
  tree field = *fields->get (var);
  TREE_THIS_NOTRAP (x) = 1;
  x = oacc_build_component_ref (x, field);
  return x;
}

static tree
build_sender_ref (tree var, tree sender_decl, field_map_t *fields)
{
  tree field = *fields->get (var);
  return oacc_build_component_ref (sender_decl, field);
}
static int
sort_by_ssa_version_or_uid (const void *p1, const void *p2)
{
  const tree t1 = *(const tree *)p1;
  const tree t2 = *(const tree *)p2;

  if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) == SSA_NAME)
    return SSA_NAME_VERSION (t1) - SSA_NAME_VERSION (t2);
  else if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) != SSA_NAME)
    return -1;
  else if (TREE_CODE (t1) != SSA_NAME && TREE_CODE (t2) == SSA_NAME)
    return 1;
  else
    return DECL_UID (t1) - DECL_UID (t2);
}

static int
sort_by_size_then_ssa_version_or_uid (const void *p1, const void *p2)
{
  const tree t1 = *(const tree *)p1;
  const tree t2 = *(const tree *)p2;
  unsigned HOST_WIDE_INT s1 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t1)));
  unsigned HOST_WIDE_INT s2 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t2)));
  if (s1 != s2)
    return s2 - s1;
  else
    return sort_by_ssa_version_or_uid (p1, p2);
}
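/* Sorting fields by decreasing size (and only then by SSA version or UID)
   presumably packs the broadcast record tightly, minimizing padding between
   differently-sized members, while keeping the layout stable from run to
   run.  */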
static void
worker_single_copy (basic_block from, basic_block to,
		    hash_set<tree> *def_escapes_block,
		    hash_set<tree> *worker_partitioned_uses,
		    tree record_type, record_field_map_t *record_field_map)
{
  /* If we only have virtual defs, we'll have no record type, but we still want
     to emit single_copy_start and (particularly) single_copy_end to act as
     a vdef source on the neutered edge representing memory writes on the
     non-neutered edge.  */
  if (!record_type)
    record_type = char_type_node;

  tree sender_decl
    = targetm.goacc.create_worker_broadcast_record (record_type, true,
						    ".oacc_worker_o");
  tree receiver_decl
    = targetm.goacc.create_worker_broadcast_record (record_type, false,
						    ".oacc_worker_i");

  gimple_stmt_iterator gsi = gsi_last_bb (to);
  if (EDGE_COUNT (to->succs) > 1)
    gsi_prev (&gsi);
  edge e = split_block (to, gsi_stmt (gsi));
  basic_block barrier_block = e->dest;

  gimple_stmt_iterator start = gsi_after_labels (from);

  tree decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_START);

  tree lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl)));

  gimple *call = gimple_build_call (decl, 1,
				    build_fold_addr_expr (sender_decl));
  gimple_call_set_lhs (call, lhs);
  gsi_insert_before (&start, call, GSI_NEW_STMT);
  update_stmt (call);

  tree conv_tmp = make_ssa_name (TREE_TYPE (receiver_decl));

  gimple *conv = gimple_build_assign (conv_tmp,
				      fold_convert (TREE_TYPE (receiver_decl),
						    lhs));
  update_stmt (conv);
  gsi_insert_after (&start, conv, GSI_NEW_STMT);
  gimple *asgn = gimple_build_assign (receiver_decl, conv_tmp);
  gsi_insert_after (&start, asgn, GSI_NEW_STMT);
  update_stmt (asgn);

  tree zero_ptr = build_int_cst (TREE_TYPE (receiver_decl), 0);

  tree recv_tmp = make_ssa_name (TREE_TYPE (receiver_decl));
  asgn = gimple_build_assign (recv_tmp, receiver_decl);
  gsi_insert_after (&start, asgn, GSI_NEW_STMT);
  update_stmt (asgn);

  gimple *cond = gimple_build_cond (EQ_EXPR, recv_tmp, zero_ptr, NULL_TREE,
				    NULL_TREE);
  update_stmt (cond);

  gsi_insert_after (&start, cond, GSI_NEW_STMT);

  edge et = split_block (from, cond);
  et->flags &= ~EDGE_FALLTHRU;
  et->flags |= EDGE_TRUE_VALUE;
  /* Make the active worker the more probable path so we prefer fallthrough
     (letting the idle workers jump around more).  */
  et->probability = profile_probability::likely ();

  basic_block body = et->dest;

  edge ef = make_edge (from, barrier_block, EDGE_FALSE_VALUE);
  ef->probability = et->probability.invert ();

  decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
  gimple *acc_bar = gimple_build_call (decl, 0);

  gimple_stmt_iterator bar_gsi = gsi_start_bb (barrier_block);
  gsi_insert_before (&bar_gsi, acc_bar, GSI_NEW_STMT);

  cond = gimple_build_cond (NE_EXPR, recv_tmp, zero_ptr, NULL_TREE, NULL_TREE);
  gsi_insert_after (&bar_gsi, cond, GSI_NEW_STMT);

  edge et2 = split_block (barrier_block, cond);
  et2->flags &= ~EDGE_FALLTHRU;
  et2->flags |= EDGE_TRUE_VALUE;
  et2->probability = profile_probability::unlikely ();

  basic_block exit_block = et2->dest;

  basic_block copyout_block = split_edge (et2);
  edge ef2 = make_edge (barrier_block, exit_block, EDGE_FALSE_VALUE);
  ef2->probability = et2->probability.invert ();

  gimple_stmt_iterator copyout_gsi = gsi_start_bb (copyout_block);

  edge copyout_to_exit = single_succ_edge (copyout_block);

  gimple_seq sender_seq = NULL;

  /* Make sure we iterate over definitions in a stable order.  */
  auto_vec<tree> escape_vec (def_escapes_block->elements ());
  for (hash_set<tree>::iterator it = def_escapes_block->begin ();
       it != def_escapes_block->end (); ++it)
    escape_vec.quick_push (*it);
  escape_vec.qsort (sort_by_ssa_version_or_uid);

  for (unsigned i = 0; i < escape_vec.length (); i++)
    {
      tree var = escape_vec[i];

      if (TREE_CODE (var) == SSA_NAME && SSA_NAME_IS_VIRTUAL_OPERAND (var))
	continue;

      tree barrier_def = 0;

      if (TREE_CODE (var) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (var);

	  if (gimple_nop_p (def_stmt))
	    continue;

	  /* The barrier phi takes one result from the actual work of the
	     block we're neutering, and the other result is constant zero of
	     the same type.  */

	  gphi *barrier_phi = create_phi_node (NULL_TREE, barrier_block);
	  barrier_def = create_new_def_for (var, barrier_phi,
			  gimple_phi_result_ptr (barrier_phi));

	  add_phi_arg (barrier_phi, var, e, UNKNOWN_LOCATION);
	  add_phi_arg (barrier_phi, build_zero_cst (TREE_TYPE (var)), ef,
		       UNKNOWN_LOCATION);

	  update_stmt (barrier_phi);
	}
      else
	gcc_assert (TREE_CODE (var) == VAR_DECL);

      /* If we had no record type, we will have no fields map.  */
      field_map_t **fields_p = record_field_map->get (record_type);
      field_map_t *fields = fields_p ? *fields_p : NULL;

      if (worker_partitioned_uses->contains (var)
	  && fields
	  && fields->get (var))
	{
	  tree neutered_def = make_ssa_name (TREE_TYPE (var));

	  /* Receive definition from shared memory block.  */

	  tree receiver_ref = build_receiver_ref (var, receiver_decl, fields);
	  gassign *recv = gimple_build_assign (neutered_def,
					       receiver_ref);
	  gsi_insert_after (&copyout_gsi, recv, GSI_CONTINUE_LINKING);
	  update_stmt (recv);

	  if (TREE_CODE (var) == VAR_DECL)
	    {
	      /* If it's a VAR_DECL, we only copied to an SSA temporary.  Copy
		 to the final location now.  */
	      gassign *asgn = gimple_build_assign (var, neutered_def);
	      gsi_insert_after (&copyout_gsi, asgn, GSI_CONTINUE_LINKING);
	      update_stmt (asgn);
	    }
	  else
	    {
	      /* If it's an SSA name, create a new phi at the join node to
		 represent either the output from the active worker (the
		 barrier) or the inactive workers (the copyout block).  */
	      gphi *join_phi = create_phi_node (NULL_TREE, exit_block);
	      create_new_def_for (barrier_def, join_phi,
				  gimple_phi_result_ptr (join_phi));
	      add_phi_arg (join_phi, barrier_def, ef2, UNKNOWN_LOCATION);
	      add_phi_arg (join_phi, neutered_def, copyout_to_exit,
			   UNKNOWN_LOCATION);
	      update_stmt (join_phi);
	    }

	  /* Send definition to shared memory block.  */

	  tree sender_ref = build_sender_ref (var, sender_decl, fields);

	  if (TREE_CODE (var) == SSA_NAME)
	    {
	      gassign *send = gimple_build_assign (sender_ref, var);
	      gimple_seq_add_stmt (&sender_seq, send);
	      update_stmt (send);
	    }
	  else if (TREE_CODE (var) == VAR_DECL)
	    {
	      tree tmp = make_ssa_name (TREE_TYPE (var));
	      gassign *send = gimple_build_assign (tmp, var);
	      gimple_seq_add_stmt (&sender_seq, send);
	      update_stmt (send);
	      send = gimple_build_assign (sender_ref, tmp);
	      gimple_seq_add_stmt (&sender_seq, send);
	      update_stmt (send);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  /* It's possible for the ET->DEST block (the work done by the active thread)
     to finish with a control-flow insn, e.g. a UNIQUE function call.  Split
     the block and add SENDER_SEQ in the latter part to avoid having control
     flow in the middle of a BB.  */

  decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_END);
  call = gimple_build_call (decl, 1, build_fold_addr_expr (sender_decl));
  gimple_seq_add_stmt (&sender_seq, call);

  gsi = gsi_last_bb (body);
  gimple *last = gsi_stmt (gsi);
  basic_block sender_block = split_block (body, last)->dest;
  gsi = gsi_last_bb (sender_block);
  gsi_insert_seq_after (&gsi, sender_seq, GSI_CONTINUE_LINKING);
}
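/* Schematic CFG produced by worker_single_copy (simplified; the exact block
   boundaries depend on the splits above):

     from:    _1 = GOACC_single_copy_start (&sender);
	      receiver = _1;
	      if (receiver == NULL) goto body; else goto barrier;
     body:    ...original work (active worker only)...
	      sender->field = def; ...
	      GOACC_single_copy_end (&sender);
     barrier: GOACC_barrier ();
	      if (receiver != NULL) goto copyout; else goto exit;
     copyout: def' = receiver->field; ...  (idle workers only)
     exit:    def'' = PHI <def (barrier), def' (copyout)>;  */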
static void
neuter_worker_single (parallel_g *par, unsigned outer_mask,
		      bitmap worker_single, bitmap vector_single,
		      vec<propagation_set *> *prop_set,
		      hash_set<tree> *partitioned_var_uses,
		      record_field_map_t *record_field_map)
{
  unsigned mask = outer_mask | par->mask;

  if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
    {
      basic_block block;

      for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
	{
	  bool has_defs = false;
	  hash_set<tree> def_escapes_block;
	  hash_set<tree> worker_partitioned_uses;
	  unsigned j;
	  tree var;

	  FOR_EACH_SSA_NAME (j, var, cfun)
	    {
	      if (SSA_NAME_IS_VIRTUAL_OPERAND (var))
		{
		  has_defs = true;
		  continue;
		}

	      gimple *def_stmt = SSA_NAME_DEF_STMT (var);

	      if (gimple_nop_p (def_stmt))
		continue;

	      if (gimple_bb (def_stmt)->index != block->index)
		continue;

	      gimple *use_stmt;
	      imm_use_iterator use_iter;
	      bool uses_outside_block = false;
	      bool worker_partitioned_use = false;

	      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, var)
		{
		  int blocknum = gimple_bb (use_stmt)->index;

		  /* Don't propagate SSA names that are only used in the
		     current block, unless the usage is in a phi node: that
		     means the name left the block, then came back in at the
		     top.  */
		  if (blocknum != block->index
		      || gimple_code (use_stmt) == GIMPLE_PHI)
		    uses_outside_block = true;
		  if (!bitmap_bit_p (worker_single, blocknum))
		    worker_partitioned_use = true;
		}

	      if (uses_outside_block)
		def_escapes_block.add (var);

	      if (worker_partitioned_use)
		{
		  worker_partitioned_uses.add (var);
		  has_defs = true;
		}
	    }

	  propagation_set *ws_prop = (*prop_set)[block->index];

	  if (ws_prop)
	    {
	      for (propagation_set::iterator it = ws_prop->begin ();
		   it != ws_prop->end ();
		   ++it)
		{
		  tree var = *it;
		  if (TREE_CODE (var) == VAR_DECL)
		    {
		      def_escapes_block.add (var);
		      if (partitioned_var_uses->contains (var))
			{
			  worker_partitioned_uses.add (var);
			  has_defs = true;
			}
		    }
		}

	      delete ws_prop;
	      (*prop_set)[block->index] = 0;
	    }

	  tree record_type = (tree) block->aux;

	  if (has_defs)
	    worker_single_copy (block, block, &def_escapes_block,
				&worker_partitioned_uses, record_type,
				record_field_map);
	  else
	    worker_single_simple (block, block, &def_escapes_block);
	}
    }

  if ((outer_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
    {
      basic_block block;

      for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
	for (gimple_stmt_iterator gsi = gsi_start_bb (block);
	     !gsi_end_p (gsi);
	     gsi_next (&gsi))
	  {
	    gimple *stmt = gsi_stmt (gsi);

	    if (gimple_code (stmt) == GIMPLE_CALL
		&& !gimple_call_internal_p (stmt)
		&& !omp_sese_active_worker_call (as_a <gcall *> (stmt)))
	      {
		/* If we have an OpenACC routine call in worker-single mode,
		   place barriers before and afterwards to prevent
		   clobbering re-used shared memory regions (as are used
		   for AMDGCN at present, for example).  */
		tree decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
		gsi_insert_before (&gsi, gimple_build_call (decl, 0),
				   GSI_SAME_STMT);
		gsi_insert_after (&gsi, gimple_build_call (decl, 0),
				  GSI_NEW_STMT);
	      }
	  }
    }

  if (par->inner)
    neuter_worker_single (par->inner, mask, worker_single, vector_single,
			  prop_set, partitioned_var_uses, record_field_map);
  if (par->next)
    neuter_worker_single (par->next, outer_mask, worker_single, vector_single,
			  prop_set, partitioned_var_uses, record_field_map);
}
static int
execute_omp_oacc_neuter_broadcast ()
{
  bb_stmt_map_t bb_stmt_map;
  auto_bitmap worker_single, vector_single;

  omp_sese_split_blocks (&bb_stmt_map);

  if (dump_file)
    {
      fprintf (dump_file, "\n\nAfter splitting:\n\n");
      dump_function_to_file (current_function_decl, dump_file, dump_flags);
    }

  unsigned mask = 0;

  /* If this is a routine, calculate MASK as if the outer levels are already
     partitioned.  */
  tree attr = oacc_get_fn_attrib (current_function_decl);
  if (attr)
    {
      tree dims = TREE_VALUE (attr);
      unsigned ix;
      for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims))
	{
	  tree allowed = TREE_PURPOSE (dims);
	  if (allowed && integer_zerop (allowed))
	    mask |= GOMP_DIM_MASK (ix);
	}
    }

  parallel_g *par = omp_sese_discover_pars (&bb_stmt_map);
  populate_single_mode_bitmaps (par, worker_single, vector_single, mask, 0);

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bb->aux = NULL;

  vec<propagation_set *> prop_set (vNULL);
  prop_set.safe_grow_cleared (last_basic_block_for_fn (cfun), true);

  find_ssa_names_to_propagate (par, mask, worker_single, vector_single,
			       &prop_set);

  hash_set<tree> partitioned_var_uses;
  hash_set<tree> gang_private_vars;

  find_gang_private_vars (&gang_private_vars);
  find_partitioned_var_uses (par, mask, &partitioned_var_uses);
  find_local_vars_to_propagate (par, mask, &partitioned_var_uses,
				&gang_private_vars, &prop_set);

  record_field_map_t record_field_map;

  FOR_ALL_BB_FN (bb, cfun)
    {
      propagation_set *ws_prop = prop_set[bb->index];
      if (ws_prop)
	{
	  tree record_type = lang_hooks.types.make_type (RECORD_TYPE);
	  tree name = create_tmp_var_name (".oacc_ws_data_s");
	  name = build_decl (UNKNOWN_LOCATION, TYPE_DECL, name, record_type);
	  DECL_ARTIFICIAL (name) = 1;
	  DECL_NAMELESS (name) = 1;
	  TYPE_NAME (record_type) = name;
	  TYPE_ARTIFICIAL (record_type) = 1;

	  auto_vec<tree> field_vec (ws_prop->elements ());
	  for (hash_set<tree>::iterator it = ws_prop->begin ();
	       it != ws_prop->end (); ++it)
	    field_vec.quick_push (*it);

	  field_vec.qsort (sort_by_size_then_ssa_version_or_uid);

	  field_map_t *fields = new field_map_t;

	  bool existed;
	  existed = record_field_map.put (record_type, fields);
	  gcc_checking_assert (!existed);

	  /* Insert var fields in reverse order, so the last inserted element
	     is the first in the structure.  */
	  for (int i = field_vec.length () - 1; i >= 0; i--)
	    install_var_field (field_vec[i], record_type, fields);

	  layout_type (record_type);

	  bb->aux = (tree) record_type;
	}
    }

  neuter_worker_single (par, mask, worker_single, vector_single, &prop_set,
			&partitioned_var_uses, &record_field_map);

  for (auto it : record_field_map)
    delete it.second;
  record_field_map.empty ();

  /* These are supposed to have been 'delete'd by 'neuter_worker_single'.  */
  for (auto it : prop_set)
    gcc_checking_assert (!it);
  prop_set.release ();

  delete par;

  /* This doesn't seem to make a difference.  */
  loops_state_clear (LOOP_CLOSED_SSA);

  /* Neutering worker-single neutered blocks will invalidate dominance info.
     It may be possible to incrementally update just the affected blocks, but
     obliterate everything for now.  */
  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);

  if (dump_file)
    {
      fprintf (dump_file, "\n\nAfter neutering:\n\n");
      dump_function_to_file (current_function_decl, dump_file, dump_flags);
    }

  return 0;
}
namespace {

const pass_data pass_data_omp_oacc_neuter_broadcast =
{
  GIMPLE_PASS, /* type */
  "omp_oacc_neuter_broadcast", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
};

class pass_omp_oacc_neuter_broadcast : public gimple_opt_pass
{
public:
  pass_omp_oacc_neuter_broadcast (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_oacc_neuter_broadcast, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    return (flag_openacc
	    && targetm.goacc.create_worker_broadcast_record);
  }

  virtual unsigned int execute (function *)
  {
    return execute_omp_oacc_neuter_broadcast ();
  }

}; // class pass_omp_oacc_neuter_broadcast

} // anon namespace

gimple_opt_pass *
make_pass_omp_oacc_neuter_broadcast (gcc::context *ctxt)
{
  return new pass_omp_oacc_neuter_broadcast (ctxt);
}