1 /* OpenACC worker partitioning via middle end neutering/broadcasting scheme
3 Copyright (C) 2015-2021 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "tree-pass.h"
31 #include "pretty-print.h"
32 #include "fold-const.h"
34 #include "gimple-iterator.h"
35 #include "gimple-walk.h"
36 #include "tree-inline.h"
37 #include "langhooks.h"
38 #include "omp-general.h"
40 #include "gimple-pretty-print.h"
42 #include "insn-config.h"
44 #include "internal-fn.h"
46 #include "tree-nested.h"
47 #include "stor-layout.h"
48 #include "tree-ssa-threadupdate.h"
49 #include "tree-into-ssa.h"
50 #include "splay-tree.h"
54 #include "omp-offload.h"
57 /* Loop structure of the function. The entire function is described as
59 /* Adapted from 'gcc/config/nvptx/nvptx.c:struct parallel'. */
63 /* Parent parallel. */
66 /* Next sibling parallel. */
69 /* First child parallel. */
72 /* Partitioning mask of the parallel. */
75 /* Partitioning used within inner parallels. */
78 /* Location of parallel forked and join. The forked is the first
79 block in the parallel and the join is the first block after of
81 basic_block forked_block
;
82 basic_block join_block
;
90 /* Basic blocks in this parallel, but not in child parallels. The
91 FORKED and JOINING blocks are in the partition. The FORK and JOIN
93 auto_vec
<basic_block
> blocks
;
100 parallel_g (parallel_g
*parent
, unsigned mode
);
104 /* Constructor links the new parallel into its parent's chain of
   children.  NOTE(review): this extraction has dropped several original
   lines here, including the constructor body's braces and (per upstream
   GCC) an `if (parent_)` guard before the parent-linking statements --
   confirm against the upstream file.  */
107 parallel_g::parallel_g (parallel_g
*parent_
, unsigned mask_
)
108 :parent (parent_
), next (0), inner (0), mask (mask_
), inner_mask (0)
/* Clear the fork/join bookkeeping; these fields are filled in later
   when the CFG is walked to discover parallels.  */
110 forked_block
= join_block
= 0;
111 forked_stmt
= join_stmt
= NULL
;
112 fork_stmt
= joining_stmt
= NULL
;
/* Broadcast-record state starts empty and is created on demand.  */
114 record_type
= NULL_TREE
;
115 sender_decl
= NULL_TREE
;
116 receiver_decl
= NULL_TREE
;
/* Push this parallel onto the front of the parent's child list.  */
120 next
= parent
->inner
;
121 parent
->inner
= this;
125 parallel_g::~parallel_g ()
/* Return whether DECL is ultimately based on a function-local variable:
   a VAR_DECL that is not a global, or a reference whose base object
   (operand 0) is.  NOTE(review): the extraction has dropped the return
   type, the switch's case labels and its default branch -- confirm the
   exact set of tree codes handled against the upstream file.  */
132 local_var_based_p (tree decl
)
134 switch (TREE_CODE (decl
))
/* Plain variable: local iff not global.  */
137 return !is_global_var (decl
);
/* Reference node: recurse on the base object.  */
142 return local_var_based_p (TREE_OPERAND (decl
, 0));
149 /* Map of basic blocks to gimple stmts. */
150 typedef hash_map
<basic_block
, gimple
*> bb_stmt_map_t
;
152 /* Calls to OpenACC routines are made by all workers/wavefronts/warps, since
153 the routine likely contains partitioned loops (else will do its own
154 neutering and variable propagation). Return TRUE if a function call CALL
155 should be made in (worker) single mode instead, rather than redundant
   mode.  (NOTE(review): the tail of this comment was lost in
   extraction.)  */
159 omp_sese_active_worker_call (gcall
*call
)
/* "seq" routines have no partitioning level of their own; use the
   maximum dimension count as their sentinel value.  */
161 #define GOMP_DIM_SEQ GOMP_DIM_MAX
162 tree fndecl
= gimple_call_fndecl (call
);
/* Look up the OpenACC function attribute to identify a routine.  */
167 tree attrs
= oacc_get_fn_attrib (fndecl
);
172 int level
= oacc_fn_attrib_level (attrs
);
174 /* Neither regular functions nor "seq" routines should be run by all threads
175 in worker-single mode. */
176 return level
== -1 || level
== GOMP_DIM_SEQ
;
180 /* Split basic blocks such that each forked and join unspecs are at
181 the start of their basic blocks. Thus afterwards each block will
182 have a single partitioning mode. We also do the same for return
183 insns, as they are executed by every thread. Return the
184 partitioning mode of the function as a whole. Populate MAP with
185 head and tail blocks. We also clear the BB visited flag, which is
186 used when finding partitions. */
187 /* Adapted from 'gcc/config/nvptx/nvptx.c:nvptx_split_blocks'. */
190 omp_sese_split_blocks (bb_stmt_map_t
*map
)
192 auto_vec
<gimple
*> worklist
;
195 /* Locate all the reorg instructions of interest. */
196 FOR_ALL_BB_FN (block
, cfun
)
198 /* Clear visited flag, for use by parallel locator */
199 block
->flags
&= ~BB_VISITED
;
201 for (gimple_stmt_iterator gsi
= gsi_start_bb (block
);
205 gimple
*stmt
= gsi_stmt (gsi
);
207 if (gimple_call_internal_p (stmt
, IFN_UNIQUE
))
209 enum ifn_unique_kind k
= ((enum ifn_unique_kind
)
210 TREE_INT_CST_LOW (gimple_call_arg (stmt
, 0)));
212 if (k
== IFN_UNIQUE_OACC_JOIN
)
213 worklist
.safe_push (stmt
);
214 else if (k
== IFN_UNIQUE_OACC_FORK
)
216 gcc_assert (gsi_one_before_end_p (gsi
));
217 basic_block forked_block
= single_succ (block
);
218 gimple_stmt_iterator gsi2
= gsi_start_bb (forked_block
);
220 /* We push a NOP as a placeholder for the "forked" stmt.
221 This is then recognized in omp_sese_find_par. */
222 gimple
*nop
= gimple_build_nop ();
223 gsi_insert_before (&gsi2
, nop
, GSI_SAME_STMT
);
225 worklist
.safe_push (nop
);
228 else if (gimple_code (stmt
) == GIMPLE_RETURN
229 || gimple_code (stmt
) == GIMPLE_COND
230 || gimple_code (stmt
) == GIMPLE_SWITCH
231 || (gimple_code (stmt
) == GIMPLE_CALL
232 && !gimple_call_internal_p (stmt
)
233 && !omp_sese_active_worker_call (as_a
<gcall
*> (stmt
))))
234 worklist
.safe_push (stmt
);
235 else if (is_gimple_assign (stmt
))
237 tree lhs
= gimple_assign_lhs (stmt
);
239 /* Force assignments to components/fields/elements of local
240 aggregates into fully-partitioned (redundant) mode. This
241 avoids having to broadcast the whole aggregate. The RHS of
242 the assignment will be propagated using the normal
245 switch (TREE_CODE (lhs
))
251 tree aggr
= TREE_OPERAND (lhs
, 0);
253 if (local_var_based_p (aggr
))
254 worklist
.safe_push (stmt
);
265 /* Split blocks on the worklist. */
269 for (ix
= 0; worklist
.iterate (ix
, &stmt
); ix
++)
271 basic_block block
= gimple_bb (stmt
);
273 if (gimple_code (stmt
) == GIMPLE_COND
)
275 gcond
*orig_cond
= as_a
<gcond
*> (stmt
);
276 tree_code code
= gimple_expr_code (orig_cond
);
277 tree pred
= make_ssa_name (boolean_type_node
);
278 gimple
*asgn
= gimple_build_assign (pred
, code
,
279 gimple_cond_lhs (orig_cond
),
280 gimple_cond_rhs (orig_cond
));
282 = gimple_build_cond (NE_EXPR
, pred
, boolean_false_node
,
283 gimple_cond_true_label (orig_cond
),
284 gimple_cond_false_label (orig_cond
));
286 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
287 gsi_insert_before (&gsi
, asgn
, GSI_SAME_STMT
);
288 gsi_replace (&gsi
, new_cond
, true);
290 edge e
= split_block (block
, asgn
);
292 map
->get_or_insert (block
) = new_cond
;
294 else if ((gimple_code (stmt
) == GIMPLE_CALL
295 && !gimple_call_internal_p (stmt
))
296 || is_gimple_assign (stmt
))
298 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
301 edge call
= split_block (block
, gsi_stmt (gsi
));
303 gimple
*call_stmt
= gsi_stmt (gsi_start_bb (call
->dest
));
305 edge call_to_ret
= split_block (call
->dest
, call_stmt
);
307 map
->get_or_insert (call_to_ret
->src
) = call_stmt
;
311 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
315 map
->get_or_insert (block
) = stmt
;
318 /* Split block before insn. The insn is in the new block. */
319 edge e
= split_block (block
, gsi_stmt (gsi
));
322 map
->get_or_insert (block
) = stmt
;
/* Return a human-readable description of the partitioning MASK, where
   bit 0 = gang, bit 1 = worker and bit 2 = vector partitioning.  Any
   value outside the 3-bit range is reported as "<illegal>".  */

static const char *
mask_name (unsigned mask)
{
  /* Indexed directly by MASK; entry order mirrors the bit encoding.  */
  static const char *const names[] =
    {
      "gang redundant",
      "gang partitioned",
      "worker partitioned",
      "gang+worker partitioned",
      "vector partitioned",
      "gang+vector partitioned",
      "worker+vector partitioned",
      "fully partitioned"
    };

  if (mask < sizeof (names) / sizeof (names[0]))
    return names[mask];
  return "<illegal>";
}
345 /* Dump this parallel and all its inner parallels. */
346 /* Adapted from 'gcc/config/nvptx/nvptx.c:nvptx_dump_pars'. */
349 omp_sese_dump_pars (parallel_g
*par
, unsigned depth
)
351 fprintf (dump_file
, "%u: mask %d (%s) head=%d, tail=%d\n",
352 depth
, par
->mask
, mask_name (par
->mask
),
353 par
->forked_block
? par
->forked_block
->index
: -1,
354 par
->join_block
? par
->join_block
->index
: -1);
356 fprintf (dump_file
, " blocks:");
359 for (unsigned ix
= 0; par
->blocks
.iterate (ix
, &block
); ix
++)
360 fprintf (dump_file
, " %d", block
->index
);
361 fprintf (dump_file
, "\n");
363 omp_sese_dump_pars (par
->inner
, depth
+ 1);
366 omp_sese_dump_pars (par
->next
, depth
);
369 /* If BLOCK contains a fork/join marker, process it to create or
370 terminate a loop structure. Add this block to the current loop,
371 and then walk successor blocks. */
372 /* Adapted from 'gcc/config/nvptx/nvptx.c:nvptx_find_par'. */
375 omp_sese_find_par (bb_stmt_map_t
*map
, parallel_g
*par
, basic_block block
)
377 if (block
->flags
& BB_VISITED
)
379 block
->flags
|= BB_VISITED
;
381 if (gimple
**stmtp
= map
->get (block
))
383 gimple
*stmt
= *stmtp
;
385 if (gimple_code (stmt
) == GIMPLE_COND
386 || gimple_code (stmt
) == GIMPLE_SWITCH
387 || gimple_code (stmt
) == GIMPLE_RETURN
388 || (gimple_code (stmt
) == GIMPLE_CALL
389 && !gimple_call_internal_p (stmt
))
390 || is_gimple_assign (stmt
))
392 /* A single block that is forced to be at the maximum partition
393 level. Make a singleton par for it. */
394 par
= new parallel_g (par
, GOMP_DIM_MASK (GOMP_DIM_GANG
)
395 | GOMP_DIM_MASK (GOMP_DIM_WORKER
)
396 | GOMP_DIM_MASK (GOMP_DIM_VECTOR
));
397 par
->forked_block
= block
;
398 par
->forked_stmt
= stmt
;
399 par
->blocks
.safe_push (block
);
401 goto walk_successors
;
403 else if (gimple_nop_p (stmt
))
405 basic_block pred
= single_pred (block
);
407 gimple_stmt_iterator gsi
= gsi_last_bb (pred
);
408 gimple
*final_stmt
= gsi_stmt (gsi
);
410 if (gimple_call_internal_p (final_stmt
, IFN_UNIQUE
))
412 gcall
*call
= as_a
<gcall
*> (final_stmt
);
413 enum ifn_unique_kind k
= ((enum ifn_unique_kind
)
414 TREE_INT_CST_LOW (gimple_call_arg (call
, 0)));
416 if (k
== IFN_UNIQUE_OACC_FORK
)
419 = TREE_INT_CST_LOW (gimple_call_arg (call
, 2));
420 unsigned mask
= (dim
>= 0) ? GOMP_DIM_MASK (dim
) : 0;
422 par
= new parallel_g (par
, mask
);
423 par
->forked_block
= block
;
424 par
->forked_stmt
= final_stmt
;
425 par
->fork_stmt
= stmt
;
433 else if (gimple_call_internal_p (stmt
, IFN_UNIQUE
))
435 gcall
*call
= as_a
<gcall
*> (stmt
);
436 enum ifn_unique_kind k
= ((enum ifn_unique_kind
)
437 TREE_INT_CST_LOW (gimple_call_arg (call
, 0)));
438 if (k
== IFN_UNIQUE_OACC_JOIN
)
440 HOST_WIDE_INT dim
= TREE_INT_CST_LOW (gimple_call_arg (stmt
, 2));
441 unsigned mask
= (dim
>= 0) ? GOMP_DIM_MASK (dim
) : 0;
443 gcc_assert (par
->mask
== mask
);
444 par
->join_block
= block
;
445 par
->join_stmt
= stmt
;
456 /* Add this block onto the current loop's list of blocks. */
457 par
->blocks
.safe_push (block
);
459 /* This must be the entry block. Create a NULL parallel. */
460 par
= new parallel_g (0, 0);
463 /* Walk successor blocks. */
467 FOR_EACH_EDGE (e
, ei
, block
->succs
)
468 omp_sese_find_par (map
, par
, e
->dest
);
473 /* DFS walk the CFG looking for fork & join markers. Construct
474 loop structures as we go. MAP is a mapping of basic blocks
475 to head & tail markers, discovered when splitting blocks. This
476 speeds up the discovery. We rely on the BB visited flag having
477 been cleared when splitting blocks. */
478 /* Adapted from 'gcc/config/nvptx/nvptx.c:nvptx_discover_pars'. */
481 omp_sese_discover_pars (bb_stmt_map_t
*map
)
485 /* Mark exit blocks as visited. */
486 block
= EXIT_BLOCK_PTR_FOR_FN (cfun
);
487 block
->flags
|= BB_VISITED
;
489 /* And entry block as not. */
490 block
= ENTRY_BLOCK_PTR_FOR_FN (cfun
);
491 block
->flags
&= ~BB_VISITED
;
493 parallel_g
*par
= omp_sese_find_par (map
, 0, block
);
497 fprintf (dump_file
, "\nLoops\n");
498 omp_sese_dump_pars (par
, 0);
499 fprintf (dump_file
, "\n");
506 populate_single_mode_bitmaps (parallel_g
*par
, bitmap worker_single
,
507 bitmap vector_single
, unsigned outer_mask
,
510 unsigned mask
= outer_mask
| par
->mask
;
514 for (unsigned i
= 0; par
->blocks
.iterate (i
, &block
); i
++)
516 if ((mask
& GOMP_DIM_MASK (GOMP_DIM_WORKER
)) == 0)
517 bitmap_set_bit (worker_single
, block
->index
);
519 if ((mask
& GOMP_DIM_MASK (GOMP_DIM_VECTOR
)) == 0)
520 bitmap_set_bit (vector_single
, block
->index
);
524 populate_single_mode_bitmaps (par
->inner
, worker_single
, vector_single
,
527 populate_single_mode_bitmaps (par
->next
, worker_single
, vector_single
,
531 /* A map from SSA names or var decls to record fields. */
533 typedef hash_map
<tree
, tree
> field_map_t
;
535 /* For each propagation record type, this is a map from SSA names or var decls
536 to propagate, to the field in the record type that should be used for
537 transmission and reception. */
539 typedef hash_map
<tree
, field_map_t
*> record_field_map_t
;
542 install_var_field (tree var
, tree record_type
, field_map_t
*fields
)
547 if (TREE_CODE (var
) == SSA_NAME
)
549 name
= SSA_NAME_IDENTIFIER (var
);
552 sprintf (tmp
, "_%u", (unsigned) SSA_NAME_VERSION (var
));
553 name
= get_identifier (tmp
);
556 else if (TREE_CODE (var
) == VAR_DECL
)
558 name
= DECL_NAME (var
);
561 sprintf (tmp
, "D_%u", (unsigned) DECL_UID (var
));
562 name
= get_identifier (tmp
);
568 gcc_assert (!fields
->get (var
));
570 tree type
= TREE_TYPE (var
);
572 if (POINTER_TYPE_P (type
)
573 && TYPE_RESTRICT (type
))
574 type
= build_qualified_type (type
, TYPE_QUALS (type
) & ~TYPE_QUAL_RESTRICT
);
576 tree field
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
, name
, type
);
578 if (TREE_CODE (var
) == VAR_DECL
&& type
== TREE_TYPE (var
))
580 SET_DECL_ALIGN (field
, DECL_ALIGN (var
));
581 DECL_USER_ALIGN (field
) = DECL_USER_ALIGN (var
);
582 TREE_THIS_VOLATILE (field
) = TREE_THIS_VOLATILE (var
);
585 SET_DECL_ALIGN (field
, TYPE_ALIGN (type
));
587 fields
->put (var
, field
);
589 insert_field_into_struct (record_type
, field
);
592 /* Sets of SSA_NAMES or VAR_DECLs to propagate. */
593 typedef hash_set
<tree
> propagation_set
;
596 find_ssa_names_to_propagate (parallel_g
*par
, unsigned outer_mask
,
597 bitmap worker_single
, bitmap vector_single
,
598 vec
<propagation_set
*> *prop_set
)
600 unsigned mask
= outer_mask
| par
->mask
;
603 find_ssa_names_to_propagate (par
->inner
, mask
, worker_single
,
604 vector_single
, prop_set
);
606 find_ssa_names_to_propagate (par
->next
, outer_mask
, worker_single
,
607 vector_single
, prop_set
);
609 if (mask
& GOMP_DIM_MASK (GOMP_DIM_WORKER
))
614 for (ix
= 0; par
->blocks
.iterate (ix
, &block
); ix
++)
616 for (gphi_iterator psi
= gsi_start_phis (block
);
617 !gsi_end_p (psi
); gsi_next (&psi
))
619 gphi
*phi
= psi
.phi ();
623 FOR_EACH_PHI_ARG (use
, phi
, iter
, SSA_OP_USE
)
625 tree var
= USE_FROM_PTR (use
);
627 if (TREE_CODE (var
) != SSA_NAME
)
630 gimple
*def_stmt
= SSA_NAME_DEF_STMT (var
);
632 if (gimple_nop_p (def_stmt
))
635 basic_block def_bb
= gimple_bb (def_stmt
);
637 if (bitmap_bit_p (worker_single
, def_bb
->index
))
639 if (!(*prop_set
)[def_bb
->index
])
640 (*prop_set
)[def_bb
->index
] = new propagation_set
;
642 propagation_set
*ws_prop
= (*prop_set
)[def_bb
->index
];
649 for (gimple_stmt_iterator gsi
= gsi_start_bb (block
);
650 !gsi_end_p (gsi
); gsi_next (&gsi
))
654 gimple
*stmt
= gsi_stmt (gsi
);
656 FOR_EACH_SSA_USE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
658 tree var
= USE_FROM_PTR (use
);
660 gimple
*def_stmt
= SSA_NAME_DEF_STMT (var
);
662 if (gimple_nop_p (def_stmt
))
665 basic_block def_bb
= gimple_bb (def_stmt
);
667 if (bitmap_bit_p (worker_single
, def_bb
->index
))
669 if (!(*prop_set
)[def_bb
->index
])
670 (*prop_set
)[def_bb
->index
] = new propagation_set
;
672 propagation_set
*ws_prop
= (*prop_set
)[def_bb
->index
];
682 /* Callback for walk_gimple_stmt to find RHS VAR_DECLs (uses) in a
686 find_partitioned_var_uses_1 (tree
*node
, int *, void *data
)
688 walk_stmt_info
*wi
= (walk_stmt_info
*) data
;
689 hash_set
<tree
> *partitioned_var_uses
= (hash_set
<tree
> *) wi
->info
;
691 if (!wi
->is_lhs
&& VAR_P (*node
))
692 partitioned_var_uses
->add (*node
);
698 find_partitioned_var_uses (parallel_g
*par
, unsigned outer_mask
,
699 hash_set
<tree
> *partitioned_var_uses
)
701 unsigned mask
= outer_mask
| par
->mask
;
704 find_partitioned_var_uses (par
->inner
, mask
, partitioned_var_uses
);
706 find_partitioned_var_uses (par
->next
, outer_mask
, partitioned_var_uses
);
708 if (mask
& GOMP_DIM_MASK (GOMP_DIM_WORKER
))
713 for (ix
= 0; par
->blocks
.iterate (ix
, &block
); ix
++)
714 for (gimple_stmt_iterator gsi
= gsi_start_bb (block
);
715 !gsi_end_p (gsi
); gsi_next (&gsi
))
718 memset (&wi
, 0, sizeof (wi
));
719 wi
.info
= (void *) partitioned_var_uses
;
720 walk_gimple_stmt (&gsi
, NULL
, find_partitioned_var_uses_1
, &wi
);
725 /* Gang-private variables (typically placed in a GPU's shared memory) do not
726 need to be processed by the worker-propagation mechanism. Populate the
727 GANG_PRIVATE_VARS set with any such variables found in the current
731 find_gang_private_vars (hash_set
<tree
> *gang_private_vars
)
735 FOR_EACH_BB_FN (block
, cfun
)
737 for (gimple_stmt_iterator gsi
= gsi_start_bb (block
);
741 gimple
*stmt
= gsi_stmt (gsi
);
743 if (gimple_call_internal_p (stmt
, IFN_UNIQUE
))
745 enum ifn_unique_kind k
= ((enum ifn_unique_kind
)
746 TREE_INT_CST_LOW (gimple_call_arg (stmt
, 0)));
747 if (k
== IFN_UNIQUE_OACC_PRIVATE
)
750 = TREE_INT_CST_LOW (gimple_call_arg (stmt
, 2));
751 if (level
!= GOMP_DIM_GANG
)
753 for (unsigned i
= 3; i
< gimple_call_num_args (stmt
); i
++)
755 tree arg
= gimple_call_arg (stmt
, i
);
756 gcc_assert (TREE_CODE (arg
) == ADDR_EXPR
);
757 tree decl
= TREE_OPERAND (arg
, 0);
758 gang_private_vars
->add (decl
);
767 find_local_vars_to_propagate (parallel_g
*par
, unsigned outer_mask
,
768 hash_set
<tree
> *partitioned_var_uses
,
769 hash_set
<tree
> *gang_private_vars
,
770 vec
<propagation_set
*> *prop_set
)
772 unsigned mask
= outer_mask
| par
->mask
;
775 find_local_vars_to_propagate (par
->inner
, mask
, partitioned_var_uses
,
776 gang_private_vars
, prop_set
);
778 find_local_vars_to_propagate (par
->next
, outer_mask
, partitioned_var_uses
,
779 gang_private_vars
, prop_set
);
781 if (!(mask
& GOMP_DIM_MASK (GOMP_DIM_WORKER
)))
786 for (ix
= 0; par
->blocks
.iterate (ix
, &block
); ix
++)
788 for (gimple_stmt_iterator gsi
= gsi_start_bb (block
);
789 !gsi_end_p (gsi
); gsi_next (&gsi
))
791 gimple
*stmt
= gsi_stmt (gsi
);
795 FOR_EACH_LOCAL_DECL (cfun
, i
, var
)
798 || is_global_var (var
)
799 || AGGREGATE_TYPE_P (TREE_TYPE (var
))
800 || !partitioned_var_uses
->contains (var
)
801 || gang_private_vars
->contains (var
))
804 if (stmt_may_clobber_ref_p (stmt
, var
))
808 fprintf (dump_file
, "bb %u: local variable may be "
809 "clobbered in %s mode: ", block
->index
,
811 print_generic_expr (dump_file
, var
, TDF_SLIM
);
812 fprintf (dump_file
, "\n");
815 if (!(*prop_set
)[block
->index
])
816 (*prop_set
)[block
->index
] = new propagation_set
;
818 propagation_set
*ws_prop
819 = (*prop_set
)[block
->index
];
829 /* Transform basic blocks FROM, TO (which may be the same block) into:
830 if (GOACC_single_start ())
835 | | (new) predicate block
838 +----+ +----+ +----+ |
839 | | | | ===> | | | f (old) from block
840 +----+ +----+ +----+ |
843 (split (split before | | skip block
844 at end) condition) +----+
849 worker_single_simple (basic_block from
, basic_block to
,
850 hash_set
<tree
> *def_escapes_block
)
854 basic_block skip_block
;
856 gimple_stmt_iterator gsi
= gsi_last_bb (to
);
857 if (EDGE_COUNT (to
->succs
) > 1)
859 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_COND
);
862 edge e
= split_block (to
, gsi_stmt (gsi
));
863 skip_block
= e
->dest
;
865 gimple_stmt_iterator start
= gsi_after_labels (from
);
867 decl
= builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_START
);
868 lhs
= create_tmp_var (TREE_TYPE (TREE_TYPE (decl
)));
869 call
= gimple_build_call (decl
, 0);
870 gimple_call_set_lhs (call
, lhs
);
871 gsi_insert_before (&start
, call
, GSI_NEW_STMT
);
874 cond
= gimple_build_cond (EQ_EXPR
, lhs
,
875 fold_convert_loc (UNKNOWN_LOCATION
,
878 NULL_TREE
, NULL_TREE
);
879 gsi_insert_after (&start
, cond
, GSI_NEW_STMT
);
882 edge et
= split_block (from
, cond
);
883 et
->flags
&= ~EDGE_FALLTHRU
;
884 et
->flags
|= EDGE_TRUE_VALUE
;
885 /* Make the active worker the more probable path so we prefer fallthrough
886 (letting the idle workers jump around more). */
887 et
->probability
= profile_probability::likely ();
889 edge ef
= make_edge (from
, skip_block
, EDGE_FALSE_VALUE
);
890 ef
->probability
= et
->probability
.invert ();
892 basic_block neutered
= split_edge (ef
);
893 gimple_stmt_iterator neut_gsi
= gsi_last_bb (neutered
);
895 for (gsi
= gsi_start_bb (et
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
897 gimple
*stmt
= gsi_stmt (gsi
);
901 FOR_EACH_SSA_TREE_OPERAND (var
, stmt
, iter
, SSA_OP_DEF
)
903 if (def_escapes_block
->contains (var
))
905 gphi
*join_phi
= create_phi_node (NULL_TREE
, skip_block
);
906 create_new_def_for (var
, join_phi
,
907 gimple_phi_result_ptr (join_phi
));
908 add_phi_arg (join_phi
, var
, e
, UNKNOWN_LOCATION
);
910 tree neutered_def
= copy_ssa_name (var
, NULL
);
911 /* We really want "don't care" or some value representing
912 undefined here, but optimizers will probably get rid of the
913 zero-assignments anyway. */
914 gassign
*zero
= gimple_build_assign (neutered_def
,
915 build_zero_cst (TREE_TYPE (neutered_def
)));
917 gsi_insert_after (&neut_gsi
, zero
, GSI_CONTINUE_LINKING
);
920 add_phi_arg (join_phi
, neutered_def
, single_succ_edge (neutered
),
922 update_stmt (join_phi
);
927 gsi
= gsi_start_bb (skip_block
);
929 decl
= builtin_decl_explicit (BUILT_IN_GOACC_BARRIER
);
930 gimple
*acc_bar
= gimple_build_call (decl
, 0);
932 gsi_insert_before (&gsi
, acc_bar
, GSI_SAME_STMT
);
933 update_stmt (acc_bar
);
936 /* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
   as appropriate, copying those flags from FIELD.  */
938 /* Adapted from 'gcc/omp-low.c:omp_build_component_ref'. */
941 oacc_build_component_ref (tree obj
, tree field
)
943 tree field_type
= TREE_TYPE (field
);
944 tree obj_type
= TREE_TYPE (obj
);
/* For a non-generic address space, requalify the field type so the
   resulting reference keeps the object's address space.  */
945 if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type
)))
946 field_type
= build_qualified_type
948 KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type
)));
950 tree ret
= build3 (COMPONENT_REF
, field_type
, obj
, field
, NULL
);
/* Propagate volatility / read-only-ness from the field decl to the
   newly built reference.  */
951 if (TREE_THIS_VOLATILE (field
))
952 TREE_THIS_VOLATILE (ret
) |= 1;
953 if (TREE_READONLY (field
))
954 TREE_READONLY (ret
) |= 1;
/* NOTE(review): the trailing `return ret;` present upstream was dropped
   by this extraction.  */
/* Build a dereference of RECEIVER_DECL and select VAR's broadcast field
   from it, using the VAR -> FIELD_DECL map FIELDS.  */
959 build_receiver_ref (tree var
, tree receiver_decl
, field_map_t
*fields
)
961 tree x
= build_simple_mem_ref (receiver_decl
);
962 tree field
= *fields
->get (var
);
/* The receiver pointer is known valid here, so the load cannot trap.  */
963 TREE_THIS_NOTRAP (x
) = 1;
964 x
= oacc_build_component_ref (x
, field
);
/* NOTE(review): the trailing `return x;` present upstream was dropped
   by this extraction.  */
/* Build a reference to VAR's field within SENDER_DECL (the broadcast
   record being sent), using the VAR -> FIELD_DECL map FIELDS.  */
969 build_sender_ref (tree var
, tree sender_decl
, field_map_t
*fields
)
971 tree field
= *fields
->get (var
);
972 return oacc_build_component_ref (sender_decl
, field
);
/* qsort comparator giving a stable order over a mix of SSA names and
   DECLs: SSA names order by version number, DECLs by UID, and (per the
   branch structure) mixed pairs are ordered by kind.  NOTE(review): the
   `return` bodies of the two mixed-kind branches were dropped by this
   extraction -- upstream they are `return -1;` and `return 1;`
   respectively; confirm against the upstream file.  */
976 sort_by_ssa_version_or_uid (const void *p1
, const void *p2
)
978 const tree t1
= *(const tree
*)p1
;
979 const tree t2
= *(const tree
*)p2
;
/* Both SSA names: compare version numbers.  */
981 if (TREE_CODE (t1
) == SSA_NAME
&& TREE_CODE (t2
) == SSA_NAME
)
982 return SSA_NAME_VERSION (t1
) - SSA_NAME_VERSION (t2
);
983 else if (TREE_CODE (t1
) == SSA_NAME
&& TREE_CODE (t2
) != SSA_NAME
)
985 else if (TREE_CODE (t1
) != SSA_NAME
&& TREE_CODE (t2
) == SSA_NAME
)
/* Both DECLs: compare UIDs.  */
988 return DECL_UID (t1
) - DECL_UID (t2
);
/* qsort comparator: order entries by the size of their type first,
   falling back to sort_by_ssa_version_or_uid for equal sizes.
   NOTE(review): the lines comparing S1 against S2 were dropped by this
   extraction -- confirm the direction of the size ordering against the
   upstream file.  */
992 sort_by_size_then_ssa_version_or_uid (const void *p1
, const void *p2
)
994 const tree t1
= *(const tree
*)p1
;
995 const tree t2
= *(const tree
*)p2
;
/* Sizes (in bits) of the two entries' types.  */
996 unsigned HOST_WIDE_INT s1
= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t1
)));
997 unsigned HOST_WIDE_INT s2
= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t2
)));
/* Equal sizes: fall back to the version/UID ordering.  */
1001 return sort_by_ssa_version_or_uid (p1
, p2
);
1005 worker_single_copy (basic_block from
, basic_block to
,
1006 hash_set
<tree
> *def_escapes_block
,
1007 hash_set
<tree
> *worker_partitioned_uses
,
1008 tree record_type
, record_field_map_t
*record_field_map
)
1010 /* If we only have virtual defs, we'll have no record type, but we still want
1011 to emit single_copy_start and (particularly) single_copy_end to act as
1012 a vdef source on the neutered edge representing memory writes on the
1013 non-neutered edge. */
1015 record_type
= char_type_node
;
1018 = targetm
.goacc
.create_worker_broadcast_record (record_type
, true,
1021 = targetm
.goacc
.create_worker_broadcast_record (record_type
, false,
1024 gimple_stmt_iterator gsi
= gsi_last_bb (to
);
1025 if (EDGE_COUNT (to
->succs
) > 1)
1027 edge e
= split_block (to
, gsi_stmt (gsi
));
1028 basic_block barrier_block
= e
->dest
;
1030 gimple_stmt_iterator start
= gsi_after_labels (from
);
1032 tree decl
= builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_START
);
1034 tree lhs
= create_tmp_var (TREE_TYPE (TREE_TYPE (decl
)));
1036 gimple
*call
= gimple_build_call (decl
, 1,
1037 build_fold_addr_expr (sender_decl
));
1038 gimple_call_set_lhs (call
, lhs
);
1039 gsi_insert_before (&start
, call
, GSI_NEW_STMT
);
1042 tree conv_tmp
= make_ssa_name (TREE_TYPE (receiver_decl
));
1044 gimple
*conv
= gimple_build_assign (conv_tmp
,
1045 fold_convert (TREE_TYPE (receiver_decl
),
1048 gsi_insert_after (&start
, conv
, GSI_NEW_STMT
);
1049 gimple
*asgn
= gimple_build_assign (receiver_decl
, conv_tmp
);
1050 gsi_insert_after (&start
, asgn
, GSI_NEW_STMT
);
1053 tree zero_ptr
= build_int_cst (TREE_TYPE (receiver_decl
), 0);
1055 tree recv_tmp
= make_ssa_name (TREE_TYPE (receiver_decl
));
1056 asgn
= gimple_build_assign (recv_tmp
, receiver_decl
);
1057 gsi_insert_after (&start
, asgn
, GSI_NEW_STMT
);
1060 gimple
*cond
= gimple_build_cond (EQ_EXPR
, recv_tmp
, zero_ptr
, NULL_TREE
,
1064 gsi_insert_after (&start
, cond
, GSI_NEW_STMT
);
1066 edge et
= split_block (from
, cond
);
1067 et
->flags
&= ~EDGE_FALLTHRU
;
1068 et
->flags
|= EDGE_TRUE_VALUE
;
1069 /* Make the active worker the more probable path so we prefer fallthrough
1070 (letting the idle workers jump around more). */
1071 et
->probability
= profile_probability::likely ();
1073 basic_block body
= et
->dest
;
1075 edge ef
= make_edge (from
, barrier_block
, EDGE_FALSE_VALUE
);
1076 ef
->probability
= et
->probability
.invert ();
1078 decl
= builtin_decl_explicit (BUILT_IN_GOACC_BARRIER
);
1079 gimple
*acc_bar
= gimple_build_call (decl
, 0);
1081 gimple_stmt_iterator bar_gsi
= gsi_start_bb (barrier_block
);
1082 gsi_insert_before (&bar_gsi
, acc_bar
, GSI_NEW_STMT
);
1084 cond
= gimple_build_cond (NE_EXPR
, recv_tmp
, zero_ptr
, NULL_TREE
, NULL_TREE
);
1085 gsi_insert_after (&bar_gsi
, cond
, GSI_NEW_STMT
);
1087 edge et2
= split_block (barrier_block
, cond
);
1088 et2
->flags
&= ~EDGE_FALLTHRU
;
1089 et2
->flags
|= EDGE_TRUE_VALUE
;
1090 et2
->probability
= profile_probability::unlikely ();
1092 basic_block exit_block
= et2
->dest
;
1094 basic_block copyout_block
= split_edge (et2
);
1095 edge ef2
= make_edge (barrier_block
, exit_block
, EDGE_FALSE_VALUE
);
1096 ef2
->probability
= et2
->probability
.invert ();
1098 gimple_stmt_iterator copyout_gsi
= gsi_start_bb (copyout_block
);
1100 edge copyout_to_exit
= single_succ_edge (copyout_block
);
1102 gimple_seq sender_seq
= NULL
;
1104 /* Make sure we iterate over definitions in a stable order. */
1105 auto_vec
<tree
> escape_vec (def_escapes_block
->elements ());
1106 for (hash_set
<tree
>::iterator it
= def_escapes_block
->begin ();
1107 it
!= def_escapes_block
->end (); ++it
)
1108 escape_vec
.quick_push (*it
);
1109 escape_vec
.qsort (sort_by_ssa_version_or_uid
);
1111 for (unsigned i
= 0; i
< escape_vec
.length (); i
++)
1113 tree var
= escape_vec
[i
];
1115 if (TREE_CODE (var
) == SSA_NAME
&& SSA_NAME_IS_VIRTUAL_OPERAND (var
))
1118 tree barrier_def
= 0;
1120 if (TREE_CODE (var
) == SSA_NAME
)
1122 gimple
*def_stmt
= SSA_NAME_DEF_STMT (var
);
1124 if (gimple_nop_p (def_stmt
))
1127 /* The barrier phi takes one result from the actual work of the
1128 block we're neutering, and the other result is constant zero of
1131 gphi
*barrier_phi
= create_phi_node (NULL_TREE
, barrier_block
);
1132 barrier_def
= create_new_def_for (var
, barrier_phi
,
1133 gimple_phi_result_ptr (barrier_phi
));
1135 add_phi_arg (barrier_phi
, var
, e
, UNKNOWN_LOCATION
);
1136 add_phi_arg (barrier_phi
, build_zero_cst (TREE_TYPE (var
)), ef
,
1139 update_stmt (barrier_phi
);
1142 gcc_assert (TREE_CODE (var
) == VAR_DECL
);
1144 /* If we had no record type, we will have no fields map. */
1145 field_map_t
**fields_p
= record_field_map
->get (record_type
);
1146 field_map_t
*fields
= fields_p
? *fields_p
: NULL
;
1148 if (worker_partitioned_uses
->contains (var
)
1150 && fields
->get (var
))
1152 tree neutered_def
= make_ssa_name (TREE_TYPE (var
));
1154 /* Receive definition from shared memory block. */
1156 tree receiver_ref
= build_receiver_ref (var
, receiver_decl
, fields
);
1157 gassign
*recv
= gimple_build_assign (neutered_def
,
1159 gsi_insert_after (©out_gsi
, recv
, GSI_CONTINUE_LINKING
);
1162 if (TREE_CODE (var
) == VAR_DECL
)
1164 /* If it's a VAR_DECL, we only copied to an SSA temporary. Copy
1165 to the final location now. */
1166 gassign
*asgn
= gimple_build_assign (var
, neutered_def
);
1167 gsi_insert_after (©out_gsi
, asgn
, GSI_CONTINUE_LINKING
);
1172 /* If it's an SSA name, create a new phi at the join node to
1173 represent either the output from the active worker (the
1174 barrier) or the inactive workers (the copyout block). */
1175 gphi
*join_phi
= create_phi_node (NULL_TREE
, exit_block
);
1176 create_new_def_for (barrier_def
, join_phi
,
1177 gimple_phi_result_ptr (join_phi
));
1178 add_phi_arg (join_phi
, barrier_def
, ef2
, UNKNOWN_LOCATION
);
1179 add_phi_arg (join_phi
, neutered_def
, copyout_to_exit
,
1181 update_stmt (join_phi
);
1184 /* Send definition to shared memory block. */
1186 tree sender_ref
= build_sender_ref (var
, sender_decl
, fields
);
1188 if (TREE_CODE (var
) == SSA_NAME
)
1190 gassign
*send
= gimple_build_assign (sender_ref
, var
);
1191 gimple_seq_add_stmt (&sender_seq
, send
);
1194 else if (TREE_CODE (var
) == VAR_DECL
)
1196 tree tmp
= make_ssa_name (TREE_TYPE (var
));
1197 gassign
*send
= gimple_build_assign (tmp
, var
);
1198 gimple_seq_add_stmt (&sender_seq
, send
);
1200 send
= gimple_build_assign (sender_ref
, tmp
);
1201 gimple_seq_add_stmt (&sender_seq
, send
);
1209 /* It's possible for the ET->DEST block (the work done by the active thread)
1210 to finish with a control-flow insn, e.g. a UNIQUE function call. Split
1211 the block and add SENDER_SEQ in the latter part to avoid having control
1212 flow in the middle of a BB. */
1214 decl
= builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_END
);
1215 call
= gimple_build_call (decl
, 1, build_fold_addr_expr (sender_decl
));
1216 gimple_seq_add_stmt (&sender_seq
, call
);
1218 gsi
= gsi_last_bb (body
);
1219 gimple
*last
= gsi_stmt (gsi
);
1220 basic_block sender_block
= split_block (body
, last
)->dest
;
1221 gsi
= gsi_last_bb (sender_block
);
1222 gsi_insert_seq_after (&gsi
, sender_seq
, GSI_CONTINUE_LINKING
);
/* Neuter the basic blocks of parallel region PAR that execute in
   worker-single mode, then recurse into PAR's inner and sibling regions.

   OUTER_MASK is the accumulated partitioning mask of the enclosing regions;
   WORKER_SINGLE and VECTOR_SINGLE are bitmaps of basic-block indices that
   execute in worker-single/vector-single mode; PROP_SET maps block indices
   to sets of local variables whose values must be propagated (broadcast)
   from the active worker; PARTITIONED_VAR_USES names variables used in
   worker-partitioned code; RECORD_FIELD_MAP maps broadcast record types to
   their field maps.

   NOTE(review): this chunk's text is incomplete (several structural lines
   are missing); the comments below describe only what the visible code
   shows.  */
1226 neuter_worker_single (parallel_g
*par
, unsigned outer_mask
,
1227 bitmap worker_single
, bitmap vector_single
,
1228 vec
<propagation_set
*> *prop_set
,
1229 hash_set
<tree
> *partitioned_var_uses
,
1230 record_field_map_t
*record_field_map
)
/* Effective partitioning mask at this region: the outer mask plus this
   region's own partitioning.  */
1232 unsigned mask
= outer_mask
| par
->mask
;
/* If the worker dimension is not partitioned here, this region's blocks
   execute in worker-single mode and must be neutered/broadcast.  */
1234 if ((mask
& GOMP_DIM_MASK (GOMP_DIM_WORKER
)) == 0)
1238 for (unsigned i
= 0; par
->blocks
.iterate (i
, &block
); i
++)
1240 bool has_defs
= false;
1241 hash_set
<tree
> def_escapes_block
;
1242 hash_set
<tree
> worker_partitioned_uses
;
/* Collect SSA names defined in this block whose value escapes the block
   (or re-enters via a PHI) or is used by worker-partitioned code.  */
1246 FOR_EACH_SSA_NAME (j
, var
, cfun
)
1248 if (SSA_NAME_IS_VIRTUAL_OPERAND (var
))
1254 gimple
*def_stmt
= SSA_NAME_DEF_STMT (var
);
1256 if (gimple_nop_p (def_stmt
))
/* Only consider names defined in this very block.  */
1259 if (gimple_bb (def_stmt
)->index
!= block
->index
)
1263 imm_use_iterator use_iter
;
1264 bool uses_outside_block
= false;
1265 bool worker_partitioned_use
= false;
1267 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, var
)
1269 int blocknum
= gimple_bb (use_stmt
)->index
;
1271 /* Don't propagate SSA names that are only used in the
1272 current block, unless the usage is in a phi node: that
1273 means the name left the block, then came back in at the
1275 if (blocknum
!= block
->index
1276 || gimple_code (use_stmt
) == GIMPLE_PHI
)
1277 uses_outside_block
= true;
/* A use in a block that is not worker-single is a worker-partitioned
   use: the value must be visible to all workers.  */
1278 if (!bitmap_bit_p (worker_single
, blocknum
))
1279 worker_partitioned_use
= true;
1282 if (uses_outside_block
)
1283 def_escapes_block
.add (var
);
1285 if (worker_partitioned_use
)
1287 worker_partitioned_uses
.add (var
);
/* Merge in the local variables recorded for this block in PROP_SET:
   VAR_DECLs escape the block, and those with partitioned uses must be
   broadcast as well.  */
1292 propagation_set
*ws_prop
= (*prop_set
)[block
->index
];
1296 for (propagation_set::iterator it
= ws_prop
->begin ();
1297 it
!= ws_prop
->end ();
1301 if (TREE_CODE (var
) == VAR_DECL
)
1303 def_escapes_block
.add (var
);
1304 if (partitioned_var_uses
->contains (var
))
1306 worker_partitioned_uses
.add (var
);
/* The propagation set for this block has been consumed; clear the slot.
   (The set itself is presumably 'delete'd on a line not visible here —
   confirm.)  */
1313 (*prop_set
)[block
->index
] = 0;
/* The broadcast record type for this block was stashed in BLOCK->aux by
   execute_omp_oacc_neuter_broadcast.  */
1316 tree record_type
= (tree
) block
->aux
;
/* Neuter the block: either by broadcasting the escaping defs from the
   active worker via RECORD_TYPE, or by simple branch-around neutering.
   The guard selecting between the two variants is on a line not visible
   here.  */
1319 worker_single_copy (block
, block
, &def_escapes_block
,
1320 &worker_partitioned_uses
, record_type
,
1323 worker_single_simple (block
, block
, &def_escapes_block
);
/* Wrap OpenACC routine calls (non-internal calls not known to execute
   on the active worker only) in barriers — see comment below.  */
1327 if ((outer_mask
& GOMP_DIM_MASK (GOMP_DIM_WORKER
)) == 0)
1331 for (unsigned i
= 0; par
->blocks
.iterate (i
, &block
); i
++)
1332 for (gimple_stmt_iterator gsi
= gsi_start_bb (block
);
1336 gimple
*stmt
= gsi_stmt (gsi
);
1338 if (gimple_code (stmt
) == GIMPLE_CALL
1339 && !gimple_call_internal_p (stmt
)
1340 && !omp_sese_active_worker_call (as_a
<gcall
*> (stmt
)))
1342 /* If we have an OpenACC routine call in worker-single mode,
1343 place barriers before and afterwards to prevent
1344 clobbering re-used shared memory regions (as are used
1345 for AMDGCN at present, for example). */
1346 tree decl
= builtin_decl_explicit (BUILT_IN_GOACC_BARRIER
);
1347 gsi_insert_before (&gsi
, gimple_build_call (decl
, 0),
1349 gsi_insert_after (&gsi
, gimple_build_call (decl
, 0),
/* Recurse: inner regions inherit this region's MASK; sibling regions keep
   OUTER_MASK.  */
1356 neuter_worker_single (par
->inner
, mask
, worker_single
, vector_single
,
1357 prop_set
, partitioned_var_uses
, record_field_map
);
1359 neuter_worker_single (par
->next
, outer_mask
, worker_single
, vector_single
,
1360 prop_set
, partitioned_var_uses
, record_field_map
);
/* Main entry point of the pass: split blocks around partitioning points,
   discover the function's parallel-region structure, work out which
   variables need broadcasting from the active worker, build per-block
   record types to carry the broadcast data, and neuter worker-single code
   via neuter_worker_single.

   NOTE(review): this chunk's text is incomplete (several structural lines,
   including dump-file guards and some loop bodies, are missing); the
   comments below describe only what the visible code shows.  */
1364 execute_omp_oacc_neuter_broadcast ()
1366 bb_stmt_map_t bb_stmt_map
;
1367 auto_bitmap worker_single
, vector_single
;
/* Pre-split basic blocks (see omp_sese_split_blocks) and dump the
   resulting function.  */
1369 omp_sese_split_blocks (&bb_stmt_map
);
1373 fprintf (dump_file
, "\n\nAfter splitting:\n\n");
1374 dump_function_to_file (current_function_decl
, dump_file
, dump_flags
);
1379 /* If this is a routine, calculate MASK as if the outer levels are already
1381 tree attr
= oacc_get_fn_attrib (current_function_decl
);
1384 tree dims
= TREE_VALUE (attr
);
/* Dimensions whose 'allowed' purpose is zero are treated as partitioned
   at an outer level.  */
1386 for (ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++, dims
= TREE_CHAIN (dims
))
1388 tree allowed
= TREE_PURPOSE (dims
);
1389 if (allowed
&& integer_zerop (allowed
))
1390 mask
|= GOMP_DIM_MASK (ix
);
/* Discover the nested parallel regions and record which blocks run in
   worker-single/vector-single mode.  */
1394 parallel_g
*par
= omp_sese_discover_pars (&bb_stmt_map
);
1395 populate_single_mode_bitmaps (par
, worker_single
, vector_single
, mask
, 0);
1398 FOR_ALL_BB_FN (bb
, cfun
)
/* One propagation set slot per basic block.  */
1401 vec
<propagation_set
*> prop_set (vNULL
);
1402 prop_set
.safe_grow_cleared (last_basic_block_for_fn (cfun
), true);
1404 find_ssa_names_to_propagate (par
, mask
, worker_single
, vector_single
,
1407 hash_set
<tree
> partitioned_var_uses
;
1408 hash_set
<tree
> gang_private_vars
;
/* Gang-private variables must not be broadcast; partitioned-use variables
   feed into the propagation sets.  */
1410 find_gang_private_vars (&gang_private_vars
);
1411 find_partitioned_var_uses (par
, mask
, &partitioned_var_uses
);
1412 find_local_vars_to_propagate (par
, mask
, &partitioned_var_uses
,
1413 &gang_private_vars
, &prop_set
);
1415 record_field_map_t record_field_map
;
/* For every block with a non-empty propagation set, build a RECORD_TYPE
   ('.oacc_ws_data_s') holding the variables to broadcast.  Fields are
   sorted by size then SSA version/uid, inserted in reverse so the last
   inserted element is first, and the laid-out type is stashed in BB->aux
   for neuter_worker_single to pick up.  */
1417 FOR_ALL_BB_FN (bb
, cfun
)
1419 propagation_set
*ws_prop
= prop_set
[bb
->index
];
1422 tree record_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1423 tree name
= create_tmp_var_name (".oacc_ws_data_s");
1424 name
= build_decl (UNKNOWN_LOCATION
, TYPE_DECL
, name
, record_type
);
1425 DECL_ARTIFICIAL (name
) = 1;
1426 DECL_NAMELESS (name
) = 1;
1427 TYPE_NAME (record_type
) = name
;
1428 TYPE_ARTIFICIAL (record_type
) = 1;
1430 auto_vec
<tree
> field_vec (ws_prop
->elements ());
1431 for (hash_set
<tree
>::iterator it
= ws_prop
->begin ();
1432 it
!= ws_prop
->end (); ++it
)
1433 field_vec
.quick_push (*it
);
1435 field_vec
.qsort (sort_by_size_then_ssa_version_or_uid
);
1437 field_map_t
*fields
= new field_map_t
;
1440 existed
= record_field_map
.put (record_type
, fields
);
1441 gcc_checking_assert (!existed
);
1443 /* Insert var fields in reverse order, so the last inserted element
1444 is the first in the structure. */
1445 for (int i
= field_vec
.length () - 1; i
>= 0; i
--)
1446 install_var_field (field_vec
[i
], record_type
, fields
);
1448 layout_type (record_type
);
1450 bb
->aux
= (tree
) record_type
;
/* Perform the actual neutering/broadcasting.  */
1454 neuter_worker_single (par
, mask
, worker_single
, vector_single
, &prop_set
,
1455 &partitioned_var_uses
, &record_field_map
);
/* Release the per-record field maps (the loop body is on a line not
   visible here — presumably 'delete'; confirm) and then the map itself.  */
1457 for (auto it
: record_field_map
)
1459 record_field_map
.empty ();
1461 /* These are supposed to have been 'delete'd by 'neuter_worker_single'. */
1462 for (auto it
: prop_set
)
1463 gcc_checking_assert (!it
);
1464 prop_set
.release ();
1468 /* This doesn't seem to make a difference. */
1469 loops_state_clear (LOOP_CLOSED_SSA
);
1471 /* Neutering worker-single neutered blocks will invalidate dominance info.
1472 It may be possible to incrementally update just the affected blocks, but
1473 obliterate everything for now. */
1474 free_dominance_info (CDI_DOMINATORS
);
1475 free_dominance_info (CDI_POST_DOMINATORS
);
1479 fprintf (dump_file
, "\n\nAfter neutering:\n\n");
1480 dump_function_to_file (current_function_decl
, dump_file
, dump_flags
);
1488 const pass_data pass_data_omp_oacc_neuter_broadcast
=
1490 GIMPLE_PASS
, /* type */
1491 "omp_oacc_neuter_broadcast", /* name */
1492 OPTGROUP_OMP
, /* optinfo_flags */
1493 TV_NONE
, /* tv_id */
1494 PROP_cfg
, /* properties_required */
1495 0, /* properties_provided */
1496 0, /* properties_destroyed */
1497 0, /* todo_flags_start */
1498 TODO_update_ssa
| TODO_cleanup_cfg
, /* todo_flags_finish */
1501 class pass_omp_oacc_neuter_broadcast
: public gimple_opt_pass
1504 pass_omp_oacc_neuter_broadcast (gcc::context
*ctxt
)
1505 : gimple_opt_pass (pass_data_omp_oacc_neuter_broadcast
, ctxt
)
1508 /* opt_pass methods: */
1509 virtual bool gate (function
*)
1511 return (flag_openacc
1512 && targetm
.goacc
.create_worker_broadcast_record
);
1515 virtual unsigned int execute (function
*)
1517 return execute_omp_oacc_neuter_broadcast ();
1520 }; // class pass_omp_oacc_neuter_broadcast
1525 make_pass_omp_oacc_neuter_broadcast (gcc::context
*ctxt
)
1527 return new pass_omp_oacc_neuter_broadcast (ctxt
);