1 /* Copyright (C) 2017-2019 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 You should have received a copy of the GNU General Public License
16 along with GCC; see the file COPYING3. If not see
17 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "tree-pass.h"
29 #include "gimple-iterator.h"
33 #include "stringpool.h"
34 #include "fold-const.h"
37 #include "omp-general.h"
38 #include "internal-fn.h"
40 #include "tree-ssanames.h"
41 #include "tree-ssa-operands.h"
43 #include "tree-phinodes.h"
45 #include "targhooks.h"
46 #include "langhooks-def.h"
51 This pass is intended to make any GCN-specific transformations to OpenMP
54 At present, its only purpose is to convert some "omp" built-in functions
55 to use closer-to-the-metal "gcn" built-in functions. */
/* Scan every statement of every basic block of the current function (cfun)
   and replace calls to the OpenMP thread-number and team-number built-ins
   with calls to the GCN dim_pos built-in.  A call is recognised by
   comparing the DECL_NAME of its callee with the names of the two OpenMP
   built-ins.  TODO_update_ssa is or-ed into TODO for each replacement.  */
58 execute_omp_gcn (void)
/* Look the two OpenMP built-ins up once and keep their identifiers for the
   comparisons inside the loop.  */
60 tree thr_num_tree
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
61 tree thr_num_id
= DECL_NAME (thr_num_tree
);
62 tree team_num_tree
= builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM
);
63 tree team_num_id
= DECL_NAME (team_num_tree
);
65 gimple_stmt_iterator gsi
;
66 unsigned int todo
= 0;
68 FOR_EACH_BB_FN (bb
, cfun
)
69 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
71 gimple
*call
= gsi_stmt (gsi
);
/* Only call statements whose callee decl is known are candidates.  */
74 if (is_gimple_call (call
) && (decl
= gimple_call_fndecl (call
)))
76 tree decl_id
= DECL_NAME (decl
);
77 tree lhs
= gimple_get_lhs (call
);
/* Thread number: replaced by dim_pos with the constant argument 1.  */
79 if (decl_id
== thr_num_id
)
81 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
83 "Replace '%s' with __builtin_gcn_dim_pos.\n",
84 IDENTIFIER_POINTER (decl_id
));
87 lhs = __builtin_omp_get_thread_num ()
89 lhs = __builtin_gcn_dim_pos (1) */
90 tree fn
= targetm
.builtin_decl (GCN_BUILTIN_OMP_DIM_POS
, 0);
91 tree fnarg
= build_int_cst (unsigned_type_node
, 1);
92 gimple
*stmt
= gimple_build_call (fn
, 1, fnarg
);
/* The new call keeps the LHS of the call it replaces.  */
93 gimple_call_set_lhs (stmt
, lhs
);
94 gsi_replace (&gsi
, stmt
, true);
96 todo
|= TODO_update_ssa
;
/* Team number: same replacement, but with a zero argument.  */
98 else if (decl_id
== team_num_id
)
100 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
102 "Replace '%s' with __builtin_gcn_dim_pos.\n",
103 IDENTIFIER_POINTER (decl_id
));
106 lhs = __builtin_omp_get_team_num ()
108 lhs = __builtin_gcn_dim_pos (0) */
109 tree fn
= targetm
.builtin_decl (GCN_BUILTIN_OMP_DIM_POS
, 0);
110 tree fnarg
= build_zero_cst (unsigned_type_node
);
111 gimple
*stmt
= gimple_build_call (fn
, 1, fnarg
);
112 gimple_call_set_lhs (stmt
, lhs
);
113 gsi_replace (&gsi
, stmt
, true);
115 todo
|= TODO_update_ssa
;
/* Static descriptor of the "omp_gcn" pass.  It is handed to the
   gimple_opt_pass constructor by pass_omp_gcn.  */
126 const pass_data pass_data_omp_gcn
= {
128 "omp_gcn", /* name */
129 OPTGROUP_NONE
, /* optinfo_flags */
131 0, /* properties_required */
132 0, /* properties_provided */
133 0, /* properties_destroyed */
134 0, /* todo_flags_start */
135 TODO_df_finish
, /* todo_flags_finish */
/* Pass object for "omp_gcn".  It is a gimple_opt_pass constructed from
   pass_data_omp_gcn; its execute method forwards to execute_omp_gcn and
   ignores the function argument it is given.  */
138 class pass_omp_gcn
: public gimple_opt_pass
141 pass_omp_gcn (gcc::context
*ctxt
)
142 : gimple_opt_pass (pass_data_omp_gcn
, ctxt
)
146 /* opt_pass methods: */
147 virtual bool gate (function
*)
152 virtual unsigned int execute (function
*)
154 return execute_omp_gcn ();
157 }; /* class pass_omp_gcn. */
159 } /* anon namespace. */
/* Factory for the pass: allocate a new pass_omp_gcn bound to CTXT and
   return it.  */
162 make_pass_omp_gcn (gcc::context
*ctxt
)
164 return new pass_omp_gcn (ctxt
);
168 /* {{{ OpenACC reductions. */
170 /* Global lock variable, needed for 128bit worker & gang reductions. */
/* File-local cache of the lock decl; gcn_global_lock_addr reads it before
   building a new "__reduction_lock" decl.  */
172 static GTY(()) tree global_lock_var
;
174 /* Lazily generate the global_lock_var decl and return its address. */
/* Return an address expression for the reduction lock variable.  The
   cached decl in global_lock_var is consulted first; the decl built here is
   a VAR_DECL named "__reduction_lock" whose type is a qualified variant of
   unsigned int.  */
177 gcn_global_lock_addr ()
179 tree v
= global_lock_var
;
183 tree name
= get_identifier ("__reduction_lock");
184 tree type
= build_qualified_type (unsigned_type_node
,
186 v
= build_decl (BUILTINS_LOCATION
, VAR_DECL
, name
, type
);
/* The decl is flagged as artificial and as external.  */
188 DECL_ARTIFICIAL (v
) = 1;
189 DECL_EXTERNAL (v
) = 1;
/* Its address is about to be taken, so mark it addressable and
   referenced.  */
193 mark_addressable (v
);
194 mark_decl_referenced (v
);
/* Callers receive the address of the variable, not the decl itself.  */
197 return build_fold_addr_expr (v
);
200 /* Helper function for gcn_reduction_update.
202 Insert code to locklessly update *PTR with *PTR OP VAR just before
203 GSI. We use a lockless scheme for nearly all cases, which looks
205 actual = initval (OP);
208 write = guess OP myval;
209 actual = cmp&swap (ptr, guess, write)
210 } while (actual bit-different-to guess);
213 This relies on a cmp&swap instruction, which is available for 32- and
214 64-bit types. Larger types must use a locking scheme. */
/* Emit, before *GSI, a compare-and-swap retry loop that combines VAR into
   the object at PTR using operator OP.

   LOC is the location used for the generated expressions.
   GSI is the insertion point; it is re-seated on the same statement each
   time the containing block is split.
   PTR is passed as the first argument of the swap built-in.
   VAR is the value to combine; its type selects the swap variant.
   OP is the tree code of the reduction operator.

   Returns an expression converting the last written value back to the type
   of VAR.  */
217 gcn_lockless_update (location_t loc
, gimple_stmt_iterator
*gsi
,
218 tree ptr
, tree var
, tree_code op
)
220 unsigned fn
= GCN_BUILTIN_CMP_SWAP
;
221 tree_code code
= NOP_EXPR
;
222 tree arg_type
= unsigned_type_node
;
223 tree var_type
= TREE_TYPE (var
);
/* CODE is the conversion used between VAR's type and the unsigned type the
   swap works on.  Complex and real types use VIEW_CONVERT_EXPR instead of
   the default NOP_EXPR.  */
225 if (TREE_CODE (var_type
) == COMPLEX_TYPE
226 || TREE_CODE (var_type
) == REAL_TYPE
)
227 code
= VIEW_CONVERT_EXPR
;
/* Types with the size of unsigned long long use the LL swap built-in and
   the matching argument type.  */
229 if (TYPE_SIZE (var_type
) == TYPE_SIZE (long_long_unsigned_type_node
))
231 arg_type
= long_long_unsigned_type_node
;
232 fn
= GCN_BUILTIN_CMP_SWAPLL
;
235 tree swap_fn
= gcn_builtin_decl (fn
, true);
/* Seed value: the initial value of the reduction operator for VAR's type,
   converted to ARG_TYPE and assigned to INIT_VAR.  */
237 gimple_seq init_seq
= NULL
;
238 tree init_var
= make_ssa_name (arg_type
);
239 tree init_expr
= omp_reduction_init_op (loc
, op
, var_type
);
240 init_expr
= fold_build1 (code
, arg_type
, init_expr
);
241 gimplify_assign (init_var
, init_expr
, &init_seq
);
242 gimple
*init_end
= gimple_seq_last (init_seq
);
244 gsi_insert_seq_before (gsi
, init_seq
, GSI_SAME_STMT
);
246 /* Split the block just after the init stmts. */
247 basic_block pre_bb
= gsi_bb (*gsi
);
248 edge pre_edge
= split_block (pre_bb
, init_end
);
249 basic_block loop_bb
= pre_edge
->dest
;
250 pre_bb
= pre_edge
->src
;
251 /* Reset the iterator. */
252 *gsi
= gsi_for_stmt (gsi_stmt (*gsi
));
/* SSA names used by the loop: EXPECT_VAR is the phi result (the guess),
   ACTUAL_VAR receives the result of the swap call, WRITE_VAR is the value
   offered to the swap.  */
254 tree expect_var
= make_ssa_name (arg_type
);
255 tree actual_var
= make_ssa_name (arg_type
);
256 tree write_var
= make_ssa_name (arg_type
);
258 /* Build and insert the reduction calculation. */
259 gimple_seq red_seq
= NULL
;
/* write = (ARG_TYPE) ((VAR_TYPE) expect OP var).  */
260 tree write_expr
= fold_build1 (code
, var_type
, expect_var
);
261 write_expr
= fold_build2 (op
, var_type
, write_expr
, var
);
262 write_expr
= fold_build1 (code
, arg_type
, write_expr
);
263 gimplify_assign (write_var
, write_expr
, &red_seq
);
265 gsi_insert_seq_before (gsi
, red_seq
, GSI_SAME_STMT
);
267 /* Build & insert the cmp&swap sequence. */
268 gimple_seq latch_seq
= NULL
;
269 tree swap_expr
= build_call_expr_loc (loc
, swap_fn
, 3,
270 ptr
, expect_var
, write_var
);
271 gimplify_assign (actual_var
, swap_expr
, &latch_seq
);
/* The condition is true when the value returned by the swap equals the
   guess.  */
273 gcond
*cond
= gimple_build_cond (EQ_EXPR
, actual_var
, expect_var
,
274 NULL_TREE
, NULL_TREE
);
275 gimple_seq_add_stmt (&latch_seq
, cond
);
277 gimple
*latch_end
= gimple_seq_last (latch_seq
);
278 gsi_insert_seq_before (gsi
, latch_seq
, GSI_SAME_STMT
);
280 /* Split the block just after the latch stmts. */
281 edge post_edge
= split_block (loop_bb
, latch_end
);
282 basic_block post_bb
= post_edge
->dest
;
283 loop_bb
= post_edge
->src
;
284 *gsi
= gsi_for_stmt (gsi_stmt (*gsi
));
/* Toggle the TRUE_VALUE and FALLTHRU flags on the edge left by the split
   (presumably a plain fallthrough edge, so it becomes the true edge of the
   condition), and add a false edge from LOOP_BB back to itself.  */
286 post_edge
->flags
^= EDGE_TRUE_VALUE
| EDGE_FALLTHRU
;
287 /* post_edge->probability = profile_probability::even (); */
288 edge loop_edge
= make_edge (loop_bb
, loop_bb
, EDGE_FALSE_VALUE
);
289 /* loop_edge->probability = profile_probability::even (); */
290 set_immediate_dominator (CDI_DOMINATORS
, loop_bb
, pre_bb
);
291 set_immediate_dominator (CDI_DOMINATORS
, post_bb
, loop_bb
);
/* expect = PHI <init (entry edge), actual (back edge)>.  */
293 gphi
*phi
= create_phi_node (expect_var
, loop_bb
);
294 add_phi_arg (phi
, init_var
, pre_edge
, loc
);
295 add_phi_arg (phi
, actual_var
, loop_edge
, loc
);
/* Register a loop whose header and latch are both LOOP_BB, as a child of
   the loop LOOP_BB currently belongs to.  */
297 loop
*loop
= alloc_loop ();
298 loop
->header
= loop_bb
;
299 loop
->latch
= loop_bb
;
300 add_loop (loop
, loop_bb
->loop_father
);
/* Result: the written value, converted back to VAR's type.  */
302 return fold_build1 (code
, var_type
, write_var
);
305 /* Helper function for gcn_reduction_update.
307 Insert code to lockfully update *PTR with *PTR OP VAR just before
308 GSI. This is necessary for types larger than 64 bits, where there
309 is no cmp&swap instruction to implement a lockless scheme. We use
310 a lock variable in global memory.
312 while (cmp&swap (&lock_var, 0, 1))
315 accum = accum OP var;
317 cmp&swap (&lock_var, 1, 0);
320 A lock in global memory is necessary to force execution engine
321 descheduling and avoid resource starvation that can occur if the
322 lock is in shared memory. */
/* Emit, before *GSI, a lock / update / unlock sequence that combines VAR
   into the object at PTR using operator OP.

   LOC is the location used for the generated calls.
   GSI is the insertion point; it is re-seated on the same statement each
   time the containing block is split.
   PTR is dereferenced (as a volatile access) to read and to write the
   accumulator.
   VAR is the value to combine.
   OP is the tree code of the reduction operator.

   The lock word is the variable whose address gcn_global_lock_addr returns;
   it is operated on with the GCN_BUILTIN_CMP_SWAP built-in.  */
325 gcn_lockfull_update (location_t loc
, gimple_stmt_iterator
*gsi
,
326 tree ptr
, tree var
, tree_code op
)
328 tree var_type
= TREE_TYPE (var
);
329 tree swap_fn
= gcn_builtin_decl (GCN_BUILTIN_CMP_SWAP
, true);
/* The two values the lock word takes: 0 (unlocked) and 1 (locked).  */
330 tree uns_unlocked
= build_int_cst (unsigned_type_node
, 0);
331 tree uns_locked
= build_int_cst (unsigned_type_node
, 1);
333 /* Split the block just before the gsi. Insert a gimple nop to make
335 gimple
*nop
= gimple_build_nop ();
336 gsi_insert_before (gsi
, nop
, GSI_SAME_STMT
);
337 basic_block entry_bb
= gsi_bb (*gsi
);
338 edge entry_edge
= split_block (entry_bb
, nop
);
339 basic_block lock_bb
= entry_edge
->dest
;
340 /* Reset the iterator. */
341 *gsi
= gsi_for_stmt (gsi_stmt (*gsi
));
343 /* Build and insert the locking sequence. */
344 gimple_seq lock_seq
= NULL
;
345 tree lock_var
= make_ssa_name (unsigned_type_node
);
346 tree lock_expr
= gcn_global_lock_addr ();
/* lock_var = swap (&lock, unlocked, locked).  */
347 lock_expr
= build_call_expr_loc (loc
, swap_fn
, 3, lock_expr
,
348 uns_unlocked
, uns_locked
);
349 gimplify_assign (lock_var
, lock_expr
, &lock_seq
);
/* The condition is true when the swap returned the unlocked value.  */
350 gcond
*cond
= gimple_build_cond (EQ_EXPR
, lock_var
, uns_unlocked
,
351 NULL_TREE
, NULL_TREE
);
352 gimple_seq_add_stmt (&lock_seq
, cond
);
353 gimple
*lock_end
= gimple_seq_last (lock_seq
);
354 gsi_insert_seq_before (gsi
, lock_seq
, GSI_SAME_STMT
);
356 /* Split the block just after the lock sequence. */
357 edge locked_edge
= split_block (lock_bb
, lock_end
);
358 basic_block update_bb
= locked_edge
->dest
;
359 lock_bb
= locked_edge
->src
;
360 *gsi
= gsi_for_stmt (gsi_stmt (*gsi
));
362 /* Create the lock loop. */
/* Toggle TRUE_VALUE and FALLTHRU on the edge left by the split, and add a
   false edge from LOCK_BB back to itself; both edges get an even
   probability.  */
363 locked_edge
->flags
^= EDGE_TRUE_VALUE
| EDGE_FALLTHRU
;
364 locked_edge
->probability
= profile_probability::even ();
365 edge loop_edge
= make_edge (lock_bb
, lock_bb
, EDGE_FALSE_VALUE
);
366 loop_edge
->probability
= profile_probability::even ();
367 set_immediate_dominator (CDI_DOMINATORS
, lock_bb
, entry_bb
);
368 set_immediate_dominator (CDI_DOMINATORS
, update_bb
, lock_bb
);
370 /* Create the loop structure. */
371 loop
*lock_loop
= alloc_loop ();
372 lock_loop
->header
= lock_bb
;
373 lock_loop
->latch
= lock_bb
;
/* Record an iteration estimate of 1 for the spin loop.  */
374 lock_loop
->nb_iterations_estimate
= 1;
375 lock_loop
->any_estimate
= true;
376 add_loop (lock_loop
, entry_bb
->loop_father
);
378 /* Build and insert the reduction calculation. */
379 gimple_seq red_seq
= NULL
;
/* Both the read and the write through PTR are marked volatile.  */
380 tree acc_in
= make_ssa_name (var_type
);
381 tree ref_in
= build_simple_mem_ref (ptr
);
382 TREE_THIS_VOLATILE (ref_in
) = 1;
383 gimplify_assign (acc_in
, ref_in
, &red_seq
);
/* NOTE(review): the update expression below is built from REF_IN, not from
   ACC_IN, so ACC_IN has no visible use after the assignment above -- confirm
   that re-reading the memory reference here is intended.  */
385 tree acc_out
= make_ssa_name (var_type
);
386 tree update_expr
= fold_build2 (op
, var_type
, ref_in
, var
);
387 gimplify_assign (acc_out
, update_expr
, &red_seq
);
389 tree ref_out
= build_simple_mem_ref (ptr
);
390 TREE_THIS_VOLATILE (ref_out
) = 1;
391 gimplify_assign (ref_out
, acc_out
, &red_seq
);
393 gsi_insert_seq_before (gsi
, red_seq
, GSI_SAME_STMT
);
395 /* Build & insert the unlock sequence. */
396 gimple_seq unlock_seq
= NULL
;
397 tree unlock_expr
= gcn_global_lock_addr ();
/* swap (&lock, locked, unlocked); the call's result is not assigned.  */
398 unlock_expr
= build_call_expr_loc (loc
, swap_fn
, 3, unlock_expr
,
399 uns_locked
, uns_unlocked
);
400 gimplify_and_add (unlock_expr
, &unlock_seq
);
401 gsi_insert_seq_before (gsi
, unlock_seq
, GSI_SAME_STMT
);
406 /* Emit a sequence to update a reduction accumulator at *PTR with the
407 value held in VAR using operator OP. Return the updated value.
409 TODO: optimize for atomic ops and independent complex ops. */
/* Choose the update strategy from the size of VAR's type: when TYPE_SIZE
   matches that of unsigned int or unsigned long long the lockless helper is
   used, otherwise the locking helper.  All five arguments are forwarded
   unchanged, and the helper's result is returned.  */
412 gcn_reduction_update (location_t loc
, gimple_stmt_iterator
*gsi
,
413 tree ptr
, tree var
, tree_code op
)
415 tree type
= TREE_TYPE (var
);
416 tree size
= TYPE_SIZE (type
);
/* The size trees are compared with ==, i.e. by identity.  */
418 if (size
== TYPE_SIZE (unsigned_type_node
)
419 || size
== TYPE_SIZE (long_long_unsigned_type_node
))
420 return gcn_lockless_update (loc
, gsi
, ptr
, var
, op
);
422 return gcn_lockfull_update (loc
, gsi
, ptr
, var
, op
);
425 /* Return a temporary variable decl to use for an OpenACC worker reduction. */
/* TYPE is the type of the reduction value (a reference type is replaced by
   the type it refers to); OFFSET is the index of the decl in
   cfun->machine->reduc_decls.

   If a decl is already recorded at OFFSET it is returned, after asserting
   that its type is the expected one.  Otherwise a static temporary named
   ".oacc_reduction_<OFFSET>" is created with an ADDR_SPACE_LDS-qualified
   variant of TYPE, finalized, and recorded at OFFSET.  */
428 gcn_goacc_get_worker_red_decl (tree type
, unsigned offset
)
430 machine_function
*machfun
= cfun
->machine
;
/* Work on the referenced type when given a reference.  */
433 if (TREE_CODE (type
) == REFERENCE_TYPE
)
434 type
= TREE_TYPE (type
);
437 = build_qualified_type (type
,
439 | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS
)));
/* Reuse the decl recorded for this offset, if the table exists, is long
   enough, and holds a non-null entry there.  */
441 if (machfun
->reduc_decls
442 && offset
< machfun
->reduc_decls
->length ()
443 && (existing_decl
= (*machfun
->reduc_decls
)[offset
]))
445 gcc_assert (TREE_TYPE (existing_decl
) == var_type
);
446 return existing_decl
;
451 sprintf (name
, ".oacc_reduction_%u", offset
);
452 tree decl
= create_tmp_var_raw (var_type
, name
);
/* The new decl has no context and is static.  */
454 DECL_CONTEXT (decl
) = NULL_TREE
;
455 TREE_STATIC (decl
) = 1;
457 varpool_node::finalize_decl (decl
);
/* Grow the table (new slots cleared) so OFFSET is a valid index, then
   record the decl.  */
459 vec_safe_grow_cleared (machfun
->reduc_decls
, offset
+ 1);
460 (*machfun
->reduc_decls
)[offset
] = decl
;
468 /* Expand IFN_GOACC_REDUCTION_SETUP. */
/* Replace CALL with a gimple sequence built here.  Inputs are taken from
   CALL: its LHS, argument 1 (the receiver reference), argument 2 (VAR),
   argument 3 (the level, read as an integer constant) and, for the worker
   level, argument 5 (the reduction buffer offset).  */
471 gcn_goacc_reduction_setup (gcall
*call
)
473 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
474 tree lhs
= gimple_call_lhs (call
);
475 tree var
= gimple_call_arg (call
, 2);
476 int level
= TREE_INT_CST_LOW (gimple_call_arg (call
, 3));
477 gimple_seq seq
= NULL
;
479 push_gimplify_context (true);
/* Below gang level, a non-zero receiver reference replaces VAR with a
   dereference of that reference.  */
481 if (level
!= GOMP_DIM_GANG
)
483 /* Copy the receiver object. */
484 tree ref_to_res
= gimple_call_arg (call
, 1);
486 if (!integer_zerop (ref_to_res
))
487 var
= build_simple_mem_ref (ref_to_res
);
490 if (level
== GOMP_DIM_WORKER
)
492 tree var_type
= TREE_TYPE (var
);
493 /* Store incoming value to worker reduction buffer. */
494 tree offset
= gimple_call_arg (call
, 5);
496 = gcn_goacc_get_worker_red_decl (var_type
, TREE_INT_CST_LOW (offset
));
498 gimplify_assign (decl
, var
, &seq
);
/* The call's LHS receives VAR.  */
502 gimplify_assign (lhs
, var
, &seq
);
504 pop_gimplify_context (NULL
);
505 gsi_replace_with_seq (&gsi
, seq
, true);
508 /* Expand IFN_GOACC_REDUCTION_INIT. */
/* Replace CALL with a gimple sequence that assigns an initial value to the
   call's LHS.  Inputs are taken from CALL: its LHS, argument 1 (the
   receiver reference, examined at gang level), argument 2 (VAR),
   argument 3 (the level) and argument 4 (the reduction operator, read as a
   tree code).  */
511 gcn_goacc_reduction_init (gcall
*call
)
513 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
514 tree lhs
= gimple_call_lhs (call
);
515 tree var
= gimple_call_arg (call
, 2);
516 int level
= TREE_INT_CST_LOW (gimple_call_arg (call
, 3));
518 = (enum tree_code
) TREE_INT_CST_LOW (gimple_call_arg (call
, 4));
/* INIT starts out as the initial value of the reduction operator.  */
519 tree init
= omp_reduction_init_op (gimple_location (call
), rcode
,
521 gimple_seq seq
= NULL
;
523 push_gimplify_context (true);
525 if (level
== GOMP_DIM_GANG
)
527 /* If there's no receiver object, propagate the incoming VAR. */
528 tree ref_to_res
= gimple_call_arg (call
, 1);
529 if (integer_zerop (ref_to_res
))
534 gimplify_assign (lhs
, init
, &seq
);
536 pop_gimplify_context (NULL
);
537 gsi_replace_with_seq (&gsi
, seq
, true);
540 /* Expand IFN_GOACC_REDUCTION_FINI. */
/* Replace CALL with a gimple sequence that assigns a reduction result to
   the call's LHS.  Inputs are taken from CALL: its LHS, argument 1 (the
   receiver reference), argument 2 (VAR), argument 3 (the level),
   argument 4 (the reduction operator, read as a tree code) and, for the
   worker level, argument 5 (the reduction buffer offset).  */
543 gcn_goacc_reduction_fini (gcall
*call
)
545 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
546 tree lhs
= gimple_call_lhs (call
);
547 tree ref_to_res
= gimple_call_arg (call
, 1);
548 tree var
= gimple_call_arg (call
, 2);
549 int level
= TREE_INT_CST_LOW (gimple_call_arg (call
, 3));
551 = (enum tree_code
) TREE_INT_CST_LOW (gimple_call_arg (call
, 4));
552 gimple_seq seq
= NULL
;
555 push_gimplify_context (true);
/* ACCUM is the address of the accumulator that will be updated.  */
557 tree accum
= NULL_TREE
;
/* Worker level: the accumulator is the worker reduction decl for this
   offset.  */
559 if (level
== GOMP_DIM_WORKER
)
561 tree var_type
= TREE_TYPE (var
);
562 tree offset
= gimple_call_arg (call
, 5);
564 = gcn_goacc_get_worker_red_decl (var_type
, TREE_INT_CST_LOW (offset
));
566 accum
= build_fold_addr_expr (decl
);
568 else if (integer_zerop (ref_to_res
))
575 /* UPDATE the accumulator. */
/* Statements gathered so far are emitted before CALL, because the update
   helper inserts its own code at GSI.  */
576 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
578 r
= gcn_reduction_update (gimple_location (call
), &gsi
, accum
, var
, op
);
582 gimplify_assign (lhs
, r
, &seq
);
583 pop_gimplify_context (NULL
);
585 gsi_replace_with_seq (&gsi
, seq
, true);
588 /* Expand IFN_GOACC_REDUCTION_TEARDOWN. */
/* Replace CALL with a gimple sequence built here.  Inputs are taken from
   CALL: its LHS, argument 1 (the receiver reference), argument 2 (VAR),
   argument 3 (the level) and, for the worker level, argument 5 (the
   reduction buffer offset).  */
591 gcn_goacc_reduction_teardown (gcall
*call
)
593 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
594 tree lhs
= gimple_call_lhs (call
);
595 tree var
= gimple_call_arg (call
, 2);
596 int level
= TREE_INT_CST_LOW (gimple_call_arg (call
, 3));
597 gimple_seq seq
= NULL
;
599 push_gimplify_context (true);
601 if (level
== GOMP_DIM_WORKER
)
603 tree var_type
= TREE_TYPE (var
);
605 /* Read the worker reduction buffer. */
606 tree offset
= gimple_call_arg (call
, 5);
608 = gcn_goacc_get_worker_red_decl (var_type
, TREE_INT_CST_LOW (offset
));
/* Below gang level, a non-zero receiver reference is stored to.  */
612 if (level
!= GOMP_DIM_GANG
)
614 /* Write to the receiver object. */
615 tree ref_to_res
= gimple_call_arg (call
, 1);
617 if (!integer_zerop (ref_to_res
))
618 gimplify_assign (build_simple_mem_ref (ref_to_res
), var
, &seq
);
/* The call's LHS receives VAR.  */
622 gimplify_assign (lhs
, var
, &seq
);
624 pop_gimplify_context (NULL
);
626 gsi_replace_with_seq (&gsi
, seq
, true);
629 /* Implement TARGET_GOACC_REDUCTION.
631 Expand calls to the GOACC REDUCTION internal function, into a sequence of
632 gimple instructions. */
/* Dispatch CALL to an expander.  Argument 3 of CALL is the level: the
   vector level is handed to default_goacc_reduction.  Argument 0 is the
   reduction step code, which selects among the setup, init, fini and
   teardown expanders in this file.  */
635 gcn_goacc_reduction (gcall
*call
)
637 int level
= TREE_INT_CST_LOW (gimple_call_arg (call
, 3));
639 if (level
== GOMP_DIM_VECTOR
)
641 default_goacc_reduction (call
);
645 unsigned code
= (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call
, 0));
649 case IFN_GOACC_REDUCTION_SETUP
:
650 gcn_goacc_reduction_setup (call
);
653 case IFN_GOACC_REDUCTION_INIT
:
654 gcn_goacc_reduction_init (call
);
657 case IFN_GOACC_REDUCTION_FINI
:
658 gcn_goacc_reduction_fini (call
);
661 case IFN_GOACC_REDUCTION_TEARDOWN
:
662 gcn_goacc_reduction_teardown (call
);
670 /* Implement TARGET_GOACC_ADJUST_PROPAGATION_RECORD.
672 Tweak (worker) propagation record, e.g. to put it in shared memory. */
/* RECORD_TYPE is the propagation record type.  In the visible code its
   address space is set to ADDR_SPACE_LDS, and a static, context-free
   temporary of pointer-to-record type is created and finalized.  */
675 gcn_goacc_adjust_propagation_record (tree record_type
, bool sender
,
678 tree type
= record_type
;
680 TYPE_ADDR_SPACE (type
) = ADDR_SPACE_LDS
;
/* From here on TYPE is a pointer to the record type.  */
683 type
= build_pointer_type (type
);
685 tree decl
= create_tmp_var_raw (type
, name
);
689 DECL_CONTEXT (decl
) = NULL_TREE
;
690 TREE_STATIC (decl
) = 1;
694 varpool_node::finalize_decl (decl
);
/* Modify VAR in place: its type is replaced by a variant of the same type
   qualified with ADDR_SPACE_LDS (other qualifiers kept), it is made static,
   given an assembler name, and finalized.  The current function's
   use_flat_addressing flag is also set.  */
700 gcn_goacc_adjust_gangprivate_decl (tree var
)
702 tree type
= TREE_TYPE (var
);
/* Keep the existing non-address-space qualifiers and add the LDS address
   space.  */
703 tree lds_type
= build_qualified_type (type
,
704 TYPE_QUALS_NO_ADDR_SPACE (type
)
705 | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS
));
706 machine_function
*machfun
= cfun
->machine
;
708 TREE_TYPE (var
) = lds_type
;
709 TREE_STATIC (var
) = 1;
711 /* We're making VAR static. We have to mangle the name to avoid collisions
712 between different local variables that share the same names. */
713 lhd_set_decl_assembler_name (var
);
715 varpool_node::finalize_decl (var
);
718 machfun
->use_flat_addressing
= true;