RISC-V: Add testcases for unsigned .SAT_ADD vector form 8
[official-gcc.git] / gcc / config / gcn / gcn-tree.cc
blob6a7485a9c5479f4273285f4f82ed7a2584509796
1 /* Copyright (C) 2017-2024 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
8 version.
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 for more details.
15 You should have received a copy of the GNU General Public License
16 along with GCC; see the file COPYING3. If not see
17 <http://www.gnu.org/licenses/>. */
19 /* {{{ Includes. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "tree-pass.h"
29 #include "gimple-iterator.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "fold-const.h"
35 #include "varasm.h"
36 #include "omp-low.h"
37 #include "omp-general.h"
38 #include "internal-fn.h"
39 #include "tree-vrp.h"
40 #include "tree-ssanames.h"
41 #include "tree-ssa-operands.h"
42 #include "gimplify.h"
43 #include "tree-phinodes.h"
44 #include "cgraph.h"
45 #include "targhooks.h"
46 #include "langhooks-def.h"
48 /* }}} */
49 /* {{{ OpenACC reductions. */
51 /* Global lock variable, needed for 128bit worker & gang reductions. */
53 static GTY(()) tree global_lock_var;
55 /* Lazily generate the global_lock_var decl and return its address. */
57 static tree
58 gcn_global_lock_addr ()
60 tree v = global_lock_var;
62 if (!v)
64 tree name = get_identifier ("__reduction_lock");
65 tree type = build_qualified_type (unsigned_type_node,
66 TYPE_QUAL_VOLATILE);
67 v = build_decl (BUILTINS_LOCATION, VAR_DECL, name, type);
68 global_lock_var = v;
69 DECL_ARTIFICIAL (v) = 1;
70 DECL_EXTERNAL (v) = 1;
71 TREE_STATIC (v) = 1;
72 TREE_PUBLIC (v) = 1;
73 TREE_USED (v) = 1;
74 mark_addressable (v);
75 mark_decl_referenced (v);
78 return build_fold_addr_expr (v);
81 /* Helper function for gcn_reduction_update.
83 Insert code to locklessly update *PTR with *PTR OP VAR just before
84 GSI. We use a lockless scheme for nearly all case, which looks
85 like:
86 actual = initval (OP);
87 do {
88 guess = actual;
89 write = guess OP myval;
90 actual = cmp&swap (ptr, guess, write)
91 } while (actual bit-different-to guess);
92 return write;
94 This relies on a cmp&swap instruction, which is available for 32- and
95 64-bit types. Larger types must use a locking scheme. */
97 static tree
98 gcn_lockless_update (location_t loc, gimple_stmt_iterator *gsi,
99 tree ptr, tree var, tree_code op)
101 unsigned fn = GCN_BUILTIN_CMP_SWAP;
102 tree_code code = NOP_EXPR;
103 tree arg_type = unsigned_type_node;
104 tree var_type = TREE_TYPE (var);
106 if (TREE_CODE (var_type) == COMPLEX_TYPE
107 || SCALAR_FLOAT_TYPE_P (var_type))
108 code = VIEW_CONVERT_EXPR;
110 if (TYPE_SIZE (var_type) == TYPE_SIZE (long_long_unsigned_type_node))
112 arg_type = long_long_unsigned_type_node;
113 fn = GCN_BUILTIN_CMP_SWAPLL;
116 tree swap_fn = gcn_builtin_decl (fn, true);
118 gimple_seq init_seq = NULL;
119 tree init_var = make_ssa_name (arg_type);
120 tree init_expr = omp_reduction_init_op (loc, op, var_type);
121 init_expr = fold_build1 (code, arg_type, init_expr);
122 gimplify_assign (init_var, init_expr, &init_seq);
123 gimple *init_end = gimple_seq_last (init_seq);
125 gsi_insert_seq_before (gsi, init_seq, GSI_SAME_STMT);
127 /* Split the block just after the init stmts. */
128 basic_block pre_bb = gsi_bb (*gsi);
129 edge pre_edge = split_block (pre_bb, init_end);
130 basic_block loop_bb = pre_edge->dest;
131 pre_bb = pre_edge->src;
132 /* Reset the iterator. */
133 *gsi = gsi_for_stmt (gsi_stmt (*gsi));
135 tree expect_var = make_ssa_name (arg_type);
136 tree actual_var = make_ssa_name (arg_type);
137 tree write_var = make_ssa_name (arg_type);
139 /* Build and insert the reduction calculation. */
140 gimple_seq red_seq = NULL;
141 tree write_expr = fold_build1 (code, var_type, expect_var);
142 write_expr = fold_build2 (op, var_type, write_expr, var);
143 write_expr = fold_build1 (code, arg_type, write_expr);
144 gimplify_assign (write_var, write_expr, &red_seq);
146 gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT);
148 /* Build & insert the cmp&swap sequence. */
149 gimple_seq latch_seq = NULL;
150 tree swap_expr = build_call_expr_loc (loc, swap_fn, 3,
151 ptr, expect_var, write_var);
152 gimplify_assign (actual_var, swap_expr, &latch_seq);
154 gcond *cond = gimple_build_cond (EQ_EXPR, actual_var, expect_var,
155 NULL_TREE, NULL_TREE);
156 gimple_seq_add_stmt (&latch_seq, cond);
158 gimple *latch_end = gimple_seq_last (latch_seq);
159 gsi_insert_seq_before (gsi, latch_seq, GSI_SAME_STMT);
161 /* Split the block just after the latch stmts. */
162 edge post_edge = split_block (loop_bb, latch_end);
163 basic_block post_bb = post_edge->dest;
164 loop_bb = post_edge->src;
165 *gsi = gsi_for_stmt (gsi_stmt (*gsi));
167 post_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
168 /* post_edge->probability = profile_probability::even (); */
169 edge loop_edge = make_edge (loop_bb, loop_bb, EDGE_FALSE_VALUE);
170 /* loop_edge->probability = profile_probability::even (); */
171 set_immediate_dominator (CDI_DOMINATORS, loop_bb, pre_bb);
172 set_immediate_dominator (CDI_DOMINATORS, post_bb, loop_bb);
174 gphi *phi = create_phi_node (expect_var, loop_bb);
175 add_phi_arg (phi, init_var, pre_edge, loc);
176 add_phi_arg (phi, actual_var, loop_edge, loc);
178 loop *loop = alloc_loop ();
179 loop->header = loop_bb;
180 loop->latch = loop_bb;
181 add_loop (loop, loop_bb->loop_father);
183 return fold_build1 (code, var_type, write_var);
186 /* Helper function for gcn_reduction_update.
188 Insert code to lockfully update *PTR with *PTR OP VAR just before
189 GSI. This is necessary for types larger than 64 bits, where there
190 is no cmp&swap instruction to implement a lockless scheme. We use
191 a lock variable in global memory.
193 while (cmp&swap (&lock_var, 0, 1))
194 continue;
195 T accum = *ptr;
196 accum = accum OP var;
197 *ptr = accum;
198 cmp&swap (&lock_var, 1, 0);
199 return accum;
201 A lock in global memory is necessary to force execution engine
202 descheduling and avoid resource starvation that can occur if the
203 lock is in shared memory. */
205 static tree
206 gcn_lockfull_update (location_t loc, gimple_stmt_iterator *gsi,
207 tree ptr, tree var, tree_code op)
209 tree var_type = TREE_TYPE (var);
210 tree swap_fn = gcn_builtin_decl (GCN_BUILTIN_CMP_SWAP, true);
211 tree uns_unlocked = build_int_cst (unsigned_type_node, 0);
212 tree uns_locked = build_int_cst (unsigned_type_node, 1);
214 /* Split the block just before the gsi. Insert a gimple nop to make
215 this easier. */
216 gimple *nop = gimple_build_nop ();
217 gsi_insert_before (gsi, nop, GSI_SAME_STMT);
218 basic_block entry_bb = gsi_bb (*gsi);
219 edge entry_edge = split_block (entry_bb, nop);
220 basic_block lock_bb = entry_edge->dest;
221 /* Reset the iterator. */
222 *gsi = gsi_for_stmt (gsi_stmt (*gsi));
224 /* Build and insert the locking sequence. */
225 gimple_seq lock_seq = NULL;
226 tree lock_var = make_ssa_name (unsigned_type_node);
227 tree lock_expr = gcn_global_lock_addr ();
228 lock_expr = build_call_expr_loc (loc, swap_fn, 3, lock_expr,
229 uns_unlocked, uns_locked);
230 gimplify_assign (lock_var, lock_expr, &lock_seq);
231 gcond *cond = gimple_build_cond (EQ_EXPR, lock_var, uns_unlocked,
232 NULL_TREE, NULL_TREE);
233 gimple_seq_add_stmt (&lock_seq, cond);
234 gimple *lock_end = gimple_seq_last (lock_seq);
235 gsi_insert_seq_before (gsi, lock_seq, GSI_SAME_STMT);
237 /* Split the block just after the lock sequence. */
238 edge locked_edge = split_block (lock_bb, lock_end);
239 basic_block update_bb = locked_edge->dest;
240 lock_bb = locked_edge->src;
241 *gsi = gsi_for_stmt (gsi_stmt (*gsi));
243 /* Create the lock loop. */
244 locked_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
245 locked_edge->probability = profile_probability::even ();
246 edge loop_edge = make_edge (lock_bb, lock_bb, EDGE_FALSE_VALUE);
247 loop_edge->probability = profile_probability::even ();
248 set_immediate_dominator (CDI_DOMINATORS, lock_bb, entry_bb);
249 set_immediate_dominator (CDI_DOMINATORS, update_bb, lock_bb);
251 /* Create the loop structure. */
252 loop *lock_loop = alloc_loop ();
253 lock_loop->header = lock_bb;
254 lock_loop->latch = lock_bb;
255 lock_loop->nb_iterations_estimate = 1;
256 lock_loop->any_estimate = true;
257 add_loop (lock_loop, entry_bb->loop_father);
259 /* Build and insert the reduction calculation. */
260 gimple_seq red_seq = NULL;
261 tree acc_in = make_ssa_name (var_type);
262 tree ref_in = build_simple_mem_ref (ptr);
263 TREE_THIS_VOLATILE (ref_in) = 1;
264 gimplify_assign (acc_in, ref_in, &red_seq);
266 tree acc_out = make_ssa_name (var_type);
267 tree update_expr = fold_build2 (op, var_type, ref_in, var);
268 gimplify_assign (acc_out, update_expr, &red_seq);
270 tree ref_out = build_simple_mem_ref (ptr);
271 TREE_THIS_VOLATILE (ref_out) = 1;
272 gimplify_assign (ref_out, acc_out, &red_seq);
274 gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT);
276 /* Build & insert the unlock sequence. */
277 gimple_seq unlock_seq = NULL;
278 tree unlock_expr = gcn_global_lock_addr ();
279 unlock_expr = build_call_expr_loc (loc, swap_fn, 3, unlock_expr,
280 uns_locked, uns_unlocked);
281 gimplify_and_add (unlock_expr, &unlock_seq);
282 gsi_insert_seq_before (gsi, unlock_seq, GSI_SAME_STMT);
284 return acc_out;
287 /* Emit a sequence to update a reduction accumulator at *PTR with the
288 value held in VAR using operator OP. Return the updated value.
290 TODO: optimize for atomic ops and independent complex ops. */
292 static tree
293 gcn_reduction_update (location_t loc, gimple_stmt_iterator *gsi,
294 tree ptr, tree var, tree_code op)
296 tree type = TREE_TYPE (var);
297 tree size = TYPE_SIZE (type);
299 if (size == TYPE_SIZE (unsigned_type_node)
300 || size == TYPE_SIZE (long_long_unsigned_type_node))
301 return gcn_lockless_update (loc, gsi, ptr, var, op);
302 else
303 return gcn_lockfull_update (loc, gsi, ptr, var, op);
306 /* Return a temporary variable decl to use for an OpenACC worker reduction. */
308 static tree
309 gcn_goacc_get_worker_red_decl (tree type, unsigned offset)
311 machine_function *machfun = cfun->machine;
313 if (TREE_CODE (type) == REFERENCE_TYPE)
314 type = TREE_TYPE (type);
316 tree var_type
317 = build_qualified_type (type,
318 (TYPE_QUALS (type)
319 | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS)));
321 gcc_assert (offset
322 < (machfun->reduction_limit - machfun->reduction_base));
323 tree ptr_type = build_pointer_type (var_type);
324 tree addr = build_int_cst (ptr_type, machfun->reduction_base + offset);
326 return build_simple_mem_ref (addr);
329 /* Expand IFN_GOACC_REDUCTION_SETUP. */
331 static void
332 gcn_goacc_reduction_setup (gcall *call)
334 gimple_stmt_iterator gsi = gsi_for_stmt (call);
335 tree lhs = gimple_call_lhs (call);
336 tree var = gimple_call_arg (call, 2);
337 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
338 gimple_seq seq = NULL;
340 push_gimplify_context (true);
342 if (level != GOMP_DIM_GANG)
344 /* Copy the receiver object. */
345 tree ref_to_res = gimple_call_arg (call, 1);
347 if (!integer_zerop (ref_to_res))
348 var = build_simple_mem_ref (ref_to_res);
351 if (level == GOMP_DIM_WORKER)
353 tree var_type = TREE_TYPE (var);
354 /* Store incoming value to worker reduction buffer. */
355 tree offset = gimple_call_arg (call, 5);
356 tree decl
357 = gcn_goacc_get_worker_red_decl (var_type, TREE_INT_CST_LOW (offset));
359 gimplify_assign (decl, var, &seq);
362 if (lhs)
363 gimplify_assign (lhs, var, &seq);
365 pop_gimplify_context (NULL);
366 gsi_replace_with_seq (&gsi, seq, true);
369 /* Expand IFN_GOACC_REDUCTION_INIT. */
371 static void
372 gcn_goacc_reduction_init (gcall *call)
374 gimple_stmt_iterator gsi = gsi_for_stmt (call);
375 tree lhs = gimple_call_lhs (call);
376 tree var = gimple_call_arg (call, 2);
377 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
378 enum tree_code rcode
379 = (enum tree_code) TREE_INT_CST_LOW (gimple_call_arg (call, 4));
380 tree init = omp_reduction_init_op (gimple_location (call), rcode,
381 TREE_TYPE (var));
382 gimple_seq seq = NULL;
384 push_gimplify_context (true);
386 if (level == GOMP_DIM_GANG)
388 /* If there's no receiver object, propagate the incoming VAR. */
389 tree ref_to_res = gimple_call_arg (call, 1);
390 if (integer_zerop (ref_to_res))
391 init = var;
394 if (lhs)
395 gimplify_assign (lhs, init, &seq);
397 pop_gimplify_context (NULL);
398 gsi_replace_with_seq (&gsi, seq, true);
401 /* Expand IFN_GOACC_REDUCTION_FINI. */
403 static void
404 gcn_goacc_reduction_fini (gcall *call)
406 gimple_stmt_iterator gsi = gsi_for_stmt (call);
407 tree lhs = gimple_call_lhs (call);
408 tree ref_to_res = gimple_call_arg (call, 1);
409 tree var = gimple_call_arg (call, 2);
410 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
411 enum tree_code op
412 = (enum tree_code) TREE_INT_CST_LOW (gimple_call_arg (call, 4));
413 gimple_seq seq = NULL;
414 tree r = NULL_TREE;;
416 push_gimplify_context (true);
418 tree accum = NULL_TREE;
420 if (level == GOMP_DIM_WORKER)
422 tree var_type = TREE_TYPE (var);
423 tree offset = gimple_call_arg (call, 5);
424 tree decl
425 = gcn_goacc_get_worker_red_decl (var_type, TREE_INT_CST_LOW (offset));
427 accum = build_fold_addr_expr (decl);
429 else if (integer_zerop (ref_to_res))
430 r = var;
431 else
432 accum = ref_to_res;
434 if (accum)
436 /* UPDATE the accumulator. */
437 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
438 seq = NULL;
439 r = gcn_reduction_update (gimple_location (call), &gsi, accum, var, op);
442 if (lhs)
443 gimplify_assign (lhs, r, &seq);
444 pop_gimplify_context (NULL);
446 gsi_replace_with_seq (&gsi, seq, true);
449 /* Expand IFN_GOACC_REDUCTION_TEARDOWN. */
451 static void
452 gcn_goacc_reduction_teardown (gcall *call)
454 gimple_stmt_iterator gsi = gsi_for_stmt (call);
455 tree lhs = gimple_call_lhs (call);
456 tree var = gimple_call_arg (call, 2);
457 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
458 gimple_seq seq = NULL;
460 push_gimplify_context (true);
462 if (level == GOMP_DIM_WORKER)
464 tree var_type = TREE_TYPE (var);
466 /* Read the worker reduction buffer. */
467 tree offset = gimple_call_arg (call, 5);
468 tree decl
469 = gcn_goacc_get_worker_red_decl (var_type, TREE_INT_CST_LOW (offset));
470 var = decl;
473 if (level != GOMP_DIM_GANG)
475 /* Write to the receiver object. */
476 tree ref_to_res = gimple_call_arg (call, 1);
478 if (!integer_zerop (ref_to_res))
479 gimplify_assign (build_simple_mem_ref (ref_to_res), var, &seq);
482 if (lhs)
483 gimplify_assign (lhs, unshare_expr (var), &seq);
485 pop_gimplify_context (NULL);
487 gsi_replace_with_seq (&gsi, seq, true);
490 /* Implement TARGET_GOACC_REDUCTION.
492 Expand calls to the GOACC REDUCTION internal function, into a sequence of
493 gimple instructions. */
495 void
496 gcn_goacc_reduction (gcall *call)
498 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
500 if (level == GOMP_DIM_VECTOR)
502 default_goacc_reduction (call);
503 return;
506 unsigned code = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
508 switch (code)
510 case IFN_GOACC_REDUCTION_SETUP:
511 gcn_goacc_reduction_setup (call);
512 break;
514 case IFN_GOACC_REDUCTION_INIT:
515 gcn_goacc_reduction_init (call);
516 break;
518 case IFN_GOACC_REDUCTION_FINI:
519 gcn_goacc_reduction_fini (call);
520 break;
522 case IFN_GOACC_REDUCTION_TEARDOWN:
523 gcn_goacc_reduction_teardown (call);
524 break;
526 default:
527 gcc_unreachable ();
531 tree
532 gcn_goacc_adjust_private_decl (location_t, tree var, int level)
534 if (level != GOMP_DIM_GANG)
535 return var;
537 tree type = TREE_TYPE (var);
538 tree lds_type = build_qualified_type (type,
539 TYPE_QUALS_NO_ADDR_SPACE (type)
540 | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS));
541 machine_function *machfun = cfun->machine;
543 TREE_TYPE (var) = lds_type;
544 TREE_STATIC (var) = 1;
546 /* We're making VAR static. We have to mangle the name to avoid collisions
547 between different local variables that share the same names. */
548 lhd_set_decl_assembler_name (var);
550 varpool_node::finalize_decl (var);
552 if (machfun)
553 machfun->use_flat_addressing = true;
555 return var;
558 /* Implement TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD.
560 Create OpenACC worker state propagation record in shared memory. */
562 tree
563 gcn_goacc_create_worker_broadcast_record (tree record_type, bool sender,
564 const char *name,
565 unsigned HOST_WIDE_INT offset)
567 tree type = build_qualified_type (record_type,
568 TYPE_QUALS_NO_ADDR_SPACE (record_type)
569 | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS));
571 if (!sender)
573 tree ptr_type = build_pointer_type (type);
574 return create_tmp_var_raw (ptr_type, name);
577 if (record_type == char_type_node)
578 offset = 1;
580 tree ptr_type = build_pointer_type (type);
581 return build_int_cst (ptr_type, offset);
584 /* }}} */