rs6000: use li;x?oris to build constant
[official-gcc.git] / gcc / tree-parloops.cc
blobe680d97dd0497846aa825a319265cc871495b66e
1 /* Loop autoparallelization.
2 Copyright (C) 2006-2022 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "fold-const.h"
34 #include "gimplify.h"
35 #include "gimple-iterator.h"
36 #include "gimplify-me.h"
37 #include "gimple-walk.h"
38 #include "stor-layout.h"
39 #include "tree-nested.h"
40 #include "tree-cfg.h"
41 #include "tree-ssa-loop-ivopts.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "tree-ssa-loop.h"
45 #include "tree-into-ssa.h"
46 #include "cfgloop.h"
47 #include "tree-scalar-evolution.h"
48 #include "langhooks.h"
49 #include "tree-vectorizer.h"
50 #include "tree-hasher.h"
51 #include "tree-parloops.h"
52 #include "omp-general.h"
53 #include "omp-low.h"
54 #include "tree-ssa.h"
55 #include "tree-ssa-alias.h"
56 #include "tree-eh.h"
57 #include "gomp-constants.h"
58 #include "tree-dfa.h"
59 #include "stringpool.h"
60 #include "attribs.h"
62 /* This pass tries to distribute iterations of loops into several threads.
63 The implementation is straightforward -- for each loop we test whether its
64 iterations are independent, and if it is the case (and some additional
65 conditions regarding profitability and correctness are satisfied), we
66 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
67 machinery do its job.
69 The most of the complexity is in bringing the code into shape expected
70 by the omp expanders:
71 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
72 variable and that the exit test is at the start of the loop body
73 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
74 variables by accesses through pointers, and breaking up ssa chains
75 by storing the values incoming to the parallelized loop to a structure
76 passed to the new function as an argument (something similar is done
77 in omp gimplification, unfortunately only a small part of the code
78 can be shared).
80 TODO:
81 -- if there are several parallelizable loops in a function, it may be
82 possible to generate the threads just once (using synchronization to
83 ensure that cross-loop dependences are obeyed).
84 -- handling of common reduction patterns for outer loops.
86 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
88 Reduction handling:
89 currently we use code inspired by vect_force_simple_reduction to detect
90 reduction patterns.
91 The code transformation will be introduced by an example.
94 parloop
96 int sum=1;
98 for (i = 0; i < N; i++)
100 x[i] = i + 3;
101 sum+=x[i];
105 gimple-like code:
106 header_bb:
108 # sum_29 = PHI <sum_11(5), 1(3)>
109 # i_28 = PHI <i_12(5), 0(3)>
110 D.1795_8 = i_28 + 3;
111 x[i_28] = D.1795_8;
112 sum_11 = D.1795_8 + sum_29;
113 i_12 = i_28 + 1;
114 if (N_6(D) > i_12)
115 goto header_bb;
118 exit_bb:
120 # sum_21 = PHI <sum_11(4)>
121 printf (&"%d"[0], sum_21);
124 after reduction transformation (only relevant parts):
126 parloop
129 ....
132 # Storing the initial value given by the user. #
134 .paral_data_store.32.sum.27 = 1;
136 #pragma omp parallel num_threads(4)
138 #pragma omp for schedule(static)
140 # The neutral element corresponding to the particular
141 reduction's operation, e.g. 0 for PLUS_EXPR,
142 1 for MULT_EXPR, etc. replaces the user's initial value. #
144 # sum.27_29 = PHI <sum.27_11, 0>
146 sum.27_11 = D.1827_8 + sum.27_29;
148 GIMPLE_OMP_CONTINUE
150 # Adding this reduction phi is done at create_phi_for_local_result() #
151 # sum.27_56 = PHI <sum.27_11, 0>
152 GIMPLE_OMP_RETURN
154 # Creating the atomic operation is done at
155 create_call_for_reduction_1() #
157 #pragma omp atomic_load
158 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
159 D.1840_60 = sum.27_56 + D.1839_59;
160 #pragma omp atomic_store (D.1840_60);
162 GIMPLE_OMP_RETURN
164 # collecting the result after the join of the threads is done at
165 create_loads_for_reductions().
166 The value computed by the threads is loaded from the
167 shared struct. #
170 .paral_data_load.33_52 = &.paral_data_store.32;
171 sum_37 = .paral_data_load.33_52->sum.27;
172 sum_43 = D.1795_41 + sum_37;
174 exit bb:
175 # sum_21 = PHI <sum_43, sum_26>
176 printf (&"%d"[0], sum_21);
184 /* Error reporting helper for parloops_is_simple_reduction below. GIMPLE
185 statement STMT is printed with a message MSG. */
187 static void
188 report_ploop_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
190 dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
193 /* DEF_STMT_INFO occurs in a loop that contains a potential reduction
194 operation. Return true if the results of DEF_STMT_INFO are something
195 that can be accumulated by such a reduction. */
197 static bool
198 parloops_valid_reduction_input_p (stmt_vec_info def_stmt_info)
200 return (is_gimple_assign (def_stmt_info->stmt)
201 || is_gimple_call (def_stmt_info->stmt)
202 || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def
203 || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
204 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def
205 && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt))));
208 /* Detect SLP reduction of the form:
210 #a1 = phi <a5, a0>
211 a2 = operation (a1)
212 a3 = operation (a2)
213 a4 = operation (a3)
214 a5 = operation (a4)
216 #a = phi <a5>
218 PHI is the reduction phi node (#a1 = phi <a5, a0> above)
219 FIRST_STMT is the first reduction stmt in the chain
220 (a2 = operation (a1)).
222 Return TRUE if a reduction chain was detected. */
224 static bool
225 parloops_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
226 gimple *first_stmt)
228 class loop *loop = (gimple_bb (phi))->loop_father;
229 class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
230 enum tree_code code;
231 gimple *loop_use_stmt = NULL;
232 stmt_vec_info use_stmt_info;
233 tree lhs;
234 imm_use_iterator imm_iter;
235 use_operand_p use_p;
236 int nloop_uses, size = 0, n_out_of_loop_uses;
237 bool found = false;
239 if (loop != vect_loop)
240 return false;
242 auto_vec<stmt_vec_info, 8> reduc_chain;
243 lhs = PHI_RESULT (phi);
244 code = gimple_assign_rhs_code (first_stmt);
245 while (1)
247 nloop_uses = 0;
248 n_out_of_loop_uses = 0;
249 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
251 gimple *use_stmt = USE_STMT (use_p);
252 if (is_gimple_debug (use_stmt))
253 continue;
255 /* Check if we got back to the reduction phi. */
256 if (use_stmt == phi)
258 loop_use_stmt = use_stmt;
259 found = true;
260 break;
263 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
265 loop_use_stmt = use_stmt;
266 nloop_uses++;
268 else
269 n_out_of_loop_uses++;
271 /* There are can be either a single use in the loop or two uses in
272 phi nodes. */
273 if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
274 return false;
277 if (found)
278 break;
280 /* We reached a statement with no loop uses. */
281 if (nloop_uses == 0)
282 return false;
284 /* This is a loop exit phi, and we haven't reached the reduction phi. */
285 if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
286 return false;
288 if (!is_gimple_assign (loop_use_stmt)
289 || code != gimple_assign_rhs_code (loop_use_stmt)
290 || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
291 return false;
293 /* Insert USE_STMT into reduction chain. */
294 use_stmt_info = loop_info->lookup_stmt (loop_use_stmt);
295 reduc_chain.safe_push (use_stmt_info);
297 lhs = gimple_assign_lhs (loop_use_stmt);
298 size++;
301 if (!found || loop_use_stmt != phi || size < 2)
302 return false;
304 /* Swap the operands, if needed, to make the reduction operand be the second
305 operand. */
306 lhs = PHI_RESULT (phi);
307 for (unsigned i = 0; i < reduc_chain.length (); ++i)
309 gassign *next_stmt = as_a <gassign *> (reduc_chain[i]->stmt);
310 if (gimple_assign_rhs2 (next_stmt) == lhs)
312 tree op = gimple_assign_rhs1 (next_stmt);
313 stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
315 /* Check that the other def is either defined in the loop
316 ("vect_internal_def"), or it's an induction (defined by a
317 loop-header phi-node). */
318 if (def_stmt_info
319 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
320 && parloops_valid_reduction_input_p (def_stmt_info))
322 lhs = gimple_assign_lhs (next_stmt);
323 continue;
326 return false;
328 else
330 tree op = gimple_assign_rhs2 (next_stmt);
331 stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
333 /* Check that the other def is either defined in the loop
334 ("vect_internal_def"), or it's an induction (defined by a
335 loop-header phi-node). */
336 if (def_stmt_info
337 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
338 && parloops_valid_reduction_input_p (def_stmt_info))
340 if (dump_enabled_p ())
341 dump_printf_loc (MSG_NOTE, vect_location,
342 "swapping oprnds: %G", (gimple *) next_stmt);
344 swap_ssa_operands (next_stmt,
345 gimple_assign_rhs1_ptr (next_stmt),
346 gimple_assign_rhs2_ptr (next_stmt));
347 update_stmt (next_stmt);
349 else
350 return false;
353 lhs = gimple_assign_lhs (next_stmt);
356 /* Build up the actual chain. */
357 for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
359 REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
360 REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
362 REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
363 REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
365 /* Save the chain for further analysis in SLP detection. */
366 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
367 REDUC_GROUP_SIZE (reduc_chain[0]) = size;
369 return true;
372 /* Return true if we need an in-order reduction for operation CODE
373 on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
374 overflow must wrap. */
376 static bool
377 parloops_needs_fold_left_reduction_p (tree type, tree_code code,
378 bool need_wrapping_integral_overflow)
380 /* CHECKME: check for !flag_finite_math_only too? */
381 if (SCALAR_FLOAT_TYPE_P (type))
382 switch (code)
384 case MIN_EXPR:
385 case MAX_EXPR:
386 return false;
388 default:
389 return !flag_associative_math;
392 if (INTEGRAL_TYPE_P (type))
394 if (!operation_no_trapping_overflow (type, code))
395 return true;
396 if (need_wrapping_integral_overflow
397 && !TYPE_OVERFLOW_WRAPS (type)
398 && operation_can_overflow (code))
399 return true;
400 return false;
403 if (SAT_FIXED_POINT_TYPE_P (type))
404 return true;
406 return false;
410 /* Function parloops_is_simple_reduction
412 (1) Detect a cross-iteration def-use cycle that represents a simple
413 reduction computation. We look for the following pattern:
415 loop_header:
416 a1 = phi < a0, a2 >
417 a3 = ...
418 a2 = operation (a3, a1)
422 a3 = ...
423 loop_header:
424 a1 = phi < a0, a2 >
425 a2 = operation (a3, a1)
427 such that:
428 1. operation is commutative and associative and it is safe to
429 change the order of the computation
430 2. no uses for a2 in the loop (a2 is used out of the loop)
431 3. no uses of a1 in the loop besides the reduction operation
432 4. no uses of a1 outside the loop.
434 Conditions 1,4 are tested here.
435 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
437 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
438 nested cycles.
440 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
441 reductions:
443 a1 = phi < a0, a2 >
444 inner loop (def of a3)
445 a2 = phi < a3 >
447 (4) Detect condition expressions, ie:
448 for (int i = 0; i < N; i++)
449 if (a[i] < val)
450 ret_val = a[i];
454 static stmt_vec_info
455 parloops_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
456 bool *double_reduc,
457 bool need_wrapping_integral_overflow,
458 enum vect_reduction_type *v_reduc_type)
460 gphi *phi = as_a <gphi *> (phi_info->stmt);
461 class loop *loop = (gimple_bb (phi))->loop_father;
462 class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
463 bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
464 gimple *phi_use_stmt = NULL;
465 enum tree_code orig_code, code;
466 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
467 tree type;
468 tree name;
469 imm_use_iterator imm_iter;
470 use_operand_p use_p;
471 bool phi_def;
473 *double_reduc = false;
474 *v_reduc_type = TREE_CODE_REDUCTION;
476 tree phi_name = PHI_RESULT (phi);
477 /* ??? If there are no uses of the PHI result the inner loop reduction
478 won't be detected as possibly double-reduction by vectorizable_reduction
479 because that tries to walk the PHI arg from the preheader edge which
480 can be constant. See PR60382. */
481 if (has_zero_uses (phi_name))
482 return NULL;
483 unsigned nphi_def_loop_uses = 0;
484 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
486 gimple *use_stmt = USE_STMT (use_p);
487 if (is_gimple_debug (use_stmt))
488 continue;
490 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
492 if (dump_enabled_p ())
493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
494 "intermediate value used outside loop.\n");
496 return NULL;
499 nphi_def_loop_uses++;
500 phi_use_stmt = use_stmt;
503 edge latch_e = loop_latch_edge (loop);
504 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
505 if (TREE_CODE (loop_arg) != SSA_NAME)
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
509 "reduction: not ssa_name: %T\n", loop_arg);
510 return NULL;
513 stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg);
514 if (!def_stmt_info
515 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
516 return NULL;
518 if (gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt))
520 name = gimple_assign_lhs (def_stmt);
521 phi_def = false;
523 else if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
525 name = PHI_RESULT (def_stmt);
526 phi_def = true;
528 else
530 if (dump_enabled_p ())
531 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
532 "reduction: unhandled reduction operation: %G",
533 def_stmt_info->stmt);
534 return NULL;
537 unsigned nlatch_def_loop_uses = 0;
538 auto_vec<gphi *, 3> lcphis;
539 bool inner_loop_of_double_reduc = false;
540 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
542 gimple *use_stmt = USE_STMT (use_p);
543 if (is_gimple_debug (use_stmt))
544 continue;
545 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
546 nlatch_def_loop_uses++;
547 else
549 /* We can have more than one loop-closed PHI. */
550 lcphis.safe_push (as_a <gphi *> (use_stmt));
551 if (nested_in_vect_loop
552 && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))
553 == vect_double_reduction_def))
554 inner_loop_of_double_reduc = true;
558 /* If this isn't a nested cycle or if the nested cycle reduction value
559 is used ouside of the inner loop we cannot handle uses of the reduction
560 value. */
561 if ((!nested_in_vect_loop || inner_loop_of_double_reduc)
562 && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1))
564 if (dump_enabled_p ())
565 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
566 "reduction used in loop.\n");
567 return NULL;
570 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
571 defined in the inner loop. */
572 if (phi_def)
574 gphi *def_stmt = as_a <gphi *> (def_stmt_info->stmt);
575 op1 = PHI_ARG_DEF (def_stmt, 0);
577 if (gimple_phi_num_args (def_stmt) != 1
578 || TREE_CODE (op1) != SSA_NAME)
580 if (dump_enabled_p ())
581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
582 "unsupported phi node definition.\n");
584 return NULL;
587 gimple *def1 = SSA_NAME_DEF_STMT (op1);
588 if (gimple_bb (def1)
589 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
590 && loop->inner
591 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
592 && is_gimple_assign (def1)
593 && is_a <gphi *> (phi_use_stmt)
594 && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
596 if (dump_enabled_p ())
597 report_ploop_op (MSG_NOTE, def_stmt,
598 "detected double reduction: ");
600 *double_reduc = true;
601 return def_stmt_info;
604 return NULL;
607 /* If we are vectorizing an inner reduction we are executing that
608 in the original order only in case we are not dealing with a
609 double reduction. */
610 bool check_reduction = true;
611 if (flow_loop_nested_p (vect_loop, loop))
613 gphi *lcphi;
614 unsigned i;
615 check_reduction = false;
616 FOR_EACH_VEC_ELT (lcphis, i, lcphi)
617 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
619 gimple *use_stmt = USE_STMT (use_p);
620 if (is_gimple_debug (use_stmt))
621 continue;
622 if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
623 check_reduction = true;
627 gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
628 code = orig_code = gimple_assign_rhs_code (def_stmt);
630 if (nested_in_vect_loop && !check_reduction)
632 /* FIXME: Even for non-reductions code generation is funneled
633 through vectorizable_reduction for the stmt defining the
634 PHI latch value. So we have to artificially restrict ourselves
635 for the supported operations. */
636 switch (get_gimple_rhs_class (code))
638 case GIMPLE_BINARY_RHS:
639 case GIMPLE_TERNARY_RHS:
640 break;
641 default:
642 /* Not supported by vectorizable_reduction. */
643 if (dump_enabled_p ())
644 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
645 "nested cycle: not handled operation: ");
646 return NULL;
648 if (dump_enabled_p ())
649 report_ploop_op (MSG_NOTE, def_stmt, "detected nested cycle: ");
650 return def_stmt_info;
653 /* We can handle "res -= x[i]", which is non-associative by
654 simply rewriting this into "res += -x[i]". Avoid changing
655 gimple instruction for the first simple tests and only do this
656 if we're allowed to change code at all. */
657 if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name)
658 code = PLUS_EXPR;
660 if (code == COND_EXPR)
662 if (! nested_in_vect_loop)
663 *v_reduc_type = COND_REDUCTION;
665 op3 = gimple_assign_rhs1 (def_stmt);
666 if (COMPARISON_CLASS_P (op3))
668 op4 = TREE_OPERAND (op3, 1);
669 op3 = TREE_OPERAND (op3, 0);
671 if (op3 == phi_name || op4 == phi_name)
673 if (dump_enabled_p ())
674 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
675 "reduction: condition depends on previous"
676 " iteration: ");
677 return NULL;
680 op1 = gimple_assign_rhs2 (def_stmt);
681 op2 = gimple_assign_rhs3 (def_stmt);
683 else if (!commutative_tree_code (code) || !associative_tree_code (code))
685 if (dump_enabled_p ())
686 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
687 "reduction: not commutative/associative: ");
688 return NULL;
690 else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
692 op1 = gimple_assign_rhs1 (def_stmt);
693 op2 = gimple_assign_rhs2 (def_stmt);
695 else
697 if (dump_enabled_p ())
698 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
699 "reduction: not handled operation: ");
700 return NULL;
703 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
705 if (dump_enabled_p ())
706 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
707 "reduction: both uses not ssa_names: ");
709 return NULL;
712 type = TREE_TYPE (gimple_assign_lhs (def_stmt));
713 if ((TREE_CODE (op1) == SSA_NAME
714 && !types_compatible_p (type,TREE_TYPE (op1)))
715 || (TREE_CODE (op2) == SSA_NAME
716 && !types_compatible_p (type, TREE_TYPE (op2)))
717 || (op3 && TREE_CODE (op3) == SSA_NAME
718 && !types_compatible_p (type, TREE_TYPE (op3)))
719 || (op4 && TREE_CODE (op4) == SSA_NAME
720 && !types_compatible_p (type, TREE_TYPE (op4))))
722 if (dump_enabled_p ())
724 dump_printf_loc (MSG_NOTE, vect_location,
725 "reduction: multiple types: operation type: "
726 "%T, operands types: %T,%T",
727 type, TREE_TYPE (op1), TREE_TYPE (op2));
728 if (op3)
729 dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3));
731 if (op4)
732 dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4));
733 dump_printf (MSG_NOTE, "\n");
736 return NULL;
739 /* Check whether it's ok to change the order of the computation.
740 Generally, when vectorizing a reduction we change the order of the
741 computation. This may change the behavior of the program in some
742 cases, so we need to check that this is ok. One exception is when
743 vectorizing an outer-loop: the inner-loop is executed sequentially,
744 and therefore vectorizing reductions in the inner-loop during
745 outer-loop vectorization is safe. */
746 if (check_reduction
747 && *v_reduc_type == TREE_CODE_REDUCTION
748 && parloops_needs_fold_left_reduction_p (type, code,
749 need_wrapping_integral_overflow))
750 *v_reduc_type = FOLD_LEFT_REDUCTION;
752 /* Reduction is safe. We're dealing with one of the following:
753 1) integer arithmetic and no trapv
754 2) floating point arithmetic, and special flags permit this optimization
755 3) nested cycle (i.e., outer loop vectorization). */
756 stmt_vec_info def1_info = loop_info->lookup_def (op1);
757 stmt_vec_info def2_info = loop_info->lookup_def (op2);
758 if (code != COND_EXPR && !def1_info && !def2_info)
760 if (dump_enabled_p ())
761 report_ploop_op (MSG_NOTE, def_stmt,
762 "reduction: no defs for operands: ");
763 return NULL;
766 /* Check that one def is the reduction def, defined by PHI,
767 the other def is either defined in the loop ("vect_internal_def"),
768 or it's an induction (defined by a loop-header phi-node). */
770 if (def2_info
771 && def2_info->stmt == phi
772 && (code == COND_EXPR
773 || !def1_info
774 || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt))
775 || parloops_valid_reduction_input_p (def1_info)))
777 if (dump_enabled_p ())
778 report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
779 return def_stmt_info;
782 if (def1_info
783 && def1_info->stmt == phi
784 && (code == COND_EXPR
785 || !def2_info
786 || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
787 || parloops_valid_reduction_input_p (def2_info)))
789 if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
791 /* Check if we can swap operands (just for simplicity - so that
792 the rest of the code can assume that the reduction variable
793 is always the last (second) argument). */
794 if (code == COND_EXPR)
796 /* Swap cond_expr by inverting the condition. */
797 tree cond_expr = gimple_assign_rhs1 (def_stmt);
798 enum tree_code invert_code = ERROR_MARK;
799 enum tree_code cond_code = TREE_CODE (cond_expr);
801 if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
803 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
804 invert_code = invert_tree_comparison (cond_code, honor_nans);
806 if (invert_code != ERROR_MARK)
808 TREE_SET_CODE (cond_expr, invert_code);
809 swap_ssa_operands (def_stmt,
810 gimple_assign_rhs2_ptr (def_stmt),
811 gimple_assign_rhs3_ptr (def_stmt));
813 else
815 if (dump_enabled_p ())
816 report_ploop_op (MSG_NOTE, def_stmt,
817 "detected reduction: cannot swap operands "
818 "for cond_expr");
819 return NULL;
822 else
823 swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
824 gimple_assign_rhs2_ptr (def_stmt));
826 if (dump_enabled_p ())
827 report_ploop_op (MSG_NOTE, def_stmt,
828 "detected reduction: need to swap operands: ");
830 else
832 if (dump_enabled_p ())
833 report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
836 return def_stmt_info;
839 /* Try to find SLP reduction chain. */
840 if (! nested_in_vect_loop
841 && code != COND_EXPR
842 && orig_code != MINUS_EXPR
843 && parloops_is_slp_reduction (loop_info, phi, def_stmt))
845 if (dump_enabled_p ())
846 report_ploop_op (MSG_NOTE, def_stmt,
847 "reduction: detected reduction chain: ");
849 return def_stmt_info;
852 /* Look for the expression computing loop_arg from loop PHI result. */
853 if (check_reduction_path (vect_location, loop, phi, loop_arg, code))
854 return def_stmt_info;
856 if (dump_enabled_p ())
858 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
859 "reduction: unknown pattern: ");
862 return NULL;
865 /* Wrapper around vect_is_simple_reduction, which will modify code
866 in-place if it enables detection of more reductions. Arguments
867 as there. */
869 stmt_vec_info
870 parloops_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
871 bool *double_reduc,
872 bool need_wrapping_integral_overflow)
874 enum vect_reduction_type v_reduc_type;
875 stmt_vec_info def_info
876 = parloops_is_simple_reduction (loop_info, phi_info, double_reduc,
877 need_wrapping_integral_overflow,
878 &v_reduc_type);
879 if (def_info)
881 STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type;
882 STMT_VINFO_REDUC_DEF (phi_info) = def_info;
883 STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type;
884 STMT_VINFO_REDUC_DEF (def_info) = phi_info;
886 return def_info;
889 /* Minimal number of iterations of a loop that should be executed in each
890 thread. */
891 #define MIN_PER_THREAD param_parloops_min_per_thread
893 /* Element of the hashtable, representing a
894 reduction in the current loop. */
895 struct reduction_info
897 gimple *reduc_stmt; /* reduction statement. */
898 gimple *reduc_phi; /* The phi node defining the reduction. */
899 enum tree_code reduction_code;/* code for the reduction operation. */
900 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
901 result. */
902 gphi *keep_res; /* The PHI_RESULT of this phi is the resulting value
903 of the reduction variable when existing the loop. */
904 tree initial_value; /* The initial value of the reduction var before entering the loop. */
905 tree field; /* the name of the field in the parloop data structure intended for reduction. */
906 tree reduc_addr; /* The address of the reduction variable for
907 openacc reductions. */
908 tree init; /* reduction initialization value. */
909 gphi *new_phi; /* (helper field) Newly created phi node whose result
910 will be passed to the atomic operation. Represents
911 the local result each thread computed for the reduction
912 operation. */
915 /* Reduction info hashtable helpers. */
917 struct reduction_hasher : free_ptr_hash <reduction_info>
919 static inline hashval_t hash (const reduction_info *);
920 static inline bool equal (const reduction_info *, const reduction_info *);
923 /* Equality and hash functions for hashtab code. */
925 inline bool
926 reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
928 return (a->reduc_phi == b->reduc_phi);
931 inline hashval_t
932 reduction_hasher::hash (const reduction_info *a)
934 return a->reduc_version;
937 typedef hash_table<reduction_hasher> reduction_info_table_type;
940 static struct reduction_info *
941 reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
943 struct reduction_info tmpred, *red;
945 if (reduction_list->is_empty () || phi == NULL)
946 return NULL;
948 if (gimple_uid (phi) == (unsigned int)-1
949 || gimple_uid (phi) == 0)
950 return NULL;
952 tmpred.reduc_phi = phi;
953 tmpred.reduc_version = gimple_uid (phi);
954 red = reduction_list->find (&tmpred);
955 gcc_assert (red == NULL || red->reduc_phi == phi);
957 return red;
960 /* Element of hashtable of names to copy. */
962 struct name_to_copy_elt
964 unsigned version; /* The version of the name to copy. */
965 tree new_name; /* The new name used in the copy. */
966 tree field; /* The field of the structure used to pass the
967 value. */
970 /* Name copies hashtable helpers. */
972 struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
974 static inline hashval_t hash (const name_to_copy_elt *);
975 static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
978 /* Equality and hash functions for hashtab code. */
980 inline bool
981 name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
983 return a->version == b->version;
986 inline hashval_t
987 name_to_copy_hasher::hash (const name_to_copy_elt *a)
989 return (hashval_t) a->version;
992 typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
994 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
995 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
996 represents the denominator for every element in the matrix. */
997 typedef struct lambda_trans_matrix_s
999 lambda_matrix matrix;
1000 int rowsize;
1001 int colsize;
1002 int denominator;
1003 } *lambda_trans_matrix;
1004 #define LTM_MATRIX(T) ((T)->matrix)
1005 #define LTM_ROWSIZE(T) ((T)->rowsize)
1006 #define LTM_COLSIZE(T) ((T)->colsize)
1007 #define LTM_DENOMINATOR(T) ((T)->denominator)
1009 /* Allocate a new transformation matrix. */
1011 static lambda_trans_matrix
1012 lambda_trans_matrix_new (int colsize, int rowsize,
1013 struct obstack * lambda_obstack)
1015 lambda_trans_matrix ret;
1017 ret = (lambda_trans_matrix)
1018 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
1019 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
1020 LTM_ROWSIZE (ret) = rowsize;
1021 LTM_COLSIZE (ret) = colsize;
1022 LTM_DENOMINATOR (ret) = 1;
1023 return ret;
1026 /* Multiply a vector VEC by a matrix MAT.
1027 MAT is an M*N matrix, and VEC is a vector with length N. The result
1028 is stored in DEST which must be a vector of length M. */
1030 static void
1031 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
1032 lambda_vector vec, lambda_vector dest)
1034 int i, j;
1036 lambda_vector_clear (dest, m);
1037 for (i = 0; i < m; i++)
1038 for (j = 0; j < n; j++)
1039 dest[i] += matrix[i][j] * vec[j];
1042 /* Return true if TRANS is a legal transformation matrix that respects
1043 the dependence vectors in DISTS and DIRS. The conservative answer
1044 is false.
1046 "Wolfe proves that a unimodular transformation represented by the
1047 matrix T is legal when applied to a loop nest with a set of
1048 lexicographically non-negative distance vectors RDG if and only if
1049 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
1050 i.e.: if and only if it transforms the lexicographically positive
1051 distance vectors to lexicographically positive vectors. Note that
1052 a unimodular matrix must transform the zero vector (and only it) to
1053 the zero vector." S.Muchnick. */
1055 static bool
1056 lambda_transform_legal_p (lambda_trans_matrix trans,
1057 int nb_loops,
1058 vec<ddr_p> dependence_relations)
1060 unsigned int i, j;
1061 lambda_vector distres;
1062 struct data_dependence_relation *ddr;
1064 gcc_assert (LTM_COLSIZE (trans) == nb_loops
1065 && LTM_ROWSIZE (trans) == nb_loops);
1067 /* When there are no dependences, the transformation is correct. */
1068 if (dependence_relations.length () == 0)
1069 return true;
1071 ddr = dependence_relations[0];
1072 if (ddr == NULL)
1073 return true;
1075 /* When there is an unknown relation in the dependence_relations, we
1076 know that it is no worth looking at this loop nest: give up. */
1077 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1078 return false;
1080 distres = lambda_vector_new (nb_loops);
1082 /* For each distance vector in the dependence graph. */
1083 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
1085 /* Don't care about relations for which we know that there is no
1086 dependence, nor about read-read (aka. output-dependences):
1087 these data accesses can happen in any order. */
1088 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
1089 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
1090 continue;
1092 /* Conservatively answer: "this transformation is not valid". */
1093 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1094 return false;
1096 /* If the dependence could not be captured by a distance vector,
1097 conservatively answer that the transform is not valid. */
1098 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1099 return false;
1101 /* Compute trans.dist_vect */
1102 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
1104 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
1105 DDR_DIST_VECT (ddr, j), distres);
1107 if (!lambda_vector_lexico_pos (distres, nb_loops))
1108 return false;
1111 return true;
1114 /* Data dependency analysis. Returns true if the iterations of LOOP
1115 are independent on each other (that is, if we can execute them
1116 in parallel). */
1118 static bool
1119 loop_parallel_p (class loop *loop, struct obstack * parloop_obstack)
1121 vec<ddr_p> dependence_relations;
1122 vec<data_reference_p> datarefs;
1123 lambda_trans_matrix trans;
1124 bool ret = false;
1126 if (dump_file && (dump_flags & TDF_DETAILS))
1128 fprintf (dump_file, "Considering loop %d\n", loop->num);
1129 if (!loop->inner)
1130 fprintf (dump_file, "loop is innermost\n");
1131 else
1132 fprintf (dump_file, "loop NOT innermost\n");
1135 /* Check for problems with dependences. If the loop can be reversed,
1136 the iterations are independent. */
1137 auto_vec<loop_p, 3> loop_nest;
1138 datarefs.create (10);
1139 dependence_relations.create (100);
1140 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
1141 &dependence_relations))
1143 if (dump_file && (dump_flags & TDF_DETAILS))
1144 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
1145 ret = false;
1146 goto end;
1148 if (dump_file && (dump_flags & TDF_DETAILS))
1149 dump_data_dependence_relations (dump_file, dependence_relations);
1151 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
1152 LTM_MATRIX (trans)[0][0] = -1;
1154 if (lambda_transform_legal_p (trans, 1, dependence_relations))
1156 ret = true;
1157 if (dump_file && (dump_flags & TDF_DETAILS))
1158 fprintf (dump_file, " SUCCESS: may be parallelized\n");
1160 else if (dump_file && (dump_flags & TDF_DETAILS))
1161 fprintf (dump_file,
1162 " FAILED: data dependencies exist across iterations\n");
1164 end:
1165 free_dependence_relations (dependence_relations);
1166 free_data_refs (datarefs);
1168 return ret;
1171 /* Return true when LOOP contains basic blocks marked with the
1172 BB_IRREDUCIBLE_LOOP flag. */
1174 static inline bool
1175 loop_has_blocks_with_irreducible_flag (class loop *loop)
1177 unsigned i;
1178 basic_block *bbs = get_loop_body_in_dom_order (loop);
1179 bool res = true;
1181 for (i = 0; i < loop->num_nodes; i++)
1182 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
1183 goto end;
1185 res = false;
1186 end:
1187 free (bbs);
1188 return res;
1191 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
1192 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
1193 to their addresses that can be reused. The address of OBJ is known to
1194 be invariant in the whole function. Other needed statements are placed
1195 right before GSI. */
1197 static tree
1198 take_address_of (tree obj, tree type, edge entry,
1199 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
1201 int uid;
1202 tree *var_p, name, addr;
1203 gassign *stmt;
1204 gimple_seq stmts;
1206 /* Since the address of OBJ is invariant, the trees may be shared.
1207 Avoid rewriting unrelated parts of the code. */
1208 obj = unshare_expr (obj);
1209 for (var_p = &obj;
1210 handled_component_p (*var_p);
1211 var_p = &TREE_OPERAND (*var_p, 0))
1212 continue;
1214 /* Canonicalize the access to base on a MEM_REF. */
1215 if (DECL_P (*var_p))
1216 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
1218 /* Assign a canonical SSA name to the address of the base decl used
1219 in the address and share it for all accesses and addresses based
1220 on it. */
1221 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
1222 int_tree_map elt;
1223 elt.uid = uid;
1224 int_tree_map *slot = decl_address->find_slot (elt, INSERT);
1225 if (!slot->to)
1227 if (gsi == NULL)
1228 return NULL;
1229 addr = TREE_OPERAND (*var_p, 0);
1230 const char *obj_name
1231 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
1232 if (obj_name)
1233 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
1234 else
1235 name = make_ssa_name (TREE_TYPE (addr));
1236 stmt = gimple_build_assign (name, addr);
1237 gsi_insert_on_edge_immediate (entry, stmt);
1239 slot->uid = uid;
1240 slot->to = name;
1242 else
1243 name = slot->to;
1245 /* Express the address in terms of the canonical SSA name. */
1246 TREE_OPERAND (*var_p, 0) = name;
1247 if (gsi == NULL)
1248 return build_fold_addr_expr_with_type (obj, type);
1250 name = force_gimple_operand (build_addr (obj),
1251 &stmts, true, NULL_TREE);
1252 if (!gimple_seq_empty_p (stmts))
1253 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1255 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
1257 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
1258 NULL_TREE);
1259 if (!gimple_seq_empty_p (stmts))
1260 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1263 return name;
1266 static tree
1267 reduc_stmt_res (gimple *stmt)
1269 return (gimple_code (stmt) == GIMPLE_PHI
1270 ? gimple_phi_result (stmt)
1271 : gimple_assign_lhs (stmt));
1274 /* Callback for htab_traverse. Create the initialization statement
1275 for reduction described in SLOT, and place it at the preheader of
1276 the loop described in DATA. */
1279 initialize_reductions (reduction_info **slot, class loop *loop)
1281 tree init;
1282 tree type, arg;
1283 edge e;
1285 struct reduction_info *const reduc = *slot;
1287 /* Create initialization in preheader:
1288 reduction_variable = initialization value of reduction. */
1290 /* In the phi node at the header, replace the argument coming
1291 from the preheader with the reduction initialization value. */
1293 /* Initialize the reduction. */
1294 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1295 init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
1296 reduc->reduction_code, type);
1297 reduc->init = init;
1299 /* Replace the argument representing the initialization value
1300 with the initialization value for the reduction (neutral
1301 element for the particular operation, e.g. 0 for PLUS_EXPR,
1302 1 for MULT_EXPR, etc).
1303 Keep the old value in a new variable "reduction_initial",
1304 that will be taken in consideration after the parallel
1305 computing is done. */
1307 e = loop_preheader_edge (loop);
1308 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
1309 /* Create new variable to hold the initial value. */
1311 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
1312 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
1313 reduc->initial_value = arg;
1314 return 1;
1317 struct elv_data
1319 struct walk_stmt_info info;
1320 edge entry;
1321 int_tree_htab_type *decl_address;
1322 gimple_stmt_iterator *gsi;
1323 bool changed;
1324 bool reset;
1327 /* Eliminates references to local variables in *TP out of the single
1328 entry single exit region starting at DTA->ENTRY.
1329 DECL_ADDRESS contains addresses of the references that had their
1330 address taken already. If the expression is changed, CHANGED is
1331 set to true. Callback for walk_tree. */
1333 static tree
1334 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
1336 struct elv_data *const dta = (struct elv_data *) data;
1337 tree t = *tp, var, addr, addr_type, type, obj;
1339 if (DECL_P (t))
1341 *walk_subtrees = 0;
1343 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
1344 return NULL_TREE;
1346 type = TREE_TYPE (t);
1347 addr_type = build_pointer_type (type);
1348 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
1349 dta->gsi);
1350 if (dta->gsi == NULL && addr == NULL_TREE)
1352 dta->reset = true;
1353 return NULL_TREE;
1356 *tp = build_simple_mem_ref (addr);
1358 dta->changed = true;
1359 return NULL_TREE;
1362 if (TREE_CODE (t) == ADDR_EXPR)
1364 /* ADDR_EXPR may appear in two contexts:
1365 -- as a gimple operand, when the address taken is a function invariant
1366 -- as gimple rhs, when the resulting address in not a function
1367 invariant
1368 We do not need to do anything special in the latter case (the base of
1369 the memory reference whose address is taken may be replaced in the
1370 DECL_P case). The former case is more complicated, as we need to
1371 ensure that the new address is still a gimple operand. Thus, it
1372 is not sufficient to replace just the base of the memory reference --
1373 we need to move the whole computation of the address out of the
1374 loop. */
1375 if (!is_gimple_val (t))
1376 return NULL_TREE;
1378 *walk_subtrees = 0;
1379 obj = TREE_OPERAND (t, 0);
1380 var = get_base_address (obj);
1381 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
1382 return NULL_TREE;
1384 addr_type = TREE_TYPE (t);
1385 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
1386 dta->gsi);
1387 if (dta->gsi == NULL && addr == NULL_TREE)
1389 dta->reset = true;
1390 return NULL_TREE;
1392 *tp = addr;
1394 dta->changed = true;
1395 return NULL_TREE;
1398 if (!EXPR_P (t))
1399 *walk_subtrees = 0;
1401 return NULL_TREE;
1404 /* Moves the references to local variables in STMT at *GSI out of the single
1405 entry single exit region starting at ENTRY. DECL_ADDRESS contains
1406 addresses of the references that had their address taken
1407 already. */
1409 static void
1410 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
1411 int_tree_htab_type *decl_address)
1413 struct elv_data dta;
1414 gimple *stmt = gsi_stmt (*gsi);
1416 memset (&dta.info, '\0', sizeof (dta.info));
1417 dta.entry = entry;
1418 dta.decl_address = decl_address;
1419 dta.changed = false;
1420 dta.reset = false;
1422 if (gimple_debug_bind_p (stmt))
1424 dta.gsi = NULL;
1425 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
1426 eliminate_local_variables_1, &dta.info, NULL);
1427 if (dta.reset)
1429 gimple_debug_bind_reset_value (stmt);
1430 dta.changed = true;
1433 else if (gimple_clobber_p (stmt))
1435 unlink_stmt_vdef (stmt);
1436 stmt = gimple_build_nop ();
1437 gsi_replace (gsi, stmt, false);
1438 dta.changed = true;
1440 else
1442 dta.gsi = gsi;
1443 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
1446 if (dta.changed)
1447 update_stmt (stmt);
1450 /* Eliminates the references to local variables from the single entry
1451 single exit region between the ENTRY and EXIT edges.
1453 This includes:
1454 1) Taking address of a local variable -- these are moved out of the
1455 region (and temporary variable is created to hold the address if
1456 necessary).
1458 2) Dereferencing a local variable -- these are replaced with indirect
1459 references. */
1461 static void
1462 eliminate_local_variables (edge entry, edge exit)
1464 basic_block bb;
1465 auto_vec<basic_block, 3> body;
1466 unsigned i;
1467 gimple_stmt_iterator gsi;
1468 bool has_debug_stmt = false;
1469 int_tree_htab_type decl_address (10);
1470 basic_block entry_bb = entry->src;
1471 basic_block exit_bb = exit->dest;
1473 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1475 FOR_EACH_VEC_ELT (body, i, bb)
1476 if (bb != entry_bb && bb != exit_bb)
1478 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1479 if (is_gimple_debug (gsi_stmt (gsi)))
1481 if (gimple_debug_bind_p (gsi_stmt (gsi)))
1482 has_debug_stmt = true;
1484 else
1485 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
1488 if (has_debug_stmt)
1489 FOR_EACH_VEC_ELT (body, i, bb)
1490 if (bb != entry_bb && bb != exit_bb)
1491 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1492 if (gimple_debug_bind_p (gsi_stmt (gsi)))
1493 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
1496 /* Returns true if expression EXPR is not defined between ENTRY and
1497 EXIT, i.e. if all its operands are defined outside of the region. */
1499 static bool
1500 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
1502 basic_block entry_bb = entry->src;
1503 basic_block exit_bb = exit->dest;
1504 basic_block def_bb;
1506 if (is_gimple_min_invariant (expr))
1507 return true;
1509 if (TREE_CODE (expr) == SSA_NAME)
1511 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1512 if (def_bb
1513 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
1514 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
1515 return false;
1517 return true;
1520 return false;
1523 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
1524 The copies are stored to NAME_COPIES, if NAME was already duplicated,
1525 its duplicate stored in NAME_COPIES is returned.
1527 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
1528 duplicated, storing the copies in DECL_COPIES. */
1530 static tree
1531 separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
1532 int_tree_htab_type *decl_copies,
1533 bool copy_name_p)
1535 tree copy, var, var_copy;
1536 unsigned idx, uid, nuid;
1537 struct int_tree_map ielt;
1538 struct name_to_copy_elt elt, *nelt;
1539 name_to_copy_elt **slot;
1540 int_tree_map *dslot;
1542 if (TREE_CODE (name) != SSA_NAME)
1543 return name;
1545 idx = SSA_NAME_VERSION (name);
1546 elt.version = idx;
1547 slot = name_copies->find_slot_with_hash (&elt, idx,
1548 copy_name_p ? INSERT : NO_INSERT);
1549 if (slot && *slot)
1550 return (*slot)->new_name;
1552 if (copy_name_p)
1554 copy = duplicate_ssa_name (name, NULL);
1555 nelt = XNEW (struct name_to_copy_elt);
1556 nelt->version = idx;
1557 nelt->new_name = copy;
1558 nelt->field = NULL_TREE;
1559 *slot = nelt;
1561 else
1563 gcc_assert (!slot);
1564 copy = name;
1567 var = SSA_NAME_VAR (name);
1568 if (!var)
1569 return copy;
1571 uid = DECL_UID (var);
1572 ielt.uid = uid;
1573 dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
1574 if (!dslot->to)
1576 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
1577 DECL_NOT_GIMPLE_REG_P (var_copy) = DECL_NOT_GIMPLE_REG_P (var);
1578 dslot->uid = uid;
1579 dslot->to = var_copy;
1581 /* Ensure that when we meet this decl next time, we won't duplicate
1582 it again. */
1583 nuid = DECL_UID (var_copy);
1584 ielt.uid = nuid;
1585 dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
1586 gcc_assert (!dslot->to);
1587 dslot->uid = nuid;
1588 dslot->to = var_copy;
1590 else
1591 var_copy = dslot->to;
1593 replace_ssa_name_symbol (copy, var_copy);
1594 return copy;
1597 /* Finds the ssa names used in STMT that are defined outside the
1598 region between ENTRY and EXIT and replaces such ssa names with
1599 their duplicates. The duplicates are stored to NAME_COPIES. Base
1600 decls of all ssa names used in STMT (including those defined in
1601 LOOP) are replaced with the new temporary variables; the
1602 replacement decls are stored in DECL_COPIES. */
1604 static void
1605 separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
1606 name_to_copy_table_type *name_copies,
1607 int_tree_htab_type *decl_copies)
1609 use_operand_p use;
1610 def_operand_p def;
1611 ssa_op_iter oi;
1612 tree name, copy;
1613 bool copy_name_p;
1615 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
1617 name = DEF_FROM_PTR (def);
1618 gcc_assert (TREE_CODE (name) == SSA_NAME);
1619 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
1620 false);
1621 gcc_assert (copy == name);
1624 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
1626 name = USE_FROM_PTR (use);
1627 if (TREE_CODE (name) != SSA_NAME)
1628 continue;
1630 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
1631 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
1632 copy_name_p);
1633 SET_USE (use, copy);
1637 /* Finds the ssa names used in STMT that are defined outside the
1638 region between ENTRY and EXIT and replaces such ssa names with
1639 their duplicates. The duplicates are stored to NAME_COPIES. Base
1640 decls of all ssa names used in STMT (including those defined in
1641 LOOP) are replaced with the new temporary variables; the
1642 replacement decls are stored in DECL_COPIES. */
1644 static bool
1645 separate_decls_in_region_debug (gimple *stmt,
1646 name_to_copy_table_type *name_copies,
1647 int_tree_htab_type *decl_copies)
1649 use_operand_p use;
1650 ssa_op_iter oi;
1651 tree var, name;
1652 struct int_tree_map ielt;
1653 struct name_to_copy_elt elt;
1654 name_to_copy_elt **slot;
1655 int_tree_map *dslot;
1657 if (gimple_debug_bind_p (stmt))
1658 var = gimple_debug_bind_get_var (stmt);
1659 else if (gimple_debug_source_bind_p (stmt))
1660 var = gimple_debug_source_bind_get_var (stmt);
1661 else
1662 return true;
1663 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
1664 return true;
1665 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
1666 ielt.uid = DECL_UID (var);
1667 dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
1668 if (!dslot)
1669 return true;
1670 if (gimple_debug_bind_p (stmt))
1671 gimple_debug_bind_set_var (stmt, dslot->to);
1672 else if (gimple_debug_source_bind_p (stmt))
1673 gimple_debug_source_bind_set_var (stmt, dslot->to);
1675 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
1677 name = USE_FROM_PTR (use);
1678 if (TREE_CODE (name) != SSA_NAME)
1679 continue;
1681 elt.version = SSA_NAME_VERSION (name);
1682 slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
1683 if (!slot)
1685 gimple_debug_bind_reset_value (stmt);
1686 update_stmt (stmt);
1687 break;
1690 SET_USE (use, (*slot)->new_name);
1693 return false;
1696 /* Callback for htab_traverse. Adds a field corresponding to the reduction
1697 specified in SLOT. The type is passed in DATA. */
1700 add_field_for_reduction (reduction_info **slot, tree type)
1703 struct reduction_info *const red = *slot;
1704 tree var = reduc_stmt_res (red->reduc_stmt);
1705 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
1706 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
1708 insert_field_into_struct (type, field);
1710 red->field = field;
1712 return 1;
1715 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
1716 described in SLOT. The type is passed in DATA. */
1719 add_field_for_name (name_to_copy_elt **slot, tree type)
1721 struct name_to_copy_elt *const elt = *slot;
1722 tree name = ssa_name (elt->version);
1723 tree field = build_decl (UNKNOWN_LOCATION,
1724 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1725 TREE_TYPE (name));
1727 insert_field_into_struct (type, field);
1728 elt->field = field;
1730 return 1;
1733 /* Callback for htab_traverse. A local result is the intermediate result
1734 computed by a single
1735 thread, or the initial value in case no iteration was executed.
1736 This function creates a phi node reflecting these values.
1737 The phi's result will be stored in NEW_PHI field of the
1738 reduction's data structure. */
1741 create_phi_for_local_result (reduction_info **slot, class loop *loop)
1743 struct reduction_info *const reduc = *slot;
1744 edge e;
1745 gphi *new_phi;
1746 basic_block store_bb, continue_bb;
1747 tree local_res;
1748 location_t locus;
1750 /* STORE_BB is the block where the phi
1751 should be stored. It is the destination of the loop exit.
1752 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1753 continue_bb = single_pred (loop->latch);
1754 store_bb = FALLTHRU_EDGE (continue_bb)->dest;
1756 /* STORE_BB has two predecessors. One coming from the loop
1757 (the reduction's result is computed at the loop),
1758 and another coming from a block preceding the loop,
1759 when no iterations
1760 are executed (the initial value should be taken). */
1761 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
1762 e = EDGE_PRED (store_bb, 1);
1763 else
1764 e = EDGE_PRED (store_bb, 0);
1765 tree lhs = reduc_stmt_res (reduc->reduc_stmt);
1766 local_res = copy_ssa_name (lhs);
1767 locus = gimple_location (reduc->reduc_stmt);
1768 new_phi = create_phi_node (local_res, store_bb);
1769 add_phi_arg (new_phi, reduc->init, e, locus);
1770 add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
1771 reduc->new_phi = new_phi;
1773 return 1;
1776 struct clsn_data
1778 tree store;
1779 tree load;
1781 basic_block store_bb;
1782 basic_block load_bb;
1785 /* Callback for htab_traverse. Create an atomic instruction for the
1786 reduction described in SLOT.
1787 DATA annotates the place in memory the atomic operation relates to,
1788 and the basic block it needs to be generated in. */
1791 create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
1793 struct reduction_info *const reduc = *slot;
1794 gimple_stmt_iterator gsi;
1795 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1796 tree load_struct;
1797 basic_block bb;
1798 basic_block new_bb;
1799 edge e;
1800 tree t, addr, ref, x;
1801 tree tmp_load, name;
1802 gimple *load;
1804 if (reduc->reduc_addr == NULL_TREE)
1806 load_struct = build_simple_mem_ref (clsn_data->load);
1807 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1809 addr = build_addr (t);
1811 else
1813 /* Set the address for the atomic store. */
1814 addr = reduc->reduc_addr;
1816 /* Remove the non-atomic store '*addr = sum'. */
1817 tree res = PHI_RESULT (reduc->keep_res);
1818 use_operand_p use_p;
1819 gimple *stmt;
1820 bool single_use_p = single_imm_use (res, &use_p, &stmt);
1821 gcc_assert (single_use_p);
1822 replace_uses_by (gimple_vdef (stmt),
1823 gimple_vuse (stmt));
1824 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1825 gsi_remove (&gsi, true);
1828 /* Create phi node. */
1829 bb = clsn_data->load_bb;
1831 gsi = gsi_last_bb (bb);
1832 e = split_block (bb, gsi_stmt (gsi));
1833 new_bb = e->dest;
1835 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
1836 tmp_load = make_ssa_name (tmp_load);
1837 load = gimple_build_omp_atomic_load (tmp_load, addr,
1838 OMP_MEMORY_ORDER_RELAXED);
1839 SSA_NAME_DEF_STMT (tmp_load) = load;
1840 gsi = gsi_start_bb (new_bb);
1841 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1843 e = split_block (new_bb, load);
1844 new_bb = e->dest;
1845 gsi = gsi_start_bb (new_bb);
1846 ref = tmp_load;
1847 x = fold_build2 (reduc->reduction_code,
1848 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1849 PHI_RESULT (reduc->new_phi));
1851 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1852 GSI_CONTINUE_LINKING);
1854 gimple *store = gimple_build_omp_atomic_store (name,
1855 OMP_MEMORY_ORDER_RELAXED);
1856 gsi_insert_after (&gsi, store, GSI_NEW_STMT);
1857 return 1;
1860 /* Create the atomic operation at the join point of the threads.
1861 REDUCTION_LIST describes the reductions in the LOOP.
1862 LD_ST_DATA describes the shared data structure where
1863 shared data is stored in and loaded from. */
1864 static void
1865 create_call_for_reduction (class loop *loop,
1866 reduction_info_table_type *reduction_list,
1867 struct clsn_data *ld_st_data)
1869 reduction_list->traverse <class loop *, create_phi_for_local_result> (loop);
1870 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1871 basic_block continue_bb = single_pred (loop->latch);
1872 ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
1873 reduction_list
1874 ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
1877 /* Callback for htab_traverse. Loads the final reduction value at the
1878 join point of all threads, and inserts it in the right place. */
1881 create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
1883 struct reduction_info *const red = *slot;
1884 gimple *stmt;
1885 gimple_stmt_iterator gsi;
1886 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
1887 tree load_struct;
1888 tree name;
1889 tree x;
1891 /* If there's no exit phi, the result of the reduction is unused. */
1892 if (red->keep_res == NULL)
1893 return 1;
1895 gsi = gsi_after_labels (clsn_data->load_bb);
1896 load_struct = build_simple_mem_ref (clsn_data->load);
1897 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1898 NULL_TREE);
1900 x = load_struct;
1901 name = PHI_RESULT (red->keep_res);
1902 stmt = gimple_build_assign (name, x);
1904 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1906 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1907 !gsi_end_p (gsi); gsi_next (&gsi))
1908 if (gsi_stmt (gsi) == red->keep_res)
1910 remove_phi_node (&gsi, false);
1911 return 1;
1913 gcc_unreachable ();
1916 /* Load the reduction result that was stored in LD_ST_DATA.
1917 REDUCTION_LIST describes the list of reductions that the
1918 loads should be generated for. */
1919 static void
1920 create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
1921 struct clsn_data *ld_st_data)
1923 gimple_stmt_iterator gsi;
1924 tree t;
1925 gimple *stmt;
1927 gsi = gsi_after_labels (ld_st_data->load_bb);
1928 t = build_fold_addr_expr (ld_st_data->store);
1929 stmt = gimple_build_assign (ld_st_data->load, t);
1931 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1933 reduction_list
1934 ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
1938 /* Callback for htab_traverse. Store the neutral value for the
1939 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1940 1 for MULT_EXPR, etc. into the reduction field.
1941 The reduction is specified in SLOT. The store information is
1942 passed in DATA. */
1945 create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
1947 struct reduction_info *const red = *slot;
1948 tree t;
1949 gimple *stmt;
1950 gimple_stmt_iterator gsi;
1951 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
1953 gsi = gsi_last_bb (clsn_data->store_bb);
1954 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1955 stmt = gimple_build_assign (t, red->initial_value);
1956 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1958 return 1;
1961 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1962 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1963 specified in SLOT. */
1966 create_loads_and_stores_for_name (name_to_copy_elt **slot,
1967 struct clsn_data *clsn_data)
1969 struct name_to_copy_elt *const elt = *slot;
1970 tree t;
1971 gimple *stmt;
1972 gimple_stmt_iterator gsi;
1973 tree type = TREE_TYPE (elt->new_name);
1974 tree load_struct;
1976 gsi = gsi_last_bb (clsn_data->store_bb);
1977 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1978 stmt = gimple_build_assign (t, ssa_name (elt->version));
1979 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1981 gsi = gsi_last_bb (clsn_data->load_bb);
1982 load_struct = build_simple_mem_ref (clsn_data->load);
1983 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1984 stmt = gimple_build_assign (elt->new_name, t);
1985 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1987 return 1;
1990 /* Moves all the variables used in LOOP and defined outside of it (including
1991 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1992 name) to a structure created for this purpose. The code
1994 while (1)
1996 use (a);
1997 use (b);
2000 is transformed this way:
2002 bb0:
2003 old.a = a;
2004 old.b = b;
2006 bb1:
2007 a' = new->a;
2008 b' = new->b;
2009 while (1)
2011 use (a');
2012 use (b');
2015 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
2016 pointer `new' is intentionally not initialized (the loop will be split to a
2017 separate function later, and `new' will be initialized from its arguments).
2018 LD_ST_DATA holds information about the shared data structure used to pass
2019 information among the threads. It is initialized here, and
2020 gen_parallel_loop will pass it to create_call_for_reduction that
2021 needs this information. REDUCTION_LIST describes the reductions
2022 in LOOP. */
2024 static void
2025 separate_decls_in_region (edge entry, edge exit,
2026 reduction_info_table_type *reduction_list,
2027 tree *arg_struct, tree *new_arg_struct,
2028 struct clsn_data *ld_st_data)
2031 basic_block bb1 = split_edge (entry);
2032 basic_block bb0 = single_pred (bb1);
2033 name_to_copy_table_type name_copies (10);
2034 int_tree_htab_type decl_copies (10);
2035 unsigned i;
2036 tree type, type_name, nvar;
2037 gimple_stmt_iterator gsi;
2038 struct clsn_data clsn_data;
2039 auto_vec<basic_block, 3> body;
2040 basic_block bb;
2041 basic_block entry_bb = bb1;
2042 basic_block exit_bb = exit->dest;
2043 bool has_debug_stmt = false;
2045 entry = single_succ_edge (entry_bb);
2046 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
2048 FOR_EACH_VEC_ELT (body, i, bb)
2050 if (bb != entry_bb && bb != exit_bb)
2052 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2053 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
2054 &name_copies, &decl_copies);
2056 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2058 gimple *stmt = gsi_stmt (gsi);
2060 if (is_gimple_debug (stmt))
2061 has_debug_stmt = true;
2062 else
2063 separate_decls_in_region_stmt (entry, exit, stmt,
2064 &name_copies, &decl_copies);
2069 /* Now process debug bind stmts. We must not create decls while
2070 processing debug stmts, so we defer their processing so as to
2071 make sure we will have debug info for as many variables as
2072 possible (all of those that were dealt with in the loop above),
2073 and discard those for which we know there's nothing we can
2074 do. */
2075 if (has_debug_stmt)
2076 FOR_EACH_VEC_ELT (body, i, bb)
2077 if (bb != entry_bb && bb != exit_bb)
2079 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2081 gimple *stmt = gsi_stmt (gsi);
2083 if (is_gimple_debug (stmt))
2085 if (separate_decls_in_region_debug (stmt, &name_copies,
2086 &decl_copies))
2088 gsi_remove (&gsi, true);
2089 continue;
2093 gsi_next (&gsi);
2097 if (name_copies.is_empty () && reduction_list->is_empty ())
2099 /* It may happen that there is nothing to copy (if there are only
2100 loop carried and external variables in the loop). */
2101 *arg_struct = NULL;
2102 *new_arg_struct = NULL;
2104 else
2106 /* Create the type for the structure to store the ssa names to. */
2107 type = lang_hooks.types.make_type (RECORD_TYPE);
2108 type_name = build_decl (UNKNOWN_LOCATION,
2109 TYPE_DECL, create_tmp_var_name (".paral_data"),
2110 type);
2111 TYPE_NAME (type) = type_name;
2113 name_copies.traverse <tree, add_field_for_name> (type);
2114 if (reduction_list && !reduction_list->is_empty ())
2116 /* Create the fields for reductions. */
2117 reduction_list->traverse <tree, add_field_for_reduction> (type);
2119 layout_type (type);
2121 /* Create the loads and stores. */
2122 *arg_struct = create_tmp_var (type, ".paral_data_store");
2123 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
2124 *new_arg_struct = make_ssa_name (nvar);
2126 ld_st_data->store = *arg_struct;
2127 ld_st_data->load = *new_arg_struct;
2128 ld_st_data->store_bb = bb0;
2129 ld_st_data->load_bb = bb1;
2131 name_copies
2132 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
2133 (ld_st_data);
2135 /* Load the calculation from memory (after the join of the threads). */
2137 if (reduction_list && !reduction_list->is_empty ())
2139 reduction_list
2140 ->traverse <struct clsn_data *, create_stores_for_reduction>
2141 (ld_st_data);
2142 clsn_data.load = make_ssa_name (nvar);
2143 clsn_data.load_bb = exit->dest;
2144 clsn_data.store = ld_st_data->store;
2145 create_final_loads_for_reduction (reduction_list, &clsn_data);
2150 /* Returns true if FN was created to run in parallel. */
2152 bool
2153 parallelized_function_p (tree fndecl)
2155 cgraph_node *node = cgraph_node::get (fndecl);
2156 gcc_assert (node != NULL);
2157 return node->parallelized_function;
2160 /* Creates and returns an empty function that will receive the body of
2161 a parallelized loop. */
2163 static tree
2164 create_loop_fn (location_t loc)
2166 char buf[100];
2167 char *tname;
2168 tree decl, type, name, t;
2169 struct function *act_cfun = cfun;
2170 static unsigned loopfn_num;
2172 loc = LOCATION_LOCUS (loc);
2173 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
2174 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
2175 clean_symbol_name (tname);
2176 name = get_identifier (tname);
2177 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
2179 decl = build_decl (loc, FUNCTION_DECL, name, type);
2180 TREE_STATIC (decl) = 1;
2181 TREE_USED (decl) = 1;
2182 DECL_ARTIFICIAL (decl) = 1;
2183 DECL_IGNORED_P (decl) = 0;
2184 TREE_PUBLIC (decl) = 0;
2185 DECL_UNINLINABLE (decl) = 1;
2186 DECL_EXTERNAL (decl) = 0;
2187 DECL_CONTEXT (decl) = NULL_TREE;
2188 DECL_INITIAL (decl) = make_node (BLOCK);
2189 BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;
2191 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
2192 DECL_ARTIFICIAL (t) = 1;
2193 DECL_IGNORED_P (t) = 1;
2194 DECL_RESULT (decl) = t;
2196 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
2197 ptr_type_node);
2198 DECL_ARTIFICIAL (t) = 1;
2199 DECL_ARG_TYPE (t) = ptr_type_node;
2200 DECL_CONTEXT (t) = decl;
2201 TREE_USED (t) = 1;
2202 DECL_ARGUMENTS (decl) = t;
2204 allocate_struct_function (decl, false);
2206 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
2207 it. */
2208 set_cfun (act_cfun);
2210 return decl;
2213 /* Replace uses of NAME by VAL in block BB. */
2215 static void
2216 replace_uses_in_bb_by (tree name, tree val, basic_block bb)
2218 gimple *use_stmt;
2219 imm_use_iterator imm_iter;
2221 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
2223 if (gimple_bb (use_stmt) != bb)
2224 continue;
2226 use_operand_p use_p;
2227 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2228 SET_USE (use_p, val);
2232 /* Do transformation from:
2234 <bb preheader>:
2236 goto <bb header>
2238 <bb header>:
2239 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2240 sum_a = PHI <sum_init (preheader), sum_b (latch)>
2242 use (ivtmp_a)
2244 sum_b = sum_a + sum_update
2246 if (ivtmp_a < n)
2247 goto <bb latch>;
2248 else
2249 goto <bb exit>;
2251 <bb latch>:
2252 ivtmp_b = ivtmp_a + 1;
2253 goto <bb header>
2255 <bb exit>:
2256 sum_z = PHI <sum_b (cond[1]), ...>
2258 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
2259 that's <bb header>.
2263 <bb preheader>:
2265 goto <bb newheader>
2267 <bb header>:
2268 ivtmp_a = PHI <ivtmp_c (latch)>
2269 sum_a = PHI <sum_c (latch)>
2271 use (ivtmp_a)
2273 sum_b = sum_a + sum_update
2275 goto <bb latch>;
2277 <bb newheader>:
2278 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2279 sum_c = PHI <sum_init (preheader), sum_b (latch)>
2280 if (ivtmp_c < n + 1)
2281 goto <bb header>;
2282 else
2283 goto <bb newexit>;
2285 <bb latch>:
2286 ivtmp_b = ivtmp_a + 1;
2287 goto <bb newheader>
2289 <bb newexit>:
2290 sum_y = PHI <sum_c (newheader)>
2292 <bb exit>:
2293 sum_z = PHI <sum_y (newexit), ...>
2296 In unified diff format:
2298 <bb preheader>:
2300 - goto <bb header>
2301 + goto <bb newheader>
2303 <bb header>:
2304 - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2305 - sum_a = PHI <sum_init (preheader), sum_b (latch)>
2306 + ivtmp_a = PHI <ivtmp_c (latch)>
2307 + sum_a = PHI <sum_c (latch)>
2309 use (ivtmp_a)
2311 sum_b = sum_a + sum_update
2313 - if (ivtmp_a < n)
2314 - goto <bb latch>;
2315 + goto <bb latch>;
2317 + <bb newheader>:
2318 + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2319 + sum_c = PHI <sum_init (preheader), sum_b (latch)>
2320 + if (ivtmp_c < n + 1)
2321 + goto <bb header>;
2322 else
2323 goto <bb exit>;
2325 <bb latch>:
2326 ivtmp_b = ivtmp_a + 1;
2327 - goto <bb header>
2328 + goto <bb newheader>
2330 + <bb newexit>:
2331 + sum_y = PHI <sum_c (newheader)>
2333 <bb exit>:
2334 - sum_z = PHI <sum_b (cond[1]), ...>
2335 + sum_z = PHI <sum_y (newexit), ...>
2337 Note: the example does not show any virtual phis, but these are handled more
2338 or less as reductions.
2341 Moves the exit condition of LOOP to the beginning of its header.
2342 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
2343 bound. */
2345 static void
2346 transform_to_exit_first_loop_alt (class loop *loop,
2347 reduction_info_table_type *reduction_list,
2348 tree bound)
2350 basic_block header = loop->header;
2351 basic_block latch = loop->latch;
2352 edge exit = single_dom_exit (loop);
2353 basic_block exit_block = exit->dest;
2354 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2355 tree control = gimple_cond_lhs (cond_stmt);
2356 edge e;
2358 /* Create the new_header block. */
2359 basic_block new_header = split_block_before_cond_jump (exit->src);
2360 edge edge_at_split = single_pred_edge (new_header);
2362 /* Redirect entry edge to new_header. */
2363 edge entry = loop_preheader_edge (loop);
2364 e = redirect_edge_and_branch (entry, new_header);
2365 gcc_assert (e == entry);
2367 /* Redirect post_inc_edge to new_header. */
2368 edge post_inc_edge = single_succ_edge (latch);
2369 e = redirect_edge_and_branch (post_inc_edge, new_header);
2370 gcc_assert (e == post_inc_edge);
2372 /* Redirect post_cond_edge to header. */
2373 edge post_cond_edge = single_pred_edge (latch);
2374 e = redirect_edge_and_branch (post_cond_edge, header);
2375 gcc_assert (e == post_cond_edge);
2377 /* Redirect edge_at_split to latch. */
2378 e = redirect_edge_and_branch (edge_at_split, latch);
2379 gcc_assert (e == edge_at_split);
2381 /* Set the new loop bound. */
2382 gimple_cond_set_rhs (cond_stmt, bound);
2383 update_stmt (cond_stmt);
2385 /* Repair the ssa. */
2386 vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
2387 edge_var_map *vm;
2388 gphi_iterator gsi;
2389 int i;
2390 for (gsi = gsi_start_phis (header), i = 0;
2391 !gsi_end_p (gsi) && v->iterate (i, &vm);
2392 gsi_next (&gsi), i++)
2394 gphi *phi = gsi.phi ();
2395 tree res_a = PHI_RESULT (phi);
2397 /* Create new phi. */
2398 tree res_c = copy_ssa_name (res_a, phi);
2399 gphi *nphi = create_phi_node (res_c, new_header);
2401 /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'. */
2402 replace_uses_in_bb_by (res_a, res_c, new_header);
2404 /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi. */
2405 add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
2407 /* Replace sum_b with sum_c in exit phi. */
2408 tree res_b = redirect_edge_var_map_def (vm);
2409 replace_uses_in_bb_by (res_b, res_c, exit_block);
2411 struct reduction_info *red = reduction_phi (reduction_list, phi);
2412 gcc_assert (virtual_operand_p (res_a)
2413 || res_a == control
2414 || red != NULL);
2416 if (red)
2418 /* Register the new reduction phi. */
2419 red->reduc_phi = nphi;
2420 gimple_set_uid (red->reduc_phi, red->reduc_version);
2423 gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
2425 /* Set the preheader argument of the new phis to ivtmp/sum_init. */
2426 flush_pending_stmts (entry);
2428 /* Set the latch arguments of the new phis to ivtmp/sum_b. */
2429 flush_pending_stmts (post_inc_edge);
2432 basic_block new_exit_block = NULL;
2433 if (!single_pred_p (exit->dest))
2435 /* Create a new empty exit block, inbetween the new loop header and the
2436 old exit block. The function separate_decls_in_region needs this block
2437 to insert code that is active on loop exit, but not any other path. */
2438 new_exit_block = split_edge (exit);
2441 /* Insert and register the reduction exit phis. */
2442 for (gphi_iterator gsi = gsi_start_phis (exit_block);
2443 !gsi_end_p (gsi);
2444 gsi_next (&gsi))
2446 gphi *phi = gsi.phi ();
2447 gphi *nphi = NULL;
2448 tree res_z = PHI_RESULT (phi);
2449 tree res_c;
2451 if (new_exit_block != NULL)
2453 /* Now that we have a new exit block, duplicate the phi of the old
2454 exit block in the new exit block to preserve loop-closed ssa. */
2455 edge succ_new_exit_block = single_succ_edge (new_exit_block);
2456 edge pred_new_exit_block = single_pred_edge (new_exit_block);
2457 tree res_y = copy_ssa_name (res_z, phi);
2458 nphi = create_phi_node (res_y, new_exit_block);
2459 res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
2460 add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
2461 add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
2463 else
2464 res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2466 if (virtual_operand_p (res_z))
2467 continue;
2469 gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
2470 struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
2471 if (red != NULL)
2472 red->keep_res = (nphi != NULL
2473 ? nphi
2474 : phi);
2477 /* We're going to cancel the loop at the end of gen_parallel_loop, but until
2478 then we're still using some fields, so only bother about fields that are
2479 still used: header and latch.
2480 The loop has a new header bb, so we update it. The latch bb stays the
2481 same. */
2482 loop->header = new_header;
2484 /* Recalculate dominance info. */
2485 free_dominance_info (CDI_DOMINATORS);
2486 calculate_dominance_info (CDI_DOMINATORS);
2489 /* Tries to moves the exit condition of LOOP to the beginning of its header
2490 without duplication of the loop body. NIT is the number of iterations of the
2491 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
2492 transformation is successful. */
2494 static bool
2495 try_transform_to_exit_first_loop_alt (class loop *loop,
2496 reduction_info_table_type *reduction_list,
2497 tree nit)
2499 /* Check whether the latch contains a single statement. */
2500 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
2501 return false;
2503 /* Check whether the latch contains no phis. */
2504 if (phi_nodes (loop->latch) != NULL)
2505 return false;
2507 /* Check whether the latch contains the loop iv increment. */
2508 edge back = single_succ_edge (loop->latch);
2509 edge exit = single_dom_exit (loop);
2510 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2511 tree control = gimple_cond_lhs (cond_stmt);
2512 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
2513 tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
2514 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
2515 return false;
2517 /* Check whether there's no code between the loop condition and the latch. */
2518 if (!single_pred_p (loop->latch)
2519 || single_pred (loop->latch) != exit->src)
2520 return false;
2522 tree alt_bound = NULL_TREE;
2523 tree nit_type = TREE_TYPE (nit);
2525 /* Figure out whether nit + 1 overflows. */
2526 if (TREE_CODE (nit) == INTEGER_CST)
2528 if (!tree_int_cst_equal (nit, TYPE_MAX_VALUE (nit_type)))
2530 alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
2531 nit, build_one_cst (nit_type));
2533 gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
2534 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
2535 return true;
2537 else
2539 /* Todo: Figure out if we can trigger this, if it's worth to handle
2540 optimally, and if we can handle it optimally. */
2541 return false;
2545 gcc_assert (TREE_CODE (nit) == SSA_NAME);
2547 /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
2548 iv with base 0 and step 1 that is incremented in the latch, like this:
2550 <bb header>:
2551 # iv_1 = PHI <0 (preheader), iv_2 (latch)>
2553 if (iv_1 < nit)
2554 goto <bb latch>;
2555 else
2556 goto <bb exit>;
2558 <bb latch>:
2559 iv_2 = iv_1 + 1;
2560 goto <bb header>;
2562 The range of iv_1 is [0, nit]. The latch edge is taken for
2563 iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit. So the
2564 number of latch executions is equal to nit.
2566 The function max_loop_iterations gives us the maximum number of latch
2567 executions, so it gives us the maximum value of nit. */
2568 widest_int nit_max;
2569 if (!max_loop_iterations (loop, &nit_max))
2570 return false;
2572 /* Check if nit + 1 overflows. */
2573 widest_int type_max = wi::to_widest (TYPE_MAX_VALUE (nit_type));
2574 if (nit_max >= type_max)
2575 return false;
2577 gimple *def = SSA_NAME_DEF_STMT (nit);
2579 /* Try to find nit + 1, in the form of n in an assignment nit = n - 1. */
2580 if (def
2581 && is_gimple_assign (def)
2582 && gimple_assign_rhs_code (def) == PLUS_EXPR)
2584 tree op1 = gimple_assign_rhs1 (def);
2585 tree op2 = gimple_assign_rhs2 (def);
2586 if (integer_minus_onep (op1))
2587 alt_bound = op2;
2588 else if (integer_minus_onep (op2))
2589 alt_bound = op1;
2592 /* If not found, insert nit + 1. */
2593 if (alt_bound == NULL_TREE)
2595 alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
2596 build_int_cst_type (nit_type, 1));
2598 gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
2600 alt_bound
2601 = force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
2602 GSI_CONTINUE_LINKING);
2605 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
2606 return true;
2609 /* Moves the exit condition of LOOP to the beginning of its header. NIT is the
2610 number of iterations of the loop. REDUCTION_LIST describes the reductions in
2611 LOOP. */
2613 static void
2614 transform_to_exit_first_loop (class loop *loop,
2615 reduction_info_table_type *reduction_list,
2616 tree nit)
2618 basic_block *bbs, *nbbs, ex_bb, orig_header;
2619 unsigned n;
2620 bool ok;
2621 edge exit = single_dom_exit (loop), hpred;
2622 tree control, control_name, res, t;
2623 gphi *phi, *nphi;
2624 gassign *stmt;
2625 gcond *cond_stmt, *cond_nit;
2626 tree nit_1;
2628 split_block_after_labels (loop->header);
2629 orig_header = single_succ (loop->header);
2630 hpred = single_succ_edge (loop->header);
2632 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2633 control = gimple_cond_lhs (cond_stmt);
2634 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
2636 /* Make sure that we have phi nodes on exit for all loop header phis
2637 (create_parallel_loop requires that). */
2638 for (gphi_iterator gsi = gsi_start_phis (loop->header);
2639 !gsi_end_p (gsi);
2640 gsi_next (&gsi))
2642 phi = gsi.phi ();
2643 res = PHI_RESULT (phi);
2644 t = copy_ssa_name (res, phi);
2645 SET_PHI_RESULT (phi, t);
2646 nphi = create_phi_node (res, orig_header);
2647 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
2649 if (res == control)
2651 gimple_cond_set_lhs (cond_stmt, t);
2652 update_stmt (cond_stmt);
2653 control = t;
2657 bbs = get_loop_body_in_dom_order (loop);
2659 for (n = 0; bbs[n] != exit->src; n++)
2660 continue;
2661 nbbs = XNEWVEC (basic_block, n);
2662 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
2663 bbs + 1, n, nbbs);
2664 gcc_assert (ok);
2665 free (bbs);
2666 ex_bb = nbbs[0];
2667 free (nbbs);
2669 /* Other than reductions, the only gimple reg that should be copied
2670 out of the loop is the control variable. */
2671 exit = single_dom_exit (loop);
2672 control_name = NULL_TREE;
2673 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
2674 !gsi_end_p (gsi); )
2676 phi = gsi.phi ();
2677 res = PHI_RESULT (phi);
2678 if (virtual_operand_p (res))
2680 gsi_next (&gsi);
2681 continue;
2684 /* Check if it is a part of reduction. If it is,
2685 keep the phi at the reduction's keep_res field. The
2686 PHI_RESULT of this phi is the resulting value of the reduction
2687 variable when exiting the loop. */
2689 if (!reduction_list->is_empty ())
2691 struct reduction_info *red;
2693 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2694 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
2695 if (red)
2697 red->keep_res = phi;
2698 gsi_next (&gsi);
2699 continue;
2702 gcc_assert (control_name == NULL_TREE
2703 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
2704 control_name = res;
2705 remove_phi_node (&gsi, false);
2707 gcc_assert (control_name != NULL_TREE);
2709 /* Initialize the control variable to number of iterations
2710 according to the rhs of the exit condition. */
2711 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
2712 cond_nit = as_a <gcond *> (last_stmt (exit->src));
2713 nit_1 = gimple_cond_rhs (cond_nit);
2714 nit_1 = force_gimple_operand_gsi (&gsi,
2715 fold_convert (TREE_TYPE (control_name), nit_1),
2716 false, NULL_TREE, false, GSI_SAME_STMT);
2717 stmt = gimple_build_assign (control_name, nit_1);
2718 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
2721 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
2722 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
2723 NEW_DATA is the variable that should be initialized from the argument
2724 of LOOP_FN. N_THREADS is the requested number of threads, which can be 0 if
2725 that number is to be determined later. */
2727 static void
2728 create_parallel_loop (class loop *loop, tree loop_fn, tree data,
2729 tree new_data, unsigned n_threads, location_t loc,
2730 bool oacc_kernels_p)
2732 gimple_stmt_iterator gsi;
2733 basic_block for_bb, ex_bb, continue_bb;
2734 tree t, param;
2735 gomp_parallel *omp_par_stmt;
2736 gimple *omp_return_stmt1, *omp_return_stmt2;
2737 gimple *phi;
2738 gcond *cond_stmt;
2739 gomp_for *for_stmt;
2740 gomp_continue *omp_cont_stmt;
2741 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
2742 edge exit, nexit, guard, end, e;
2744 if (oacc_kernels_p)
2746 gcc_checking_assert (lookup_attribute ("oacc kernels",
2747 DECL_ATTRIBUTES (cfun->decl)));
2748 /* Indicate to later processing that this is a parallelized OpenACC
2749 kernels construct. */
2750 DECL_ATTRIBUTES (cfun->decl)
2751 = tree_cons (get_identifier ("oacc kernels parallelized"),
2752 NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
2754 else
2756 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
2758 basic_block bb = loop_preheader_edge (loop)->src;
2759 basic_block paral_bb = single_pred (bb);
2760 gsi = gsi_last_bb (paral_bb);
2762 gcc_checking_assert (n_threads != 0);
2763 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
2764 OMP_CLAUSE_NUM_THREADS_EXPR (t)
2765 = build_int_cst (integer_type_node, n_threads);
2766 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
2767 gimple_set_location (omp_par_stmt, loc);
2769 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
2771 /* Initialize NEW_DATA. */
2772 if (data)
2774 gassign *assign_stmt;
2776 gsi = gsi_after_labels (bb);
2778 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2779 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2780 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2782 assign_stmt = gimple_build_assign (new_data,
2783 fold_convert (TREE_TYPE (new_data), param));
2784 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2787 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2788 bb = split_loop_exit_edge (single_dom_exit (loop));
2789 gsi = gsi_last_bb (bb);
2790 omp_return_stmt1 = gimple_build_omp_return (false);
2791 gimple_set_location (omp_return_stmt1, loc);
2792 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2795 /* Extract data for GIMPLE_OMP_FOR. */
2796 gcc_assert (loop->header == single_dom_exit (loop)->src);
2797 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
2799 cvar = gimple_cond_lhs (cond_stmt);
2800 cvar_base = SSA_NAME_VAR (cvar);
2801 phi = SSA_NAME_DEF_STMT (cvar);
2802 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2803 initvar = copy_ssa_name (cvar);
2804 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2805 initvar);
2806 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2808 gsi = gsi_last_nondebug_bb (loop->latch);
2809 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2810 gsi_remove (&gsi, true);
2812 /* Prepare cfg. */
2813 for_bb = split_edge (loop_preheader_edge (loop));
2814 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2815 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2816 gcc_assert (exit == single_dom_exit (loop));
2818 guard = make_edge (for_bb, ex_bb, 0);
2819 /* FIXME: What is the probability? */
2820 guard->probability = profile_probability::guessed_never ();
2821 /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid. */
2822 loop->latch = split_edge (single_succ_edge (loop->latch));
2823 single_pred_edge (loop->latch)->flags = 0;
2824 end = make_single_succ_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
2825 rescan_loop_exit (end, true, false);
2827 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2828 !gsi_end_p (gpi); gsi_next (&gpi))
2830 location_t locus;
2831 gphi *phi = gpi.phi ();
2832 tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2833 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
2835 /* If the exit phi is not connected to a header phi in the same loop, this
2836 value is not modified in the loop, and we're done with this phi. */
2837 if (!(gimple_code (def_stmt) == GIMPLE_PHI
2838 && gimple_bb (def_stmt) == loop->header))
2840 locus = gimple_phi_arg_location_from_edge (phi, exit);
2841 add_phi_arg (phi, def, guard, locus);
2842 add_phi_arg (phi, def, end, locus);
2843 continue;
2846 gphi *stmt = as_a <gphi *> (def_stmt);
2847 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
2848 locus = gimple_phi_arg_location_from_edge (stmt,
2849 loop_preheader_edge (loop));
2850 add_phi_arg (phi, def, guard, locus);
2852 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2853 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
2854 add_phi_arg (phi, def, end, locus);
2856 e = redirect_edge_and_branch (exit, nexit->dest);
2857 PENDING_STMT (e) = NULL;
2859 /* Emit GIMPLE_OMP_FOR. */
2860 if (oacc_kernels_p)
2861 /* Parallelized OpenACC kernels constructs use gang parallelism. See also
2862 omp-offload.cc:execute_oacc_loop_designation. */
2863 t = build_omp_clause (loc, OMP_CLAUSE_GANG);
2864 else
2866 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2867 int chunk_size = param_parloops_chunk_size;
2868 switch (param_parloops_schedule)
2870 case PARLOOPS_SCHEDULE_STATIC:
2871 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2872 break;
2873 case PARLOOPS_SCHEDULE_DYNAMIC:
2874 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
2875 break;
2876 case PARLOOPS_SCHEDULE_GUIDED:
2877 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
2878 break;
2879 case PARLOOPS_SCHEDULE_AUTO:
2880 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
2881 chunk_size = 0;
2882 break;
2883 case PARLOOPS_SCHEDULE_RUNTIME:
2884 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
2885 chunk_size = 0;
2886 break;
2887 default:
2888 gcc_unreachable ();
2890 if (chunk_size != 0)
2891 OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
2892 = build_int_cst (integer_type_node, chunk_size);
2895 for_stmt = gimple_build_omp_for (NULL,
2896 (oacc_kernels_p
2897 ? GF_OMP_FOR_KIND_OACC_LOOP
2898 : GF_OMP_FOR_KIND_FOR),
2899 t, 1, NULL);
2901 gimple_cond_set_lhs (cond_stmt, cvar_base);
2902 type = TREE_TYPE (cvar);
2903 gimple_set_location (for_stmt, loc);
2904 gimple_omp_for_set_index (for_stmt, 0, initvar);
2905 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2906 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2907 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2908 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2909 cvar_base,
2910 build_int_cst (type, 1)));
2912 gsi = gsi_last_bb (for_bb);
2913 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
2914 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2916 /* Emit GIMPLE_OMP_CONTINUE. */
2917 continue_bb = single_pred (loop->latch);
2918 gsi = gsi_last_bb (continue_bb);
2919 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2920 gimple_set_location (omp_cont_stmt, loc);
2921 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2922 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
2924 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2925 gsi = gsi_last_bb (ex_bb);
2926 omp_return_stmt2 = gimple_build_omp_return (true);
2927 gimple_set_location (omp_return_stmt2, loc);
2928 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
2930 /* After the above dom info is hosed. Re-compute it. */
2931 free_dominance_info (CDI_DOMINATORS);
2932 calculate_dominance_info (CDI_DOMINATORS);
2935 /* Return number of phis in bb. If COUNT_VIRTUAL_P is false, don't count the
2936 virtual phi. */
2938 static unsigned int
2939 num_phis (basic_block bb, bool count_virtual_p)
2941 unsigned int nr_phis = 0;
2942 gphi_iterator gsi;
2943 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2945 if (!count_virtual_p && virtual_operand_p (PHI_RESULT (gsi.phi ())))
2946 continue;
2948 nr_phis++;
2951 return nr_phis;
2954 /* Generates code to execute the iterations of LOOP in N_THREADS
2955 threads in parallel, which can be 0 if that number is to be determined
2956 later.
2958 NITER describes number of iterations of LOOP.
2959 REDUCTION_LIST describes the reductions existent in the LOOP. */
2961 static void
2962 gen_parallel_loop (class loop *loop,
2963 reduction_info_table_type *reduction_list,
2964 unsigned n_threads, class tree_niter_desc *niter,
2965 bool oacc_kernels_p)
2967 tree many_iterations_cond, type, nit;
2968 tree arg_struct, new_arg_struct;
2969 gimple_seq stmts;
2970 edge entry, exit;
2971 struct clsn_data clsn_data;
2972 location_t loc;
2973 gimple *cond_stmt;
2974 unsigned int m_p_thread=2;
2976 /* From
2978 ---------------------------------------------------------------------
2979 loop
2981 IV = phi (INIT, IV + STEP)
2982 BODY1;
2983 if (COND)
2984 break;
2985 BODY2;
2987 ---------------------------------------------------------------------
2989 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2990 we generate the following code:
2992 ---------------------------------------------------------------------
2994 if (MAY_BE_ZERO
2995 || NITER < MIN_PER_THREAD * N_THREADS)
2996 goto original;
2998 BODY1;
2999 store all local loop-invariant variables used in body of the loop to DATA.
3000 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
3001 load the variables from DATA.
3002 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
3003 BODY2;
3004 BODY1;
3005 GIMPLE_OMP_CONTINUE;
3006 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
3007 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
3008 goto end;
3010 original:
3011 loop
3013 IV = phi (INIT, IV + STEP)
3014 BODY1;
3015 if (COND)
3016 break;
3017 BODY2;
3020 end:
3024 /* Create two versions of the loop -- in the old one, we know that the
3025 number of iterations is large enough, and we will transform it into the
3026 loop that will be split to loop_fn, the new one will be used for the
3027 remaining iterations. */
3029 /* We should compute a better number-of-iterations value for outer loops.
3030 That is, if we have
3032 for (i = 0; i < n; ++i)
3033 for (j = 0; j < m; ++j)
3036 we should compute nit = n * m, not nit = n.
3037 Also may_be_zero handling would need to be adjusted. */
3039 type = TREE_TYPE (niter->niter);
3040 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
3041 NULL_TREE);
3042 if (stmts)
3043 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3045 if (!oacc_kernels_p)
3047 if (loop->inner)
3048 m_p_thread=2;
3049 else
3050 m_p_thread=MIN_PER_THREAD;
3052 gcc_checking_assert (n_threads != 0);
3053 many_iterations_cond =
3054 fold_build2 (GE_EXPR, boolean_type_node,
3055 nit, build_int_cst (type, m_p_thread * n_threads - 1));
3057 many_iterations_cond
3058 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3059 invert_truthvalue (unshare_expr (niter->may_be_zero)),
3060 many_iterations_cond);
3061 many_iterations_cond
3062 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
3063 if (stmts)
3064 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3065 if (!is_gimple_condexpr_for_cond (many_iterations_cond))
3067 many_iterations_cond
3068 = force_gimple_operand (many_iterations_cond, &stmts,
3069 true, NULL_TREE);
3070 if (stmts)
3071 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
3072 stmts);
3075 initialize_original_copy_tables ();
3077 /* We assume that the loop usually iterates a lot. */
3078 loop_version (loop, many_iterations_cond, NULL,
3079 profile_probability::likely (),
3080 profile_probability::unlikely (),
3081 profile_probability::likely (),
3082 profile_probability::unlikely (), true);
3083 update_ssa (TODO_update_ssa_no_phi);
3084 free_original_copy_tables ();
3087 /* Base all the induction variables in LOOP on a single control one. */
3088 canonicalize_loop_ivs (loop, &nit, true);
3089 if (num_phis (loop->header, false) != reduction_list->elements () + 1)
3091 /* The call to canonicalize_loop_ivs above failed to "base all the
3092 induction variables in LOOP on a single control one". Do damage
3093 control. */
3094 basic_block preheader = loop_preheader_edge (loop)->src;
3095 basic_block cond_bb = single_pred (preheader);
3096 gcond *cond = as_a <gcond *> (gsi_stmt (gsi_last_bb (cond_bb)));
3097 gimple_cond_make_true (cond);
3098 update_stmt (cond);
3099 /* We've gotten rid of the duplicate loop created by loop_version, but
3100 we can't undo whatever canonicalize_loop_ivs has done.
3101 TODO: Fix this properly by ensuring that the call to
3102 canonicalize_loop_ivs succeeds. */
3103 if (dump_file
3104 && (dump_flags & TDF_DETAILS))
3105 fprintf (dump_file, "canonicalize_loop_ivs failed for loop %d,"
3106 " aborting transformation\n", loop->num);
3107 return;
3110 /* Ensure that the exit condition is the first statement in the loop.
3111 The common case is that latch of the loop is empty (apart from the
3112 increment) and immediately follows the loop exit test. Attempt to move the
3113 entry of the loop directly before the exit check and increase the number of
3114 iterations of the loop by one. */
3115 if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
3117 if (dump_file
3118 && (dump_flags & TDF_DETAILS))
3119 fprintf (dump_file,
3120 "alternative exit-first loop transform succeeded"
3121 " for loop %d\n", loop->num);
3123 else
3125 if (oacc_kernels_p)
3126 n_threads = 1;
3128 /* Fall back on the method that handles more cases, but duplicates the
3129 loop body: move the exit condition of LOOP to the beginning of its
3130 header, and duplicate the part of the last iteration that gets disabled
3131 to the exit of the loop. */
3132 transform_to_exit_first_loop (loop, reduction_list, nit);
3134 update_ssa (TODO_update_ssa_no_phi);
3136 /* Generate initializations for reductions. */
3137 if (!reduction_list->is_empty ())
3138 reduction_list->traverse <class loop *, initialize_reductions> (loop);
3140 /* Eliminate the references to local variables from the loop. */
3141 gcc_assert (single_exit (loop));
3142 entry = loop_preheader_edge (loop);
3143 exit = single_dom_exit (loop);
3145 /* This rewrites the body in terms of new variables. This has already
3146 been done for oacc_kernels_p in pass_lower_omp/lower_omp (). */
3147 if (!oacc_kernels_p)
3149 eliminate_local_variables (entry, exit);
3150 /* In the old loop, move all variables non-local to the loop to a
3151 structure and back, and create separate decls for the variables used in
3152 loop. */
3153 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
3154 &new_arg_struct, &clsn_data);
3156 else
3158 arg_struct = NULL_TREE;
3159 new_arg_struct = NULL_TREE;
3160 clsn_data.load = NULL_TREE;
3161 clsn_data.load_bb = exit->dest;
3162 clsn_data.store = NULL_TREE;
3163 clsn_data.store_bb = NULL;
3166 /* Create the parallel constructs. */
3167 loc = UNKNOWN_LOCATION;
3168 cond_stmt = last_stmt (loop->header);
3169 if (cond_stmt)
3170 loc = gimple_location (cond_stmt);
3171 create_parallel_loop (loop, create_loop_fn (loc), arg_struct, new_arg_struct,
3172 n_threads, loc, oacc_kernels_p);
3173 if (!reduction_list->is_empty ())
3174 create_call_for_reduction (loop, reduction_list, &clsn_data);
3176 scev_reset ();
3178 /* Free loop bound estimations that could contain references to
3179 removed statements. */
3180 free_numbers_of_iterations_estimates (cfun);
3183 /* Returns true when LOOP contains vector phi nodes. */
3185 static bool
3186 loop_has_vector_phi_nodes (class loop *loop ATTRIBUTE_UNUSED)
3188 unsigned i;
3189 basic_block *bbs = get_loop_body_in_dom_order (loop);
3190 gphi_iterator gsi;
3191 bool res = true;
3193 for (i = 0; i < loop->num_nodes; i++)
3194 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
3195 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
3196 goto end;
3198 res = false;
3199 end:
3200 free (bbs);
3201 return res;
3204 /* Create a reduction_info struct, initialize it with REDUC_STMT
3205 and PHI, insert it to the REDUCTION_LIST. */
3207 static void
3208 build_new_reduction (reduction_info_table_type *reduction_list,
3209 gimple *reduc_stmt, gphi *phi)
3211 reduction_info **slot;
3212 struct reduction_info *new_reduction;
3213 enum tree_code reduction_code;
3215 gcc_assert (reduc_stmt);
3217 if (gimple_code (reduc_stmt) == GIMPLE_PHI)
3219 tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
3220 gimple *def1 = SSA_NAME_DEF_STMT (op1);
3221 reduction_code = gimple_assign_rhs_code (def1);
3223 else
3224 reduction_code = gimple_assign_rhs_code (reduc_stmt);
3225 /* Check for OpenMP supported reduction. */
3226 switch (reduction_code)
3228 case PLUS_EXPR:
3229 case MULT_EXPR:
3230 case MAX_EXPR:
3231 case MIN_EXPR:
3232 case BIT_IOR_EXPR:
3233 case BIT_XOR_EXPR:
3234 case BIT_AND_EXPR:
3235 case TRUTH_OR_EXPR:
3236 case TRUTH_XOR_EXPR:
3237 case TRUTH_AND_EXPR:
3238 break;
3239 default:
3240 return;
3243 if (dump_file && (dump_flags & TDF_DETAILS))
3245 fprintf (dump_file,
3246 "Detected reduction. reduction stmt is:\n");
3247 print_gimple_stmt (dump_file, reduc_stmt, 0);
3248 fprintf (dump_file, "\n");
3251 new_reduction = XCNEW (struct reduction_info);
3253 new_reduction->reduc_stmt = reduc_stmt;
3254 new_reduction->reduc_phi = phi;
3255 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
3256 new_reduction->reduction_code = reduction_code;
3257 slot = reduction_list->find_slot (new_reduction, INSERT);
3258 *slot = new_reduction;
3261 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
3264 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
3266 struct reduction_info *const red = *slot;
3267 gimple_set_uid (red->reduc_phi, red->reduc_version);
3268 return 1;
3271 /* Return true if the type of reduction performed by STMT_INFO is suitable
3272 for this pass. */
3274 static bool
3275 valid_reduction_p (stmt_vec_info stmt_info)
3277 /* Parallelization would reassociate the operation, which isn't
3278 allowed for in-order reductions. */
3279 vect_reduction_type reduc_type = STMT_VINFO_REDUC_TYPE (stmt_info);
3280 return reduc_type != FOLD_LEFT_REDUCTION;
3283 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
3285 static void
3286 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
3288 gphi_iterator gsi;
3289 loop_vec_info simple_loop_info;
3290 auto_vec<gphi *, 4> double_reduc_phis;
3291 auto_vec<gimple *, 4> double_reduc_stmts;
3293 vec_info_shared shared;
3294 vect_loop_form_info info;
3295 if (!vect_analyze_loop_form (loop, &info))
3296 goto gather_done;
3298 simple_loop_info = vect_create_loop_vinfo (loop, &shared, &info);
3299 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
3301 gphi *phi = gsi.phi ();
3302 affine_iv iv;
3303 tree res = PHI_RESULT (phi);
3304 bool double_reduc;
3306 if (virtual_operand_p (res))
3307 continue;
3309 if (simple_iv (loop, loop, res, &iv, true))
3310 continue;
3312 stmt_vec_info reduc_stmt_info
3313 = parloops_force_simple_reduction (simple_loop_info,
3314 simple_loop_info->lookup_stmt (phi),
3315 &double_reduc, true);
3316 if (!reduc_stmt_info || !valid_reduction_p (reduc_stmt_info))
3317 continue;
3319 if (double_reduc)
3321 if (loop->inner->inner != NULL)
3322 continue;
3324 double_reduc_phis.safe_push (phi);
3325 double_reduc_stmts.safe_push (reduc_stmt_info->stmt);
3326 continue;
3329 build_new_reduction (reduction_list, reduc_stmt_info->stmt, phi);
3331 delete simple_loop_info;
3333 if (!double_reduc_phis.is_empty ())
3335 vec_info_shared shared;
3336 vect_loop_form_info info;
3337 if (vect_analyze_loop_form (loop->inner, &info))
3339 simple_loop_info
3340 = vect_create_loop_vinfo (loop->inner, &shared, &info);
3341 gphi *phi;
3342 unsigned int i;
3344 FOR_EACH_VEC_ELT (double_reduc_phis, i, phi)
3346 affine_iv iv;
3347 tree res = PHI_RESULT (phi);
3348 bool double_reduc;
3350 use_operand_p use_p;
3351 gimple *inner_stmt;
3352 bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
3353 gcc_assert (single_use_p);
3354 if (gimple_code (inner_stmt) != GIMPLE_PHI)
3355 continue;
3356 gphi *inner_phi = as_a <gphi *> (inner_stmt);
3357 if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
3358 &iv, true))
3359 continue;
3361 stmt_vec_info inner_phi_info
3362 = simple_loop_info->lookup_stmt (inner_phi);
3363 stmt_vec_info inner_reduc_stmt_info
3364 = parloops_force_simple_reduction (simple_loop_info,
3365 inner_phi_info,
3366 &double_reduc, true);
3367 gcc_assert (!double_reduc);
3368 if (!inner_reduc_stmt_info
3369 || !valid_reduction_p (inner_reduc_stmt_info))
3370 continue;
3372 build_new_reduction (reduction_list, double_reduc_stmts[i], phi);
3374 delete simple_loop_info;
3378 gather_done:
3379 if (reduction_list->is_empty ())
3380 return;
3382 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
3383 and delete simple_loop_info, we can set gimple_uid of reduc_phi stmts only
3384 now. */
3385 basic_block bb;
3386 FOR_EACH_BB_FN (bb, cfun)
3387 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3388 gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
3389 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
3392 /* Try to initialize NITER for code generation part. */
3394 static bool
3395 try_get_loop_niter (loop_p loop, class tree_niter_desc *niter)
3397 edge exit = single_dom_exit (loop);
3399 gcc_assert (exit);
3401 /* We need to know # of iterations, and there should be no uses of values
3402 defined inside loop outside of it, unless the values are invariants of
3403 the loop. */
3404 if (!number_of_iterations_exit (loop, exit, niter, false))
3406 if (dump_file && (dump_flags & TDF_DETAILS))
3407 fprintf (dump_file, " FAILED: number of iterations not known\n");
3408 return false;
3411 return true;
3414 /* Return the default def of the first function argument. */
3416 static tree
3417 get_omp_data_i_param (void)
3419 tree decl = DECL_ARGUMENTS (cfun->decl);
3420 gcc_assert (DECL_CHAIN (decl) == NULL_TREE);
3421 return ssa_default_def (cfun, decl);
3424 /* For PHI in loop header of LOOP, look for pattern:
3426 <bb preheader>
3427 .omp_data_i = &.omp_data_arr;
3428 addr = .omp_data_i->sum;
3429 sum_a = *addr;
3431 <bb header>:
3432 sum_b = PHI <sum_a (preheader), sum_c (latch)>
3434 and return addr. Otherwise, return NULL_TREE. */
3436 static tree
3437 find_reduc_addr (class loop *loop, gphi *phi)
3439 edge e = loop_preheader_edge (loop);
3440 tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
3441 gimple *stmt = SSA_NAME_DEF_STMT (arg);
3442 if (!gimple_assign_single_p (stmt))
3443 return NULL_TREE;
3444 tree memref = gimple_assign_rhs1 (stmt);
3445 if (TREE_CODE (memref) != MEM_REF)
3446 return NULL_TREE;
3447 tree addr = TREE_OPERAND (memref, 0);
3449 gimple *stmt2 = SSA_NAME_DEF_STMT (addr);
3450 if (!gimple_assign_single_p (stmt2))
3451 return NULL_TREE;
3452 tree compref = gimple_assign_rhs1 (stmt2);
3453 if (TREE_CODE (compref) != COMPONENT_REF)
3454 return NULL_TREE;
3455 tree addr2 = TREE_OPERAND (compref, 0);
3456 if (TREE_CODE (addr2) != MEM_REF)
3457 return NULL_TREE;
3458 addr2 = TREE_OPERAND (addr2, 0);
3459 if (TREE_CODE (addr2) != SSA_NAME
3460 || addr2 != get_omp_data_i_param ())
3461 return NULL_TREE;
3463 return addr;
3466 /* Try to initialize REDUCTION_LIST for code generation part.
3467 REDUCTION_LIST describes the reductions. */
3469 static bool
3470 try_create_reduction_list (loop_p loop,
3471 reduction_info_table_type *reduction_list,
3472 bool oacc_kernels_p)
3474 edge exit = single_dom_exit (loop);
3475 gphi_iterator gsi;
3477 gcc_assert (exit);
3479 /* Try to get rid of exit phis. */
3480 final_value_replacement_loop (loop);
3482 gather_scalar_reductions (loop, reduction_list);
3485 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3487 gphi *phi = gsi.phi ();
3488 struct reduction_info *red;
3489 imm_use_iterator imm_iter;
3490 use_operand_p use_p;
3491 gimple *reduc_phi;
3492 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
3494 if (!virtual_operand_p (val))
3496 if (TREE_CODE (val) != SSA_NAME)
3498 if (dump_file && (dump_flags & TDF_DETAILS))
3499 fprintf (dump_file,
3500 " FAILED: exit PHI argument invariant.\n");
3501 return false;
3504 if (dump_file && (dump_flags & TDF_DETAILS))
3506 fprintf (dump_file, "phi is ");
3507 print_gimple_stmt (dump_file, phi, 0);
3508 fprintf (dump_file, "arg of phi to exit: value ");
3509 print_generic_expr (dump_file, val);
3510 fprintf (dump_file, " used outside loop\n");
3511 fprintf (dump_file,
3512 " checking if it is part of reduction pattern:\n");
3514 if (reduction_list->is_empty ())
3516 if (dump_file && (dump_flags & TDF_DETAILS))
3517 fprintf (dump_file,
3518 " FAILED: it is not a part of reduction.\n");
3519 return false;
3521 reduc_phi = NULL;
3522 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
3524 if (!gimple_debug_bind_p (USE_STMT (use_p))
3525 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
3527 reduc_phi = USE_STMT (use_p);
3528 break;
3531 red = reduction_phi (reduction_list, reduc_phi);
3532 if (red == NULL)
3534 if (dump_file && (dump_flags & TDF_DETAILS))
3535 fprintf (dump_file,
3536 " FAILED: it is not a part of reduction.\n");
3537 return false;
3539 if (red->keep_res != NULL)
3541 if (dump_file && (dump_flags & TDF_DETAILS))
3542 fprintf (dump_file,
3543 " FAILED: reduction has multiple exit phis.\n");
3544 return false;
3546 red->keep_res = phi;
3547 if (dump_file && (dump_flags & TDF_DETAILS))
3549 fprintf (dump_file, "reduction phi is ");
3550 print_gimple_stmt (dump_file, red->reduc_phi, 0);
3551 fprintf (dump_file, "reduction stmt is ");
3552 print_gimple_stmt (dump_file, red->reduc_stmt, 0);
3557 /* The iterations of the loop may communicate only through bivs whose
3558 iteration space can be distributed efficiently. */
3559 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
3561 gphi *phi = gsi.phi ();
3562 tree def = PHI_RESULT (phi);
3563 affine_iv iv;
3565 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
3567 struct reduction_info *red;
3569 red = reduction_phi (reduction_list, phi);
3570 if (red == NULL)
3572 if (dump_file && (dump_flags & TDF_DETAILS))
3573 fprintf (dump_file,
3574 " FAILED: scalar dependency between iterations\n");
3575 return false;
3580 if (oacc_kernels_p)
3582 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi);
3583 gsi_next (&gsi))
3585 gphi *phi = gsi.phi ();
3586 tree def = PHI_RESULT (phi);
3587 affine_iv iv;
3589 if (!virtual_operand_p (def)
3590 && !simple_iv (loop, loop, def, &iv, true))
3592 tree addr = find_reduc_addr (loop, phi);
3593 if (addr == NULL_TREE)
3594 return false;
3595 struct reduction_info *red = reduction_phi (reduction_list, phi);
3596 red->reduc_addr = addr;
3601 return true;
3604 /* Return true if LOOP contains phis with ADDR_EXPR in args. */
3606 static bool
3607 loop_has_phi_with_address_arg (class loop *loop)
3609 basic_block *bbs = get_loop_body (loop);
3610 bool res = false;
3612 unsigned i, j;
3613 gphi_iterator gsi;
3614 for (i = 0; i < loop->num_nodes; i++)
3615 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
3617 gphi *phi = gsi.phi ();
3618 for (j = 0; j < gimple_phi_num_args (phi); j++)
3620 tree arg = gimple_phi_arg_def (phi, j);
3621 if (TREE_CODE (arg) == ADDR_EXPR)
3623 /* This should be handled by eliminate_local_variables, but that
3624 function currently ignores phis. */
3625 res = true;
3626 goto end;
3630 end:
3631 free (bbs);
3633 return res;
3636 /* Return true if memory ref REF (corresponding to the stmt at GSI in
3637 REGIONS_BB[I]) conflicts with the statements in REGIONS_BB[I] after gsi,
3638 or the statements in REGIONS_BB[I + n]. REF_IS_STORE indicates if REF is a
3639 store. Ignore conflicts with SKIP_STMT. */
3641 static bool
3642 ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
3643 bool ref_is_store, vec<basic_block> region_bbs,
3644 unsigned int i, gimple *skip_stmt)
3646 basic_block bb = region_bbs[i];
3647 gsi_next (&gsi);
3649 while (true)
3651 for (; !gsi_end_p (gsi);
3652 gsi_next (&gsi))
3654 gimple *stmt = gsi_stmt (gsi);
3655 if (stmt == skip_stmt)
3657 if (dump_file)
3659 fprintf (dump_file, "skipping reduction store: ");
3660 print_gimple_stmt (dump_file, stmt, 0);
3662 continue;
3665 if (!gimple_vdef (stmt)
3666 && !gimple_vuse (stmt))
3667 continue;
3669 if (gimple_code (stmt) == GIMPLE_RETURN)
3670 continue;
3672 if (ref_is_store)
3674 if (ref_maybe_used_by_stmt_p (stmt, ref))
3676 if (dump_file)
3678 fprintf (dump_file, "Stmt ");
3679 print_gimple_stmt (dump_file, stmt, 0);
3681 return true;
3684 else
3686 if (stmt_may_clobber_ref_p_1 (stmt, ref))
3688 if (dump_file)
3690 fprintf (dump_file, "Stmt ");
3691 print_gimple_stmt (dump_file, stmt, 0);
3693 return true;
3697 i++;
3698 if (i == region_bbs.length ())
3699 break;
3700 bb = region_bbs[i];
3701 gsi = gsi_start_bb (bb);
3704 return false;
3707 /* Return true if the bbs in REGION_BBS but not in in_loop_bbs can be executed
3708 in parallel with REGION_BBS containing the loop. Return the stores of
3709 reduction results in REDUCTION_STORES. */
3711 static bool
3712 oacc_entry_exit_ok_1 (bitmap in_loop_bbs, const vec<basic_block> &region_bbs,
3713 reduction_info_table_type *reduction_list,
3714 bitmap reduction_stores)
3716 tree omp_data_i = get_omp_data_i_param ();
3718 unsigned i;
3719 basic_block bb;
3720 FOR_EACH_VEC_ELT (region_bbs, i, bb)
3722 if (bitmap_bit_p (in_loop_bbs, bb->index))
3723 continue;
3725 gimple_stmt_iterator gsi;
3726 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
3727 gsi_next (&gsi))
3729 gimple *stmt = gsi_stmt (gsi);
3730 gimple *skip_stmt = NULL;
3732 if (is_gimple_debug (stmt)
3733 || gimple_code (stmt) == GIMPLE_COND)
3734 continue;
3736 ao_ref ref;
3737 bool ref_is_store = false;
3738 if (gimple_assign_load_p (stmt))
3740 tree rhs = gimple_assign_rhs1 (stmt);
3741 tree base = get_base_address (rhs);
3742 if (TREE_CODE (base) == MEM_REF
3743 && operand_equal_p (TREE_OPERAND (base, 0), omp_data_i, 0))
3744 continue;
3746 tree lhs = gimple_assign_lhs (stmt);
3747 if (TREE_CODE (lhs) == SSA_NAME
3748 && has_single_use (lhs))
3750 use_operand_p use_p;
3751 gimple *use_stmt;
3752 struct reduction_info *red;
3753 single_imm_use (lhs, &use_p, &use_stmt);
3754 if (gimple_code (use_stmt) == GIMPLE_PHI
3755 && (red = reduction_phi (reduction_list, use_stmt)))
3757 tree val = PHI_RESULT (red->keep_res);
3758 if (has_single_use (val))
3760 single_imm_use (val, &use_p, &use_stmt);
3761 if (gimple_store_p (use_stmt))
3763 unsigned int id
3764 = SSA_NAME_VERSION (gimple_vdef (use_stmt));
3765 bitmap_set_bit (reduction_stores, id);
3766 skip_stmt = use_stmt;
3767 if (dump_file)
3769 fprintf (dump_file, "found reduction load: ");
3770 print_gimple_stmt (dump_file, stmt, 0);
3777 ao_ref_init (&ref, rhs);
3779 else if (gimple_store_p (stmt))
3781 ao_ref_init (&ref, gimple_assign_lhs (stmt));
3782 ref_is_store = true;
3784 else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
3785 continue;
3786 else if (!gimple_has_side_effects (stmt)
3787 && !gimple_could_trap_p (stmt)
3788 && !stmt_could_throw_p (cfun, stmt)
3789 && !gimple_vdef (stmt)
3790 && !gimple_vuse (stmt))
3791 continue;
3792 else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS))
3793 continue;
3794 else if (gimple_code (stmt) == GIMPLE_RETURN)
3795 continue;
3796 else
3798 if (dump_file)
3800 fprintf (dump_file, "Unhandled stmt in entry/exit: ");
3801 print_gimple_stmt (dump_file, stmt, 0);
3803 return false;
3806 if (ref_conflicts_with_region (gsi, &ref, ref_is_store, region_bbs,
3807 i, skip_stmt))
3809 if (dump_file)
3811 fprintf (dump_file, "conflicts with entry/exit stmt: ");
3812 print_gimple_stmt (dump_file, stmt, 0);
3814 return false;
3819 return true;
3822 /* Find stores inside REGION_BBS and outside IN_LOOP_BBS, and guard them with
3823 gang_pos == 0, except when the stores are REDUCTION_STORES. Return true
3824 if any changes were made. */
3826 static bool
3827 oacc_entry_exit_single_gang (bitmap in_loop_bbs,
3828 const vec<basic_block> &region_bbs,
3829 bitmap reduction_stores)
3831 tree gang_pos = NULL_TREE;
3832 bool changed = false;
3834 unsigned i;
3835 basic_block bb;
3836 FOR_EACH_VEC_ELT (region_bbs, i, bb)
3838 if (bitmap_bit_p (in_loop_bbs, bb->index))
3839 continue;
3841 gimple_stmt_iterator gsi;
3842 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
3844 gimple *stmt = gsi_stmt (gsi);
3846 if (!gimple_store_p (stmt))
3848 /* Update gsi to point to next stmt. */
3849 gsi_next (&gsi);
3850 continue;
3853 if (bitmap_bit_p (reduction_stores,
3854 SSA_NAME_VERSION (gimple_vdef (stmt))))
3856 if (dump_file)
3858 fprintf (dump_file,
3859 "skipped reduction store for single-gang"
3860 " neutering: ");
3861 print_gimple_stmt (dump_file, stmt, 0);
3864 /* Update gsi to point to next stmt. */
3865 gsi_next (&gsi);
3866 continue;
3869 changed = true;
3871 if (gang_pos == NULL_TREE)
3873 tree arg = build_int_cst (integer_type_node, GOMP_DIM_GANG);
3874 gcall *gang_single
3875 = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
3876 gang_pos = make_ssa_name (integer_type_node);
3877 gimple_call_set_lhs (gang_single, gang_pos);
3878 gimple_stmt_iterator start
3879 = gsi_start_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
3880 tree vuse = ssa_default_def (cfun, gimple_vop (cfun));
3881 gimple_set_vuse (gang_single, vuse);
3882 gsi_insert_before (&start, gang_single, GSI_SAME_STMT);
3885 if (dump_file)
3887 fprintf (dump_file,
3888 "found store that needs single-gang neutering: ");
3889 print_gimple_stmt (dump_file, stmt, 0);
3893 /* Split block before store. */
3894 gimple_stmt_iterator gsi2 = gsi;
3895 gsi_prev (&gsi2);
3896 edge e;
3897 if (gsi_end_p (gsi2))
3899 e = split_block_after_labels (bb);
3900 gsi2 = gsi_last_bb (bb);
3902 else
3903 e = split_block (bb, gsi_stmt (gsi2));
3904 basic_block bb2 = e->dest;
3906 /* Split block after store. */
3907 gimple_stmt_iterator gsi3 = gsi_start_bb (bb2);
3908 edge e2 = split_block (bb2, gsi_stmt (gsi3));
3909 basic_block bb3 = e2->dest;
3911 gimple *cond
3912 = gimple_build_cond (EQ_EXPR, gang_pos, integer_zero_node,
3913 NULL_TREE, NULL_TREE);
3914 gsi_insert_after (&gsi2, cond, GSI_NEW_STMT);
3916 edge e3 = make_edge (bb, bb3, EDGE_FALSE_VALUE);
3917 /* FIXME: What is the probability? */
3918 e3->probability = profile_probability::guessed_never ();
3919 e->flags = EDGE_TRUE_VALUE;
3921 tree vdef = gimple_vdef (stmt);
3922 tree vuse = gimple_vuse (stmt);
3924 tree phi_res = copy_ssa_name (vdef);
3925 gphi *new_phi = create_phi_node (phi_res, bb3);
3926 replace_uses_by (vdef, phi_res);
3927 add_phi_arg (new_phi, vuse, e3, UNKNOWN_LOCATION);
3928 add_phi_arg (new_phi, vdef, e2, UNKNOWN_LOCATION);
3930 /* Update gsi to point to next stmt. */
3931 bb = bb3;
3932 gsi = gsi_start_bb (bb);
3937 return changed;
3940 /* Return true if the statements before and after the LOOP can be executed in
3941 parallel with the function containing the loop. Resolve conflicting stores
3942 outside LOOP by guarding them such that only a single gang executes them. */
3944 static bool
3945 oacc_entry_exit_ok (class loop *loop,
3946 reduction_info_table_type *reduction_list)
3948 basic_block *loop_bbs = get_loop_body_in_dom_order (loop);
3949 auto_vec<basic_block> region_bbs
3950 = get_all_dominated_blocks (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
3952 bitmap in_loop_bbs = BITMAP_ALLOC (NULL);
3953 bitmap_clear (in_loop_bbs);
3954 for (unsigned int i = 0; i < loop->num_nodes; i++)
3955 bitmap_set_bit (in_loop_bbs, loop_bbs[i]->index);
3957 bitmap reduction_stores = BITMAP_ALLOC (NULL);
3958 bool res = oacc_entry_exit_ok_1 (in_loop_bbs, region_bbs, reduction_list,
3959 reduction_stores);
3961 if (res)
3963 bool changed = oacc_entry_exit_single_gang (in_loop_bbs, region_bbs,
3964 reduction_stores);
3965 if (changed)
3967 free_dominance_info (CDI_DOMINATORS);
3968 calculate_dominance_info (CDI_DOMINATORS);
3972 free (loop_bbs);
3974 BITMAP_FREE (in_loop_bbs);
3975 BITMAP_FREE (reduction_stores);
3977 return res;
3980 /* Detect parallel loops and generate parallel code using libgomp
3981 primitives. Returns true if some loop was parallelized, false
3982 otherwise. */
3984 static bool
3985 parallelize_loops (bool oacc_kernels_p)
3987 unsigned n_threads;
3988 bool changed = false;
3989 class loop *skip_loop = NULL;
3990 class tree_niter_desc niter_desc;
3991 struct obstack parloop_obstack;
3992 HOST_WIDE_INT estimated;
3994 /* Do not parallelize loops in the functions created by parallelization. */
3995 if (!oacc_kernels_p
3996 && parallelized_function_p (cfun->decl))
3997 return false;
3999 /* Do not parallelize loops in offloaded functions. */
4000 if (!oacc_kernels_p
4001 && oacc_get_fn_attrib (cfun->decl) != NULL)
4002 return false;
4004 if (cfun->has_nonlocal_label)
4005 return false;
4007 /* For OpenACC kernels, n_threads will be determined later; otherwise, it's
4008 the argument to -ftree-parallelize-loops. */
4009 if (oacc_kernels_p)
4010 n_threads = 0;
4011 else
4012 n_threads = flag_tree_parallelize_loops;
4014 gcc_obstack_init (&parloop_obstack);
4015 reduction_info_table_type reduction_list (10);
4017 calculate_dominance_info (CDI_DOMINATORS);
4019 for (auto loop : loops_list (cfun, 0))
4021 if (loop == skip_loop)
4023 if (!loop->in_oacc_kernels_region
4024 && dump_file && (dump_flags & TDF_DETAILS))
4025 fprintf (dump_file,
4026 "Skipping loop %d as inner loop of parallelized loop\n",
4027 loop->num);
4029 skip_loop = loop->inner;
4030 continue;
4032 else
4033 skip_loop = NULL;
4035 reduction_list.empty ();
4037 if (oacc_kernels_p)
4039 if (!loop->in_oacc_kernels_region)
4040 continue;
4042 /* Don't try to parallelize inner loops in an oacc kernels region. */
4043 if (loop->inner)
4044 skip_loop = loop->inner;
4046 if (dump_file && (dump_flags & TDF_DETAILS))
4047 fprintf (dump_file,
4048 "Trying loop %d with header bb %d in oacc kernels"
4049 " region\n", loop->num, loop->header->index);
4052 if (dump_file && (dump_flags & TDF_DETAILS))
4054 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
4055 if (loop->inner)
4056 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
4057 else
4058 fprintf (dump_file, "loop %d is innermost\n",loop->num);
4061 if (!single_dom_exit (loop))
4064 if (dump_file && (dump_flags & TDF_DETAILS))
4065 fprintf (dump_file, "loop is !single_dom_exit\n");
4067 continue;
4070 if (/* And of course, the loop must be parallelizable. */
4071 !can_duplicate_loop_p (loop)
4072 || loop_has_blocks_with_irreducible_flag (loop)
4073 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
4074 /* FIXME: the check for vector phi nodes could be removed. */
4075 || loop_has_vector_phi_nodes (loop))
4076 continue;
4078 estimated = estimated_loop_iterations_int (loop);
4079 if (estimated == -1)
4080 estimated = get_likely_max_loop_iterations_int (loop);
4081 /* FIXME: Bypass this check as graphite doesn't update the
4082 count and frequency correctly now. */
4083 if (!flag_loop_parallelize_all
4084 && !oacc_kernels_p
4085 && ((estimated != -1
4086 && (estimated
4087 < ((HOST_WIDE_INT) n_threads
4088 * (loop->inner ? 2 : MIN_PER_THREAD) - 1)))
4089 /* Do not bother with loops in cold areas. */
4090 || optimize_loop_nest_for_size_p (loop)))
4091 continue;
4093 if (!try_get_loop_niter (loop, &niter_desc))
4094 continue;
4096 if (!try_create_reduction_list (loop, &reduction_list, oacc_kernels_p))
4097 continue;
4099 if (loop_has_phi_with_address_arg (loop))
4100 continue;
4102 if (!loop->can_be_parallel
4103 && !loop_parallel_p (loop, &parloop_obstack))
4104 continue;
4106 if (oacc_kernels_p
4107 && !oacc_entry_exit_ok (loop, &reduction_list))
4109 if (dump_file)
4110 fprintf (dump_file, "entry/exit not ok: FAILED\n");
4111 continue;
4114 changed = true;
4115 skip_loop = loop->inner;
4117 if (dump_enabled_p ())
4119 dump_user_location_t loop_loc = find_loop_location (loop);
4120 if (loop->inner)
4121 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
4122 "parallelizing outer loop %d\n", loop->num);
4123 else
4124 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
4125 "parallelizing inner loop %d\n", loop->num);
4128 gen_parallel_loop (loop, &reduction_list,
4129 n_threads, &niter_desc, oacc_kernels_p);
4132 obstack_free (&parloop_obstack, NULL);
4134 /* Parallelization will cause new function calls to be inserted through
4135 which local variables will escape. Reset the points-to solution
4136 for ESCAPED. */
4137 if (changed)
4138 pt_solution_reset (&cfun->gimple_df->escaped);
4140 return changed;
4143 /* Parallelization. */
4145 namespace {
4147 const pass_data pass_data_parallelize_loops =
4149 GIMPLE_PASS, /* type */
4150 "parloops", /* name */
4151 OPTGROUP_LOOP, /* optinfo_flags */
4152 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
4153 ( PROP_cfg | PROP_ssa ), /* properties_required */
4154 0, /* properties_provided */
4155 0, /* properties_destroyed */
4156 0, /* todo_flags_start */
4157 0, /* todo_flags_finish */
4160 class pass_parallelize_loops : public gimple_opt_pass
4162 public:
4163 pass_parallelize_loops (gcc::context *ctxt)
4164 : gimple_opt_pass (pass_data_parallelize_loops, ctxt),
4165 oacc_kernels_p (false)
4168 /* opt_pass methods: */
4169 bool gate (function *) final override
4171 if (oacc_kernels_p)
4172 return flag_openacc;
4173 else
4174 return flag_tree_parallelize_loops > 1;
4176 unsigned int execute (function *) final override;
4177 opt_pass * clone () final override
4179 return new pass_parallelize_loops (m_ctxt);
4181 void set_pass_param (unsigned int n, bool param) final override
4183 gcc_assert (n == 0);
4184 oacc_kernels_p = param;
4187 private:
4188 bool oacc_kernels_p;
4189 }; // class pass_parallelize_loops
4191 unsigned
4192 pass_parallelize_loops::execute (function *fun)
4194 tree nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4195 if (nthreads == NULL_TREE)
4196 return 0;
4198 bool in_loop_pipeline = scev_initialized_p ();
4199 if (!in_loop_pipeline)
4200 loop_optimizer_init (LOOPS_NORMAL
4201 | LOOPS_HAVE_RECORDED_EXITS);
4203 if (number_of_loops (fun) <= 1)
4204 return 0;
4206 if (!in_loop_pipeline)
4208 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
4209 scev_initialize ();
4212 unsigned int todo = 0;
4213 if (parallelize_loops (oacc_kernels_p))
4215 fun->curr_properties &= ~(PROP_gimple_eomp);
4217 checking_verify_loop_structure ();
4219 /* ??? Intermediate SSA updates with no PHIs might have lost
4220 the virtual operand renaming needed by separate_decls_in_region,
4221 make sure to rename them again. */
4222 mark_virtual_operands_for_renaming (fun);
4223 update_ssa (TODO_update_ssa);
4224 if (in_loop_pipeline)
4225 rewrite_into_loop_closed_ssa (NULL, 0);
4228 if (!in_loop_pipeline)
4230 scev_finalize ();
4231 loop_optimizer_finalize ();
4234 return todo;
4237 } // anon namespace
4239 gimple_opt_pass *
4240 make_pass_parallelize_loops (gcc::context *ctxt)
4242 return new pass_parallelize_loops (ctxt);